1
votes

Using Elastic "number": "6.3.1" "lucene_version": "7.3.1" NEST: 6.1.0

I'm trying to translate the search below into a NEST query. Essentially, message1 and message2 can have empty string values. If search1Value or search2Value is an empty string, I don't want any records returned for the part of the OR condition where the search value is empty.

This piece is part of a much larger search with other criteria, but this piece is causing the query to ES to be extremely slow. I create RAW fields in addition to the original fields when creating the index just to be able to search on NOT EMPTY. Nothing else i had tried allowed me to do that search correctly. IS there a different way to do this? As mentioned, the performance of the query is terrible: over 2 seconds. The index in question has around 600k documents. The logic does work, though: it returns the correct documents.

Thank you in advance for any assistance!!

message1 != "" and message1.StartsWith(search1Value)
OR 
message2 != "" and message2.StartsWith(search2Value)

so if example of available docs in index...

id, message1, message2
1, "", "abc"
2, "", ""
3, "def", ""
4, "", "ghi"

If searchValue1 is an empty string and searchValue2 is "abc", I want to get back only record 1, not records 1, 2, and 4.

in order to properly search on this condition an index was setup as such:

// Search criteria passed to PerformSearch: one prefix value per message field.
public class MessageSearch
{
    // Prefix to look for in Message1.
    public string Message1 { get; set; }

    // Prefix to look for in Message2.
    public string Message2 { get; set; }
}

// Document type stored in the index. Both messages default to the empty
// string rather than null.
public class MessageModelIndex
{
    public string Message1 { get; set; } = "";
    public string Message2 { get; set; } = "";
}


// Builds the create-index request for the given index name: two shards, and a
// mapping for MessageModelIndex in which Message1 and Message2 are text fields,
// each with a "raw" text multi-field and a "keyword" keyword multi-field.
public override CreateIndexDescriptor DefineIndex(string indexName)
        {
            // Settings and Mappings are siblings on the CreateIndexDescriptor, so the
            // settings lambda is closed before the mappings are chained on.
            return new CreateIndexDescriptor(indexName)
                .Settings(s => s
                    .NumberOfShards(2)
                )
                .Mappings(ms => ms
                    .Map<MessageModelIndex>(m => m
                        .Properties(p => p
                            .Text(t => t
                                .Name(x => x.Message1)
                                .Fields(ff => ff
                                    .Text(tt => tt
                                        .Name("raw")
                                    )
                                    // ignore_above = 1: values longer than one character
                                    // are not indexed in this sub-field.
                                    .Keyword(k => k
                                        .Name("keyword")
                                        .IgnoreAbove(1)
                                    )
                                )
                            )
                            .Text(t => t
                                .Name(x => x.Message2)
                                .Fields(ff => ff
                                    .Text(tt => tt
                                        .Name("raw")
                                    )
                                    .Keyword(k => k
                                        .Name("keyword")
                                        .IgnoreAbove(1)
                                    )
                                )
                            )
                        )
                    )
                );
        }

The following Search is used to get these values:

// Runs the "non-empty and starts-with" search over MessageModelIndex documents.
// The query is an OR (bool/should) of two branches, one per message field. Each
// branch requires that the field's "keyword" sub-field is NOT the empty string
// and that the field's "raw" sub-field matches the trimmed search value as a
// phrase prefix.
// The response is stored in the local `result`; nothing is returned.
// NOTE(review): search.Message1 / search.Message2 are dereferenced with .Trim()
// without a null check, so a null value would throw.
public void PerformSearch(MessageSearch search) {
                var result = _client.Search<MessageModelIndex>(x => x
               .Index("MessageTest")
               // Return at most 1000 hits.
               .Size(1000)
               .Query(q => q
                        .Bool(b => b
                                .Must(bm => bm
                                    .Bool(bb => bb
                                        // First OR branch: Message1.
                                        .Should(bbs => bbs
                                            .Bool(bbb => bbb
                                                .Must(mm => mm
                                                    .Bool(bbbb => bbbb
                                                        // Verbatim() keeps the term query in the request even though its value is the empty string.
                                                        .MustNot(bbbmn => bbbmn.Term(t => t.Verbatim().Field(f => f.Message1.Suffix("keyword")).Value(string.Empty)))
                                                    ),
                                                    mm => mm
                                                    .Bool(bbbb => bbbb
                                                        .Must(bbbmn => bbbmn.MatchPhrasePrefix(mmp => mmp.Query(search.Message1.Trim()).Field(f => f.Message1.Suffix("raw"))))
                                                    )
                                                 )
                                            // Second OR branch: same shape, for Message2.
                                            ), bbs => bbs
                                            .Bool(bbb => bbb
                                                .Must(mm => mm
                                                    .Bool(bbbb => bbbb
                                                        .MustNot(bbbmn => bbbmn.Term(t => t.Verbatim().Field(f => f.Message2.Suffix("keyword")).Value(string.Empty)))
                                                    ),
                                                    mm => mm
                                                    .Bool(bbbb => bbbb
                                                        .Must(bbbmn => bbbmn.MatchPhrasePrefix(mmp => mmp.Query(search.Message2.Trim()).Field(f => f.Message2.Suffix("raw"))))
                                                    )
                                                 )
                                            )
                                        )
                                    )
                                )
                            )
               )
            );
 }
1

1 Answers

1
votes

The mapping and query look incorrect for your desired outcome. Let's break it down

I create RAW fields in addition to the original fields when creating the index just to be able to search on NOT EMPTY. Nothing else i had tried allowed me to do that search correctly. IS there a different way to do this?

The mapping

As an example, the mapping you have

.Text(s => s
    .Name(x => x.Message1)
    .Fields(ff => ff
        .Text(tt => tt
            .Name("raw")
        )
        .Keyword(k => k
            .Name("keyword")
            .IgnoreAbove(1)
        )
    )
)

The "raw" field is superfluous as it is the same as the containing text data type mapping.

The "keyword" multi-field will only index strings of at most one character for Message1. Here, I think you want .IgnoreAbove(0) if the intention is to use this multi-field to be able to search for documents that have an empty string for Message1. I would question, however, whether it is actually valuable to be able to search on documents with an empty Message1; you'd be able to find documents that have a value (even an empty string) with an exists query, and if you did want to search on documents with empty messages, you could do so with a script query.

Ultimately, I guess if it is common to be able to search on empty messages, then having this "keyword" multi-field would be useful; I'd be inclined to name it "empty" though instead, to better match intent.

The search request

.Index("MessageTest")

The index name must be lowercase to be valid.

.Bool(b => b
        .Must(bm => bm
            .Bool(bb => bb
                .Should(bbs => bbs

The outer bool query must clause is not needed; the should clauses can be moved out and defined on the outer bool query.

.Bool(bbb => bbb
    .Must(mm => mm
        .Bool(bbbb => bbbb
            .MustNot(bbbmn => bbbmn.Term(t => t.Verbatim().Field(f => f.Message1.Suffix("keyword")).Value(string.Empty)))
        ),
        mm => mm
        .Bool(bbbb => bbbb
            .Must(bbbmn => bbbmn.MatchPhrasePrefix(mmp => mmp.Query(search.Message1.Trim()).Field(f => f.Message1.Suffix("raw"))))
        )
     )
)

The term query in the must_not clause looks superfluous to me, because an empty string input for the match_phrase_prefix query will not match any documents. You can see this for yourself if you indexed the following documents

// Index the four sample documents from the question in a single bulk request
// and wait for a refresh so that they are visible to the search that follows.
var bulkResponse = client.Bulk(bulk => bulk
    .IndexMany(new []
    {
        new MessageModelIndex { Id = 1, Message1 = "",    Message2 = "abc" },
        new MessageModelIndex { Id = 2, Message1 = "",    Message2 = ""    },
        new MessageModelIndex { Id = 3, Message1 = "def", Message2 = ""    },
        new MessageModelIndex { Id = 4, Message1 = "",    Message2 = "ghi" },
    })
    .Refresh(Refresh.WaitFor)
);

and then run the search

// Send a match_phrase_prefix query on Message1 whose input is the empty string.
// Verbatim() makes NEST serialize the query even though its input is empty.
var emptyStringInputResponse = client.Search<MessageModelIndex>(s => s
    .Index(defaultIndex)
    .Query(query => query
        .MatchPhrasePrefix(mpp => mpp
            .Field(doc => doc.Message1)
            .Query("")
            .Verbatim()
        )
    )
);

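No documents are returned. This is because of analysis: the Message1 field is analyzed at index time, and the query input targeting that field is analyzed at query time. An empty string produces no tokens, so the query has nothing to match against.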

Also note that .Verbatim() needs to be used here because NEST has a concept known as conditionless queries: if a query is determined to be conditionless, then it is not included in the serialized request JSON. For a MatchPhrasePrefix query, a null or empty string query input makes the query conditionless. Using .Verbatim() overrides this conditionless behaviour, forcing NEST to serialize the query as is.

The query can be simplified down to

// OR of two phrase-prefix queries, one per message field.
var searchResponse = client.Search<MessageModelIndex>(x => x
    .Index(defaultIndex)
    .Size(1000)
    .Query(q => q
        .Bool(bb => bb
            .Should(bbs => bbs
                .MatchPhrasePrefix(mmp => mmp
                    // Verbatim() keeps an empty-input clause in the request. Without it
                    // NEST drops the clause as conditionless, and if BOTH inputs are
                    // empty the whole query is omitted, which matches every document.
                    .Verbatim()
                    // Treat a null search value like an empty one instead of throwing.
                    .Query((search.Message1 ?? "").Trim())
                    .Field(f => f.Message1)
                ), bbs => bbs
                .MatchPhrasePrefix(mmp => mmp
                    .Verbatim()
                    .Query((search.Message2 ?? "").Trim())
                    .Field(f => f.Message2)
                )
            )
        )
    )
);

which can be further simplified with operator overloading on queries to

// The same OR of two phrase-prefix queries, combined with NEST's || operator.
var searchResponse = client.Search<MessageModelIndex>(x => x
    .Index(defaultIndex)
    .Size(1000)
    .Query(q => q
        .MatchPhrasePrefix(mmp => mmp
                // Verbatim() keeps an empty-input clause in the request. Without it
                // NEST drops the clause as conditionless, and if BOTH inputs are
                // empty the whole query is omitted, which matches every document.
                .Verbatim()
                // Treat a null search value like an empty one instead of throwing.
                .Query((search.Message1 ?? "").Trim())
                .Field(f => f.Message1)
            ) || q
        .MatchPhrasePrefix(mmp => mmp
            .Verbatim()
            .Query((search.Message2 ?? "").Trim())
            .Field(f => f.Message2)
        )
    )
);

This returns only the document with id 1 when searchValue1 is "" and searchValue2 is "abc".

Here's a complete example

// Complete example: recreates the "message-test" index, indexes the four sample
// documents from the question, and runs the prefix search for
// Message1 = "" / Message2 = "abc".
private static void Main()
{
    var defaultIndex = "message-test";
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));

    var settings = new ConnectionSettings(pool)
        .DefaultIndex(defaultIndex);

    var client = new ElasticClient(settings);

    // Start from a clean index on every run.
    if (client.IndexExists(defaultIndex).Exists)
        client.DeleteIndex(defaultIndex);

    client.CreateIndex(defaultIndex, c => c
        .Mappings(m => m
            .Map<MessageModelIndex>(mm => mm
               .Properties(p => p
                    .Text(s => s
                        .Name(x => x.Message1)
                        .Fields(ff => ff
                            // ignore_above = 0: only the empty string is indexed here.
                            .Keyword(k => k
                                .Name("keyword")
                                .IgnoreAbove(0)
                            )
                        )
                    )
                    .Text(s => s
                        .Name(x => x.Message2)
                        .Fields(ff => ff
                            .Keyword(k => k
                                .Name("keyword")
                                .IgnoreAbove(0)
                            )
                        )
                    )
                )
            )
        )
    );

    // Wait for a refresh so the documents are searchable immediately.
    var bulkResponse = client.Bulk(b => b
        .IndexMany(new [] 
        {
            new MessageModelIndex { Id = 1, Message1 = "", Message2 = "abc" },
            new MessageModelIndex { Id = 2, Message1 = "", Message2 = "" },
            new MessageModelIndex { Id = 3, Message1 = "def", Message2 = "" },
            new MessageModelIndex { Id = 4, Message1 = "", Message2 = "ghi" },
        })
        .Refresh(Refresh.WaitFor)
    );

    var search = new MessageSearch
    {
        Message1 = "",
        Message2 = "abc"
    };

    var searchResponse = client.Search<MessageModelIndex>(x => x
        .Index(defaultIndex)
        .Size(1000)
        .Query(q => q
            .MatchPhrasePrefix(mmp => mmp
                    // Verbatim() keeps an empty-input clause in the request. Without it
                    // NEST drops the clause as conditionless, and if BOTH inputs are
                    // empty the whole query is omitted, which matches every document.
                    .Verbatim()
                    // Treat a null search value like an empty one instead of throwing.
                    .Query((search.Message1 ?? "").Trim())
                    .Field(f => f.Message1)
                ) || q
            .MatchPhrasePrefix(mmp => mmp
                .Verbatim()
                .Query((search.Message2 ?? "").Trim())
                .Field(f => f.Message2)
            )
        )
    );
}

/// <summary>
/// Search input: one prefix value per message field.
/// </summary>
public class MessageSearch
{
    /// <summary>Prefix searched for in the Message1 field.</summary>
    public string Message1 { get; set; }

    /// <summary>Prefix searched for in the Message2 field.</summary>
    public string Message2 { get; set; }
}

/// <summary>
/// Document type stored in the index. Both message fields default to the
/// empty string rather than null.
/// </summary>
public class MessageModelIndex
{
    /// <summary>Identifier assigned to each sample document.</summary>
    public int Id { get; set; }

    /// <summary>First message; empty string when not set.</summary>
    public string Message1 { get; set; } = "";

    /// <summary>Second message; empty string when not set.</summary>
    public string Message2 { get; set; } = "";
}