0
votes

I created a synonym filter named "synonym_filter"

"synonym_filter": {
                 "type": "synonym",
                 "synonyms": [
                    "adidas, xyz, abc",
                    "nike, rofl, lol"
                 ]
              }

and an analyzer named "synonyms":

 "synonyms": {
                 "filter": [
                    "lowercase",
                    "synonym_filter"
                 ],
                 "tokenizer": "standard"
              },

according to the Elasticsearch documentation. The complete settings now look like this:

GET /test6_de_idx1/_settings

   "test6_de_idx1": {
  "settings": {
     "index": {
        "creation_date": "1471372087742",
        "analysis": {
           "filter": {
              "edge_ngram_back": {
                 "min_gram": "2",
                 "side": "back",
                 "type": "edgeNGram",
                 "max_gram": "10"
              },
              "edge_ngram_front": {
                 "min_gram": "2",
                 "side": "front",
                 "type": "edgeNGram",
                 "max_gram": "10"
              },
              "synonym_filter": {
                 "type": "synonym",
                 "synonyms": [
                    "adidas, xyz, abc",
                    "nike, rofl, lol"
                 ]
              },
              "stop": {
                 "type": "stop",
                 "stopwords": "_german_"
              },
              "strip_hyphens": {
                 "pattern": "-",
                 "type": "pattern_replace",
                 "replacement": ""
              },
              "length": {
                 "type": "length",
                 "min": "2"
              },
              "strip_spaces": {
                 "pattern": "\\s",
                 "type": "pattern_replace",
                 "replacement": ""
              },
              "snowball": {
                 "type": "snowball",
                 "language": "German"
              },
              "strip_dots": {
                 "pattern": "\\.",
                 "type": "pattern_replace",
                 "replacement": ""
              }
           },
           "analyzer": {
              "std": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "length"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              },
              "synonyms": {
                 "filter": [
                    "lowercase",
                    "synonym_filter"
                 ],
                 "tokenizer": "standard"
              },
              "keyword_suffix": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens",
                    "edge_ngram_back"
                 ],
                 "tokenizer": "keyword"
              },
              "text_suffix": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "edge_ngram_back"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              },
              "language": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "snowball",
                    "length"
                 ],
                 "char_filter": "html_strip",
                 "type": "custom",
                 "tokenizer": "standard"
              },
              "keyword": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens"
                 ],
                 "tokenizer": "keyword"
              },
              "keyword_prefix": {
                 "filter": [
                    "asciifolding",
                    "lowercase",
                    "strip_spaces",
                    "strip_dots",
                    "strip_hyphens",
                    "edge_ngram_front"
                 ],
                 "tokenizer": "keyword"
              },
              "text_prefix": {
                 "filter": [
                    "standard",
                    "elision",
                    "asciifolding",
                    "lowercase",
                    "stop",
                    "edge_ngram_front"
                 ],
                 "char_filter": "html_strip",
                 "tokenizer": "standard"
              }
           }
        },
        "number_of_shards": "1",
        "number_of_replicas": "0",
        "uuid": "sAiM27R2QOKLj9wjrwoAqw",
        "version": {
           "created": "2030399"
        }
     }
  }

}

Testing the analyzer with

GET /test6_de_idx1/_analyze
{
  "analyzer" : "synonyms",
  "text" : "xyz is the English queen"
}

shows that the analyzer is working:

"tokens": [
      {
         "token": "xyz",
         "start_offset": 0,
         "end_offset": 3,
         "type": "<ALPHANUM>",
         "position": 0
      },
      {
         "token": "adidas",
         "start_offset": 0,
         "end_offset": 3,
         "type": "SYNONYM",
         "position": 0
      },
      {
         "token": "abc",
         "start_offset": 0,
         "end_offset": 3,
         "type": "SYNONYM",
         "position": 0
      },
      {
         "token": "is",
         "start_offset": 4,
         "end_offset": 6,
         "type": "<ALPHANUM>",
         "position": 1
      },
      {
         "token": "the",
         "start_offset": 7,
         "end_offset": 10,
         "type": "<ALPHANUM>",
         "position": 2
      },
      {
         "token": "english",
         "start_offset": 11,
         "end_offset": 18,
         "type": "<ALPHANUM>",
         "position": 3
      },
      {
         "token": "queen",
         "start_offset": 19,
         "end_offset": 24,
         "type": "<ALPHANUM>",
         "position": 4
      }
   ]

But when I run a test search with a synonym, I get 0 results, whereas a search for "adidas", which is used in the data, does return results.

GET /test6_de_idx1/_search?q=xyz&size=5



 "took": 1,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 0,
      "max_score": null,
      "hits": []
   }

What could be the problem?

EDIT:

Thanks for the quick reply! A hit looks like this for search term "adidas":

"hits": [
         {
            "_index": "test6_de_idx1",
            "_type": "product",
            "_id": "59279",
            "_score": 1.0859994,
            "_source": {
               "id": 59279,
               "sku": "0002-10780",
               "type_id": "configurable",
               "brand": "Adidas",
               "color": "Mehrfarbig",
               "manufacturer": "Adidas",
               "material": "Polyester",
               "model": "Damen",
               "producttype": "Tank",
               "status": 1,
               "tax_class_id": 1,
               "visibility": 4,
               "price": 24.99,
               "weight": 0,
               "image": "http://sportokay.dev/skin/frontend/default/default/images/catalog/product/placeholder/image.jpg",
               "name": "Adidas Keyhole Tank Damen Fitnessshirt",
               "description": "Das Adidas Keyhole Tank Damen Fitnessshirt ist ein leichtes, weiches Fitness T-Shirt aus Adidas Climalite Material, welches optimales Feuchtigkeitsmanagement ermöglicht.\r\n- 100 % Polyester\r\n- Adidas Climalite\r\n- Lockere Passform\r\n- Schlitz am Rücken",
               "short_description": "Adidas Keyhole Tank Damen Fitnessshirt",
               "_categories": [
                  "Damen",
                  "Alle",
                  "Fitness",
                  "Fitnessbekleidung",
                  "Shirts"
               ],
               "_prices": {
                  "price": 24.99,
                  "final_price": 24.99,
                  "minimal_price": 24.99,
                  "min_price": 24.99,
                  "max_price": 24.99,
                  "tier_price": null
               },
               "_url": "http://xxx.dev/at_de/adidas-keyhole-tank-damen-fitnessshirt.html"
            }
         }

GET /test6_de_idx1/_search?q=brand:xyz does not return any results, although xyz is a synonym for adidas.

EDIT2:

This is the mapping currently in use:

"test6_de_idx1": {
  "mappings": {
     "product": {
        "_all": {
           "analyzer": "std"
        },
        "properties": {
           "_categories": {
              "type": "string",
              "analyzer": "language",
              "include_in_all": true
           },
           "_parent_ids": {
              "type": "integer",
              "index": "no",
              "store": true
           },
           "_prices": {
              "properties": {
                 "final_price": {
                    "type": "double"
                 },
                 "max_price": {
                    "type": "double"
                 },
                 "min_price": {
                    "type": "double"
                 },
                 "minimal_price": {
                    "type": "double"
                 },
                 "price": {
                    "type": "double"
                 }
              }
           },
           "_url": {
              "type": "string",
              "index": "no",
              "store": true
           },
           "adjustment": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "adjustmentrange": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "antennas": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "backlength": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
           "bike_colour": {
              "type": "string",
              "fields": {
                 "prefix": {
                    "type": "string",
                    "analyzer": "text_prefix",
                    "search_analyzer": "std"
                 },
                 "std": {
                    "type": "string",
                    "analyzer": "std"
                 },
                 "suffix": {
                    "type": "string",
                    "analyzer": "text_suffix",
                    "search_analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },....
 "brand": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },

EDIT3:

I added the "synonyms" analyzer to the field in the mapping properties, but it still does not work. Did I do it correctly?

"brand": {
              "type": "string",
              "norms": {
                 "enabled": false
              },
              "index_options": "docs",
              "fields": {
                 "std": {
                    "type": "string",
                    "norms": {
                       "enabled": false
                    },
                    "index_options": "docs",
                    "analyzer": "std"
                 },
                 "synonyms": {
                    "type": "string",
                    "analyzer": "synonyms"
                 }
              },
              "analyzer": "language",
              "include_in_all": true
           },
1

1 Answer

1
votes

Try GET /test6_de_idx1/_search?q=some_field:xyz&size=5, i.e. explicitly specify the field name in the query (some_field). Otherwise the query will use _all, which does not use your analyzer.

q=xyz is translated into a query_string query, which by default searches the _all field.