I'm trying to implement an auto-suggest control powered by an Elasticsearch (ES) index. The index has multiple fields containing text in more than one language (Arabic and English), and I want to be able to search across all of the languages.
The easiest way to do that is to use NGram with the "_all" field, as long as some care is taken in the mapping definition. The issue we have now is how to accomplish this with multiple languages.
PS: We are looking to use a separate field for each of the possible languages (using one index).
I tried using the nGram tokenizer and filter, and it works well for one language (English):
{
  "template": "index_com",
  "settings": {
    "number_of_shards": 5,
    "number_of_replicas": 1,
    "analysis": {
      "filter": {
        "edgeNGram_filter": {
          "type": "edgeNGram",
          "min_gram": 2,
          "max_gram": 20
        }
      },
      "analyzer": {
        "edgeNGram_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase",
            "asciifolding",
            "edgeNGram_filter"
          ]
        },
        "edgeNGram_search_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "product": {
      "_all": {
        "enabled": true,
        "index_analyzer": "edgeNGram_analyzer",
        "search_analyzer": "edgeNGram_search_analyzer"
      },
      "properties": {
        "id": {
          "type": "string",
          "index": "no",
          "include_in_all": false
        },
        "uuid": {
          "type": "string",
          "index": "no",
          "include_in_all": false
        },
        "name": {
          "type": "string",
          "include_in_all": true
        },
        "description": {
          "type": "string",
          "include_in_all": true
        },
        "brand": {
          "type": "string",
          "include_in_all": true
        },
        "made_id": {
          "type": "string",
          "include_in_all": true
        },
        "category": {
          "type": "string",
          "include_in_all": true
        },
        "category_id": {
          "type": "integer",
          "include_in_all": false
        },
        "keywords": {
          "type": "string",
          "include_in_all": true
        },
        "colors": {
          "type": "string",
          "index": "not_analyzed"
        },
        "colors_name": {
          "type": "string",
          "include_in_all": true
        },
        "quality": {
          "type": "string",
          "index": "not_analyzed"
        },
        "vendor_name": {
          "type": "string",
          "include_in_all": false
        },
        "vendor_location": {
          "type": "geo_point",
          "include_in_all": false
        },
        "price": {
          "type": "double",
          "include_in_all": false
        },
        "price_before_discount": {
          "type": "double",
          "include_in_all": false
        },
        "is_deal": {
          "type": "integer",
          "include_in_all": false
        },
        "is_best_seller": {
          "type": "integer",
          "include_in_all": false
        },
        "views": {
          "type": "integer",
          "include_in_all": false
        },
        "rating": {
          "type": "integer",
          "include_in_all": false
        },
        "updated_at": {
          "type": "date",
          "format": "dateOptionalTime"
        },
        "created_at": {
          "type": "date",
          "format": "dateOptionalTime"
        },
        "image_link": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}
Arabic analyzer:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "arabic": {
          "filter": [
            "lowercase",
            "arabic_stop",
            "arabic_normalization",
            "arabic_keywords",
            "arabic_stemmer"
          ],
          "tokenizer": "standard"
        }
      },
      "filter": {
        "arabic_keywords": {
          "keywords": [],
          "type": "keyword_marker"
        },
        "arabic_stemmer": {
          "language": "arabic",
          "type": "stemmer"
        },
        "arabic_stop": {
          "stopwords": "_arabic_",
          "type": "stop"
        }
      }
    }
  }
}
Can someone suggest a solution? Thanks!