0
votes

I want to use custom English and German analyzers together with other analysis components, for example an ngram filter. Is the following mapping correct? I am getting an error for the German analyzer: `unknown setting [index.filter.german_stop.type]`. I searched but did not find any information about using multiple language analyzers of the custom type. Is it possible to use a language-specific ngram filter?

PUT test  {
    "settings": {
        "analysis": {
            "analyzer": {
                "english_analyzer": {
                    "type": "custom",
                    "filter": [
                        "lowercase",
                        "english_stop",
                        "ngram_filter_en"
                    ],
                    "tokenizer": "whitespace"
                }
            },
            "filter": {
                "english_stop": {
                    "type": "stop"
                },
                "ngram_filter_en": {
                    "type": "edge_ngram",
                    "min_gram": 1,
                    "max_gram": 25
                }
              },
                  "german_analyzer" : {
                    "type" : "custom",
                    "filter" : [
                         "lowercase",
                         "german_stop",
                        "ngram_filter_de"
                          ],
                    "tokenizer" : "whitespace"
              }
            },
            "filter" : {
                "german_stop" : {
                    "type" : "stop"
              },
                "ngram_filter_de" : {
                    "type" : "edge_ngram",
                    "min_ngram" : "1",
                    "max_gram" : 25
              }
        }
    },
    "mappings" : {
      "dynamic" : true,
      "properties": {
        "content" : {
          "tye" : "text",
          "properties" : {
            "en" : {
              "type" : "text",
              "analyzer" : "english_analyzer"
            },
            "de" : {
              "type" : "text",
              "analyzer" : "german_analyzer"
            }
        }
      }
    } 
1

1 Answers

1
votes

There are small syntax errors.

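  1. Your last `filter` object is outside the `analysis` object (it is a direct child of `settings`), which is why Elasticsearch reports `unknown setting [index.filter.german_stop.type]`.
  2. A JSON object cannot contain the same key more than once, so all token filters must be defined in a single `filter` object under `analysis`.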

So the settings below should help:

{
  "analysis": {
    "analyzer": {
      "english_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "english_stop",
          "ngram_filter_en"
        ],
        "tokenizer": "whitespace"
      },
      "german_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "german_stop",
          "ngram_filter_de"
        ],
        "tokenizer": "whitespace"
      }
    },
    "filter": {
      "english_stop": {
        "type": "stop",
        "stopwords": "_english_"
      },
      "ngram_filter_en": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      },
      "german_stop": {
        "type": "stop",
        "stopwords": "_german_"
      },
      "ngram_filter_de": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      }
    }
  }
}

To understand the error in your mapping, look at where the second `filter` key sits in your original request:

{
        "analysis": {
            "analyzer": {
            "filter": {
                "english_stop": {
                    "type": "stop"
                },
                "ngram_filter_en": {
                    "type": "edge_ngram",
                    "min_gram": 1,
                    "max_gram": 25
                }
              },
                  "german_analyzer" : {
                    "type" : "custom",
                    "filter" : [
                         "lowercase",
                         "german_stop",
                        "ngram_filter_de"
                          ],
                    "tokenizer" : "whitespace"
              }
            }, 
            "filter" : {//**This is outside analysis, you cannot simply add another filter key inside analysis, so you can merge both as above**
                "german_stop" : {
                    "type" : "stop"
              },
                "ngram_filter_de" : {
                    "type" : "edge_ngram",
                    "min_ngram" : "1",
                    "max_gram" : 25
              }
        }