1
votes

I am trying to configure the platform to allow queries that start with a stop word. I have the following document:

        {
          "responseHeader":{
            "status":0,
            "QTime":1,
            "params":{
              "indent":"true",
              "q":"*:*",
              "wt":"json"}},
          "response":{"numFound":1,"start":0,"docs":[
              {
                "weight_metric":0.3,
                "maximumPowerDraw":9,
                "beamAngle":50,
                "name_de":"German",
                "type":["product"],
                "id":"5dac69a9-7d54-43f9-b815-0a54e519a1f0",
                "name":"Aloa something"
                }]
          }}

The document has a field called name for English (the default) and another called name_de for German. I cannot understand why the following query returns the document in the results: http://localhost:8080/solr-webapp/collection1/select?q=name_de:German%20welcher&wt=json&indent=true
whereas this other query, with a stop word (welcher) at the beginning, does not return any result: http://localhost:8080/solr-webapp/collection1/select?q=name_de:welcher%20German%20welcher&wt=json&indent=true

I would like both queries to return the same result as the first one.
However, for the default language it works fine.
Here I copy some snippets of my schema.xml

    <!-- English text field type with separate index-time and query-time analyzer chains.
         Both chains use the same tokenizer and the same stop word, lower-case, possessive,
         keyword-marker and Porter stemming filters; the query chain additionally applies
         synonyms from synonyms.txt right after tokenization. -->
    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- In this example, synonyms are only used at query time, so the index-time
             synonym filter below is left commented out:
                 <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <!-- Case insensitive stop word removal (runs before the lower-case filter, hence
             ignoreCase="true").
             Add enablePositionIncrements=true in both the index and query
             analyzers to leave a 'gap' for more accurate phrase queries.
        -->
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <!-- Terms listed in protwords.txt are marked as keywords ahead of the stemmer. -->
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
                     <filter class="solr.EnglishMinimalStemFilterFactory"/>
        -->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <!-- Query-time only: synonym handling from synonyms.txt with expand="true". -->
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <!-- Same stop word configuration as the index analyzer (stopwords.txt, case insensitive). -->
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"
                enablePositionIncrements="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
                     <filter class="solr.EnglishMinimalStemFilterFactory"/>
        -->
        <filter class="solr.PorterStemFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- German text field type.
         Declares a single analyzer without a type attribute (NOTE(review): presumably it is
         therefore applied at both index and query time; confirm against the Solr docs).
         Unlike text_en, tokens are lower-cased before stop word removal, and the stop word
         list (lang/stopwords_de.txt) is read in "snowball" format. -->
    <fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball"
                enablePositionIncrements="true"/>
        <filter class="solr.GermanNormalizationFilterFactory"/>
        <!-- German stemming is currently disabled: -->
        <!--<filter class="solr.GermanMinimalStemFilterFactory"/>-->

      </analyzer>
    </fieldType>

...

    <!-- Name fields: "name" uses the text_en field type, "name_de" uses the text_de field type.
         Both are stored and indexed. -->
    <field name="name" type="text_en" stored="true" indexed="true"/>
    <field name="name_de" type="text_de" stored="true" indexed="true"/>
...

    <!-- Copy every field whose name matches *_de into text_de, and the "name" field into text. -->
    <copyField source="*_de" dest="text_de"/>
    <copyField source="name" dest="text"/>

...

<!-- Destination fields of the copyField rules: indexed but not stored, multi-valued,
     with term vectors enabled. The text_general field type is not shown in this excerpt. -->
<field name="text" type="text_general" stored="false" indexed="true" multiValued="true" termVectors="true"/>
<field name="text_de" type="text_de" stored="false" indexed="true" multiValued="true" termVectors="true"/>

Does anybody have an idea how I can fix this unwanted behavior?
(By the way, for the field name I do get the expected behavior; this query returns the document: http://localhost:8080/solr-webapp/collection1/select?q=name:the%20Aloa&wt=json&indent=true)

1

1 Answer

4
votes

The problem is in your query syntax. See this example in the Lucene query syntax documentation. Your query is:

name_de:welcher German welcher

Only the first query term is going to be searched in name_de. The rest will be searched in the default field (name). Your query is effectively:

name_de:welcher name:German name:welcher

Instead, try:

name_de:(welcher German welcher)