1
votes

I have a few documents indexed in Solr. When I query using q=*:*, I get all the documents but when I send some word to q, I get no results. Below is the snippet of schema.xml

    <?xml version="1.0" ?>


<schema name="default" version="1.5">
  <types>
    <fieldtype name="string"  class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
    <fieldtype name="binary" class="solr.BinaryField"/>


    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
    <!-- <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
    <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/> -->

    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>

    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
    <!-- A Trie based date field for faster date range queries and date faceting. -->
    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

    <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
    <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
    <fieldtype name="geohash" class="solr.GeoHashField"/>

    <fieldType name="text" class="solr.TextField">
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
</fieldType>

    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
      <!-- <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" /> -->
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
        <!-- <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>

      </analyzer> -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    </fieldType>

    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
      <!-- <analyzer type="index">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -->
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
          <filter class="solr.EnglishMinimalStemFilterFactory"/>
        -->
        <!-- <filter class="solr.PorterStemFilterFactory"/> -->
      <!-- </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.EnglishPossessiveFilterFactory"/>
        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> -->
        <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
          <filter class="solr.EnglishMinimalStemFilterFactory"/>
        -->

        <!-- <filter class="solr.PorterStemFilterFactory"/>
      </analyzer> -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    </fieldType>

    <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="ngram" class="solr.TextField" >
      <analyzer type="index">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" />
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.KeywordTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
        <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" />
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory" />
        <filter class="solr.LowerCaseFilterFactory" />
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
      </analyzer>
    </fieldType>
  </types>

  <fields>
    <!-- general -->
    <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
    <field name="django_ct" type="string" indexed="true" stored="true" multiValued="false"/>
    <field name="django_id" type="string" indexed="true" stored="true" multiValued="false"/>
    <field name="_version_" type="long" indexed="true" stored ="true"/>

    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
    <dynamicField name="*_t"  type="text_en"    indexed="true"  stored="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
    <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
    <dynamicField name="*_coordinate"  type="tdouble" indexed="true"  stored="false"/>


    <field name="content" type="text_en" indexed="true" stored="true" multiValued="false" />

    <field name="title" type="text_en" indexed="true" stored="true" multiValued="false" />

    <field name="text" type="text_en" indexed="true" stored="true" multiValued="false" />

    <field name="image" type="text_en" indexed="true" stored="true" multiValued="false" />

    <field name="short_desc" type="text_en" indexed="true" stored="true" multiValued="false" />

    <field name="pub_date" type="text_en" indexed="true" stored="true" multiValued="false" />

  </fields>

  <!-- field to use to determine and enforce document uniqueness. -->
  <uniqueKey>id</uniqueKey>

  <!-- field for the QueryParser to use when an explicit fieldname is absent -->
  <defaultSearchField>text</defaultSearchField>

  <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
  <solrQueryParser defaultOperator="OR"/>
</schema>

What could I possibly be doing wrong?!

EDIT

Here is a sample of the document indexed in Solr.

Indexed Solr Document

And here is the query I ran that gave me 0 results:

Debug Query

As you can clearly see the document has India mentioned. So this document should have been returned. Is there something wrong with the query generated?

3

3 Answers

1
votes

It would have been good if you had shared the definition of field type as in whats the tokenizer been used, which all filters are used etc...

If you have used the keyword tokenizer which is the tokenizer that treats the entire text field as a single token.

Try by using the StandardTokenizerFactory or WhitespaceTokenizerFactory.

In case WhitespaceTokenizerFactory , tokenizer that splits the text stream on whitespace and returns sequences of non-whitespace characters as tokens. Note that any punctuation will be included in the tokenization.

If your input stream is : "The success of Republic Day in India"

Output is : "The", "success", "of", "Republic", "Day", "in", "India"

Again if you add any filter like stopword filter or lowercase filter that would again be good.

As an example

<fieldType name="text" class="solr.TextField">
    <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
</fieldType>

Here the final output would be different

If your input stream is : "The success of Republic Day in India"

Output is : "the", "success", "of", "republic", "day", "in", "india"

and now your can query by "India" as well as "india"... it will get a match

because while indexing you indexed it as "india" and while quering you have the lowercase filter which will make it to "india" even if the search text is "India".

On top of it if you add stopword filter factory

it will not index words like : "of", "the", "in" and search on those words is not meaningful(Its my opinion, may vary from others).

The solr has provided a web interface, where in one you can analyse your fieldtypes, who it is indexing the stream ...what all you need to change so the you get the right result.

I hope this helps...

For more information on all tokenizers and filters please have a look at it ..

https://cwiki.apache.org/confluence/display/solr/Tokenizers#Tokenizers-WhiteSpaceTokenizer

https://cwiki.apache.org/confluence/display/solr/Filter+Descriptions

1
votes

Either you will have to fire your query on a field name like below

q=:content:india

or you will have to define a defaults fields to be searched for a blank query string for your select handler in solrconfig file as below

    <requestHandler name="/select" class="solr.SearchHandler">
        <!-- default values for query parameters can be specified, these
                 will be overridden by parameters in the request
            -->
         <lst name="defaults">
             <int name="rows">10</int>
             <str name="qf">content short_description</str>
         </lst>
    </requestHandler>
1
votes

In these cases I'd add debugQuery=true parameter to my http request. The displayed information includes how Solr sees the q parameter so you should be able to get what's going wrong. Shooting in the dark I guess documents are not actually indexed or you're using a wrong query parser (e.g. *:* is not a valid query for DisMax)

After you post has been updated I see a strange thing (but maybe I could be wrong, I'm reading this looong post from my mobile):

nothing fills the "text" field...

the document you're looking for has the "india" term in the "content" field, but the df (default field used in queries) is "text" so this is the correct behaviour, nothing matches "india" in "text" because "text" is empty. You could do one of the following:

  • change the default field from text to content
  • explicitly name the content field in your query (e.g. content:india)
  • Declare a copyField directive with src=content and dst=text