0
votes

I am running Solr 5.3.1 and I want to make updates to my Solr index, something like "set field x to 'foo' where field y is like '*bar'". But Solr doesn't seem to have the ability to do an update by query, so I resorted to using SolrJ code to accomplish this.

The logic is to query Solr to fetch the documents that I want to update, then use Atomic Update (see http://yonik.com/solr/atomic-updates/) to update each individual document.

My code looks like

/**
 * Applies an atomic "set" update of a single field to every document returned for a query.
 *
 * <p>Matching documents are looked up through {@code fetchDocsByQuery}. For each one, a
 * partial update is built that carries the document's {@code id} plus a
 * {@code {"set": fieldValue}} modifier for {@code fieldName}. All partial updates are sent
 * in one {@code client.add} call, followed by {@code client.commit}. When the lookup
 * returns no documents, nothing is sent and no commit is issued.
 *
 * @param queryStr   query handed to {@code fetchDocsByQuery} to select the documents
 * @param fieldName  name of the field to modify on each selected document
 * @param fieldValue value placed under the "set" modifier
 * @throws Exception propagated from the lookup, add, or commit calls
 */
public void updateDocsByQuery(String queryStr, String fieldName, String fieldValue)
        throws Exception {

    // NOTE(review): "id" and 5000 look like the returned field list and a row limit;
    // confirm against fetchDocsByQuery.
    SolrDocumentList docList = fetchDocsByQuery(queryStr, "id", 5000);
    if (docList.isEmpty()) {
        return;
    }

    Collection<SolrInputDocument> pendingUpdates = new ArrayList<>();
    for (SolrDocument match : docList) {
        String id = (String) match.getFieldValue("id");

        // A map value with a "set" key is what marks this field as an atomic update.
        Map<String, Object> setModifier = new HashMap<>(1);
        setModifier.put("set", fieldValue);

        SolrInputDocument inputDoc = new SolrInputDocument();
        inputDoc.addField("id", id);
        inputDoc.addField(fieldName, setModifier);
        pendingUpdates.add(inputDoc);
    }

    client.add(pendingUpdates);
    client.commit();
}

I get very bizarre behavior with Atomic Update. Only the last document in docList gets updated with the value; the rest of the documents are deleted. If I run this code again with a different query, the document that was updated in the previous run also gets deleted; again, only the last document in the list gets updated.

Does anyone understand this odd behavior?

My schema is

  <schema name="MySchema" version="1.5">
<fields>
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <field name="id" type="string" indexed="true" stored="true"  required="true" multiValued="false" />  

    <field name="title" type="text_en" indexed="true" stored="true" multiValued="false" termVectors="true" termPositions="true" termOffsets="true"   />
    <field name="subject" type="text_en" indexed="true" stored="true"/>
    <field name="author" type="text_en" indexed="true" stored="true"/>
    <field name="keywords" type="text_en" indexed="true" stored="true"/>
    <field name="category" type="text_en" indexed="true" stored="true"/>
    <field name="suggested_links" type="string" indexed="true" stored="true" />
    <field name="resourcename" type="string" indexed="true" stored="true" docValues="true" />
    <field name="resource_names" type="string" indexed="true" stored="true" multiValued="true" docValues="true" />
    <field name="content_type" type="string" indexed="true" stored="true"  docValues="true"/>
    <field name="last_modified" type="date" indexed="true" stored="true"/>
    <field name="source_group" type="string" indexed="true" stored="true" />

    <!-- Main body of document -->
    <field name="content" type="text_en" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true"   />

    <!-- catchall field, containing all other searchable text fields (implemented via copyField further on in this schema  -->
    <field name="text" type="text_en" indexed="true" stored="false" multiValued="true"  />

<!-- holds Solr dedupe hash code -->
<field name="dedupeSignatureField" type="string" indexed="true" stored="true" multiValued="false" /> 

 <!-- copy fields to search by default in our catch-all field, 'text' -->

 <copyField source="title" dest="text"/> 
 <copyField source="subject" dest="text"/>
     <copyField source="author" dest="text"/> 
     <copyField source="keywords" dest="text"/> 
     <copyField source="content" dest="text"/> 

</fields>

<uniqueKey>id</uniqueKey>

<types>

    <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
    <fieldType name="date" class="solr.TrieDateField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
        <analyzer type="index">
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
            <filter class="solr.LowerCaseFilterFactory"/>
            <filter class="solr.EnglishPossessiveFilterFactory"/>
            <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
            <filter class="solr.PorterStemFilterFactory"/>
          </analyzer>
          <analyzer type="query">
            <tokenizer class="solr.StandardTokenizerFactory"/>
            <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
            <filter class="solr.StopFilterFactory"
                ignoreCase="true"
                words="lang/stopwords_en.txt"
                />
            <filter class="solr.LowerCaseFilterFactory"/>
            <filter class="solr.EnglishPossessiveFilterFactory"/>
            <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
            <filter class="solr.PorterStemFilterFactory"/>
        </analyzer>
     </fieldType> 
</types>

enter code here

1
Could you also paste your schema? – denizdurmus

1 Answer

0
votes

I also faced this issue while working on Solr 5.4.0. Only the last document in docList used to get updated with the value, the rest of the documents were getting deleted.

My solution: in your code, replace

inputDoc.addField("id", id);

with

inputDoc.setField("id", id);

Leave the rest of the code untouched.