1
votes

Steps I took:

  1. Create the core with auto-generated resources: curl -u cassandra "http://localhost:8983/solr/admin/cores?action=CREATE&name=tweets.tweets_test&generateResources=true&reindex=true&deleteAll=true"

The resulting schema.xml:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Search schema for the tweets.tweets_test core. -->
<schema name="autoSolrSchema" version="1.5">
  <types>
    <!-- Analyzed text: standard tokenizer followed by lower-casing. -->
    <fieldType name="TextField" class="org.apache.solr.schema.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
    <fieldType name="TrieDateField" class="org.apache.solr.schema.TrieDateField"/>
    <fieldType name="TrieLongField" class="org.apache.solr.schema.TrieLongField"/>
  </types>
  <fields>
    <!-- Every field is both indexed and stored; only atnames, links and hashtags are multi-valued. -->
    <field name="atnames" type="TextField" indexed="true" stored="true" multiValued="true"/>
    <field name="links" type="TextField" indexed="true" stored="true" multiValued="true"/>
    <field name="tweet_date" type="TrieDateField" indexed="true" stored="true" multiValued="false"/>
    <field name="tweet" type="TextField" indexed="true" stored="true" multiValued="false"/>
    <field name="hashtags" type="TextField" indexed="true" stored="true" multiValued="true"/>
    <field name="uid" type="TrieLongField" indexed="true" stored="true" multiValued="false"/>
    <field name="tweet_id" type="TrieLongField" indexed="true" stored="true" multiValued="false"/>
  </fields>
  <!-- Composite unique key built from the uid and tweet_id fields. -->
  <uniqueKey>(uid,tweet_id)</uniqueKey>
</schema>

I want to change the schema to the following (I want to index URLs using KeywordTokenizerFactory):

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Search schema for the tweets.tweets_test core, with a dedicated field type for URLs. -->
<schema name="autoSolrSchema" version="1.5">
  <types>
    <!-- Analyzed text: standard tokenizer followed by lower-casing. -->
    <fieldType name="TextField" class="org.apache.solr.schema.TextField">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>

    <!-- URL text: the keyword tokenizer keeps the whole value as a single token; no filters are applied. -->
    <fieldType name="TextFieldURL" class="org.apache.solr.schema.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
      </analyzer>
    </fieldType>

    <fieldType name="TrieDateField" class="org.apache.solr.schema.TrieDateField"/>
    <fieldType name="TrieLongField" class="org.apache.solr.schema.TrieLongField"/>
  </types>

  <fields>
    <!-- Every field is both indexed and stored; only atnames, links and hashtags are multi-valued. -->
    <field name="atnames" type="TextField" indexed="true" stored="true" multiValued="true"/>
    <!-- links uses the URL field type instead of the general text type. -->
    <field name="links" type="TextFieldURL" indexed="true" stored="true" multiValued="true"/>
    <field name="tweet_date" type="TrieDateField" indexed="true" stored="true" multiValued="false"/>
    <field name="tweet" type="TextField" indexed="true" stored="true" multiValued="false"/>
    <field name="hashtags" type="TextField" indexed="true" stored="true" multiValued="true"/>
    <field name="uid" type="TrieLongField" indexed="true" stored="true" multiValued="false"/>
    <field name="tweet_id" type="TrieLongField" indexed="true" stored="true" multiValued="false"/>
  </fields>

  <!-- Composite unique key built from the uid and tweet_id fields. -->
  <uniqueKey>(uid,tweet_id)</uniqueKey>
</schema>
  1. Upload the changes: curl "http://localhost:8983/solr/resource/tweets.tweets_test/schema.xml" --data-binary @tweets.tweets_test.xml -H 'Content-type:text/xml; charset=utf-8'

  2. Get the latest schema back to make sure it uploaded successfully: http://localhost:8983/solr/tweets.tweets_test/admin/file?file=schema.xml&contentType=text/xml;charset=utf-8

Looks good: I can see my changes. (By the way, the changes I made do not seem to take effect; the links are still being indexed as separate tokens such as "t.co", "http", ...; but that is probably a separate discussion.) So I tried to reload the core:

  1. curl "http://localhost:8983/solr/admin/cores?action=RELOAD&name=tweets.tweets_test&reindex=true&deleteAll=true"

  2. Get the latest schema back: http://localhost:8983/solr/tweets.tweets_test/admin/file?file=schema.xml&contentType=text/xml;charset=utf-8

I don't see any of the changes I uploaded; somehow schema.xml has reverted to the original.

Ideas?

1
Hi, what version are you using? (phact)
Using DSE 4.6.5, Search 4.6. (Pavel)

1 Answer