2
votes

I'm new to Lucene and trying to sort this out. I'm indexing like this:

        // Open the on-disk index directory located at dirIndexDir.
        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(dirIndexDir));

        //Create the indexWriter
        IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), true,
            IndexWriter.MaxFieldLength.UNLIMITED);

        // try/finally guarantees the writer is closed even if building or
        // adding the document throws.
        try
        {
            Document doc = new Document();

            // Key form: every field is stored; only the term itself is analyzed,
            // the classification fields are indexed verbatim.
            doc.Add(new Field("keyform_type", entry.keyForm.type, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("keyform_lang", entry.keyForm.lang, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("keyform_dial", entry.keyForm.dial, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("keyform_reg", entry.keyForm.reg, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("keyform_term", entry.keyForm.term.Value, Field.Store.YES, Field.Index.ANALYZED));

            // Reference form: each part is optional and is only written when present.
            if (entry.refForm.type != null)
            {
                doc.Add(new Field("refform_type", entry.refForm.type, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            if (entry.refForm.lang != null)
            {
                doc.Add(new Field("refform_lang", entry.refForm.lang, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            if (entry.refForm.dial != null)
            {
                doc.Add(new Field("refform_dial", entry.refForm.dial, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            if (entry.refForm.reg != null)
            {
                doc.Add(new Field("refform_reg", entry.refForm.reg, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            if (entry.refForm.term.Value != null)
            {
                doc.Add(new Field("refform_term", entry.refForm.term.Value, Field.Store.YES, Field.Index.ANALYZED));
            }

            doc.Add(new Field("pos", entry.pos, Field.Store.YES, Field.Index.NOT_ANALYZED));

            // Subjects and glosses are multi-valued; each value gets its own
            // numbered field (subject_0, subject_1, ... / gloss_0, gloss_1, ...).
            for (int s = 0; s < entry.subject.Count; s++)
            {
                doc.Add(new Field("subject_" + s, entry.subject[s], Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            for (int g = 0; g < entry.sense.gloss.Count; g++)
            {
                doc.Add(new Field("gloss_" + g, entry.sense.gloss[g], Field.Store.YES, Field.Index.ANALYZED));
            }

            // Signature: optional metadata, written only when present.
            if (entry.signature.action != null)
            {
                doc.Add(new Field("action", entry.signature.action, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            if (entry.signature.source != null)
            {
                doc.Add(new Field("source", entry.signature.source, Field.Store.YES, Field.Index.NOT_ANALYZED));
            }
            // Store the date only when one is set, like the other optional fields.
            // NOTE(review): assumes 0 means "no date" - confirm against the entry type.
            if (entry.signature.date != 0)
            {
                doc.Add(new Field("date", entry.signature.date.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            }

            //Add the doc
            writer.AddDocument(doc);
        }
        finally
        {
            writer.Close();
        }

I then query using this code:

        // Searches the index for `term` across several fields and walks the hits.
        //Doesn't matter what term is, same result
        string term="workers";

        Directory dir = FSDirectory.Open(new System.IO.DirectoryInfo(luceneDir));

        IndexSearcher searcher = new IndexSearcher(dir, true);

        // Build the list of field names the query should be run against.
        // Of these, the indexing snippet shown above only writes keyform_term,
        // refform_term and gloss_N; it never adds keyform_gloss or refform_gloss.
        List<string> b=new List<string>();
        b.Add("keyform_gloss");
        b.Add("keyform_term");
        b.Add("refform_term");
        b.Add("refform_gloss");
        // nMaxDupes is defined outside this snippet; one gloss_<i> name is added per value of i.
        for (int i = 0; i < nMaxDupes; i++)
            b.Add("gloss_" + i.ToString());
        // The analyzer here is constructed without a version argument, whereas the
        // indexing snippet constructs it with Lucene.Net.Util.Version.LUCENE_29.
        MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29,
            b.ToArray(), new StandardAnalyzer());
        Query q = mfqp.Parse(term);
        // NOTE(review): 300 is presumably the maximum number of hits returned - confirm.
        TopDocs td = searcher.Search(q, 300);

        // Loops once per reported hit, using td.totalHits as the bound.
        for (int i = 0; i < td.totalHits; i++)
        {
            //Generate a dictionaryEntry for each hit
            // The loop counter i is passed straight to searcher.Doc; td is
            // consulted only for totalHits, not for the individual hits.
            Document doc = searcher.Doc(i);

            //Access the document fields, blah
        }

No matter what the value of `term` is, Lucene returns the first X documents in the index, where X is the number of documents that actually match `term`. When I browse the index using Luke, an identical hand-typed query (`keyform_term:term gloss_0:term` etc.) returns both the correct number of results and the correct documents.

The C# code above, however, always returns the first X documents, which don't necessarily contain the search term in any of the searched fields. They're not even close.

What am I doing wrong? I know the index is good because I can search it in LUKE, so it has to be something in the query...

Thanks!

1

1 Answer

6
votes

The line:

Document doc = searcher.Doc(i);

should be

Document doc = searcher.Doc(td.scoreDocs[i].doc);

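or the correct C# syntax equivalent (I'm a Java guy, sorry).

The loop counter `i` is only a position in the result list, not a document id; the id of the i-th hit is stored in `td.scoreDocs[i].doc`. Passing `i` itself to `searcher.Doc` fetches the i-th document of the whole index, which is why you always get the first X documents. You should also loop up to `td.scoreDocs.Length` rather than `td.totalHits`: the search asks for at most 300 results, while `totalHits` counts every match.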