2
votes

I'm using Lucene.Net to search through a collection of uploaded documents. It worked perfectly when testing it on one document, but when I added another document, the results I get belong to the latest document only, and when searching for terms in the first document, no results were found.
While searching for a solution I came across a question whose author had the same problem, but his was caused by passing true (to recreate the index) every time a document is indexed.
here is my searcher class code:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Lucene.Net.Store;
using Lucene.Net.Index;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using Lucene.Net.Analysis.AR;
using Lucene.Net.Search.Highlight;

/// <summary>
/// Summary description for Searcher
/// </summary>
public class Searcher
{
    public Searcher()
    {
    }

    /// <summary>Maximum number of hits returned by a search.</summary>
    private const int HITS_LIMIT = 25;
    /// <summary>Fragment count intended for highlighting (not used in this block).</summary>
    private const int MAX_FRAGMENTS_NUMBER = 3;

    private Directory _Directory;
    /// <summary>
    /// Lazily opened filesystem directory at ~/App_Data/Index that holds the Lucene index.
    /// </summary>
    public Directory Directory
    {
        get
        {
            if (_Directory == null)
            {
                string path = HttpContext.Current.Server.MapPath("~/App_Data/Index");
                _Directory = FSDirectory.Open(path);
            }

            return _Directory;
        }
    }

    private Analyzer _Analyzer;
    /// <summary>
    /// Lazily created Arabic analyzer shared by indexing and searching
    /// (the same analyzer MUST be used for both, or queries won't match indexed terms).
    /// </summary>
    public Analyzer Analyzer
    {
        get
        {
            if (_Analyzer == null)
                _Analyzer = new ArabicAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

            return _Analyzer;
        }
    }

    #region Mapping

    /// <summary>
    /// Converts a <see cref="Data"/> entity into a Lucene document.
    /// ID and Path are stored verbatim (not analyzed); Title and Content are analyzed for full-text search.
    /// </summary>
    private Document MapDataToLuceneDocument(Data data)
    {
        Document document = new Document();
        document.Add(new Field("ID", data.DataID.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.Add(new Field("Path", data.Path, Field.Store.YES, Field.Index.NOT_ANALYZED));
        document.Add(new Field("Title", data.Title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
        // NOTE(review): Content is indexed but NOT stored (Field.Store.NO), so it cannot be
        // read back from search results — see MapLuceneDocumentToData. Change to Field.Store.YES
        // if the original text is needed in results (e.g. for highlighting).
        document.Add(new Field("Content", data.Content, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

        return document;
    }

    /// <summary>
    /// Converts a retrieved Lucene document back into a <see cref="Data"/> instance.
    /// </summary>
    private Data MapLuceneDocumentToData(Document document)
    {
        Data result = new Data()
        {
            DataID = int.Parse(document.Get("ID")),
            Path = document.Get("Path"),
            Title = document.Get("Title"),
            // Always null while Content is indexed with Field.Store.NO (see MapDataToLuceneDocument).
            Content = document.Get("Content"),
        };

        return result;
    }

    #endregion

    #region Indexing

    /// <summary>
    /// Upserts a single document: deletes any existing document with the same ID, then adds it.
    /// </summary>
    private void _Index(Data data, IndexWriter writer)
    {
        Query query = new TermQuery(new Term("ID", data.DataID.ToString()));
        writer.DeleteDocuments(query);

        writer.AddDocument(this.MapDataToLuceneDocument(data));
    }

    /// <summary>
    /// Indexes a batch of documents, appending to the existing index.
    /// </summary>
    public void Index(IEnumerable<Data> data)
    {
        // BUG FIX: the previous code opened the writer with create:false inside a try
        // block and, on ANY exception, fell back to create:true — which erases the whole
        // index. That is why only the most recently indexed document was searchable.
        // Instead, decide explicitly: create a fresh index only when none exists yet.
        bool createIndex = !IndexReader.IndexExists(this.Directory);

        // using guarantees the writer (and its write lock) is released even if
        // indexing throws; the old code leaked the writer on exception.
        using (IndexWriter writer = new IndexWriter(this.Directory, this.Analyzer, createIndex, IndexWriter.MaxFieldLength.UNLIMITED))
        {
            foreach (var item in data)
            {
                this._Index(item, writer);
            }
            writer.Commit();
        }
    }

    /// <summary>
    /// Convenience overload to index a single document.
    /// </summary>
    public void Index(Data data)
    {
        this.Index(new List<Data>() { data });
    }

    #endregion

    #region Searching

    /// <summary>
    /// Runs a full-text query. When <paramref name="searchField"/> is empty,
    /// both Title and Content are searched; otherwise only the given field.
    /// Returns at most HITS_LIMIT results ordered by relevance.
    /// </summary>
    private List<Data> _Search(string searchPhrase, string searchField = "")
    {
        List<Data> searchResults = new List<Data>();

        if (string.IsNullOrWhiteSpace(searchPhrase))
            return searchResults;

        QueryParser parser;
        if (string.IsNullOrWhiteSpace(searchField))
        {
            parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new String[] { "Title", "Content" }, this.Analyzer);
        }
        else
        {
            parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, searchField, this.Analyzer);
        }

        Query query;
        try
        {
            query = parser.Parse(searchPhrase.Trim());
        }
        catch (ParseException)
        {
            // The raw phrase contains query syntax characters (e.g. '*', '~', '(');
            // escape them and treat the phrase as literal text instead of failing.
            query = parser.Parse(QueryParser.Escape(searchPhrase.Trim()));
        }

        // Dispose the searcher so the underlying reader is released; it was leaked before.
        using (IndexSearcher searcher = new IndexSearcher(this.Directory))
        {
            TopDocs hits = searcher.Search(query, null, Searcher.HITS_LIMIT, Sort.RELEVANCE);
            foreach (var doc in hits.ScoreDocs)
            {
                Document document = searcher.Doc(doc.Doc);
                searchResults.Add(this.MapLuceneDocumentToData(document));
            }
        }

        return searchResults;
    }

    /// <summary>
    /// Public entry point for searching; see <see cref="_Search"/>.
    /// </summary>
    public List<Data> Search(string searchPhrase, string searchField = "")
    {
        return this._Search(searchPhrase, searchField);
    }

    #endregion
}
1
I think you are on the right track with the question you linked to. Try outputting some debugging info in your catch block — maybe your first attempt to open an IndexWriter is failing, and knowing the exception might point the way to a solution.femtoRgon
@femtoRgon I had the same problem before, and I have tried this code, removed the try-catch you referred to, removed the boolean parameter (used the other overload), and got the same result.Yaser Jaradeh
Also, a bit of sanity testing: Are you sure you are getting different values for data.DataID.ToString() for each document?femtoRgon

1 Answer

0
votes

It's probable that you are deleting the index every time you call "Index".

Instead of creating a new IndexWriter every time, keep a member var that you initialize once (I usually have an "Open" method).

In "Search" use "searcher = new IndexSearcher(writer.GetReader())".

Consider the writer as your database and the searcher as a select statement that you run against the db.