3
votes

In my case I need to search for keywords like C#, .Net, C++, etc. The standard analyzer strips out special characters, so I used the whitespace analyzer, but it doesn't work for me. While indexing:

/// <summary>
/// Writes a Lucene index under D:\Lucene.Net\Data from the first table of <paramref name="ds"/>.
/// Every row becomes one document; only the "Skill_Summary" and "Title" columns are added,
/// each stored and analyzed with a <c>WhitespaceAnalyzer</c>.
/// </summary>
/// <param name="ds">Data set whose first table (if any) supplies the rows to index.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="ds"/> is null.</exception>
public void Indexing(DataSet ds)
{
    // Fail before any index resources are opened.
    if (ds == null)
    {
        throw new ArgumentNullException("ds");
    }

    string indexFileLocation = @"D:\Lucene.Net\Data";
    // NOTE(review): the 'true' argument is presumably the "create" flag, i.e. an existing index is replaced — confirm.
    Lucene.Net.Store.Directory dir = Lucene.Net.Store.FSDirectory.GetDirectory(indexFileLocation, true);
    IndexWriter indexWriter = new IndexWriter(dir, new WhitespaceAnalyzer(), Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // Tables[0] throws when the data set has no tables, so test the count rather than comparing with null.
        if (ds.Tables.Count > 0)
        {
            DataTable dt = ds.Tables[0];
            foreach (DataRow dr in dt.Rows)
            {
                // One Lucene document per data row.
                Document doc = new Document();

                foreach (DataColumn dc in dt.Columns)
                {
                    // Only these two columns are searchable; every other column is skipped.
                    if (dc.ColumnName.Equals("Skill_Summary") || dc.ColumnName.Equals("Title"))
                    {
                        doc.Add(new Field(dc.ColumnName, dr[dc.ColumnName].ToString(), Field.Store.YES, Field.Index.ANALYZED));
                    }
                }

                // Write the document to the index (even when none of the two columns exist).
                indexWriter.AddDocument(doc);
            }
        }
    }
    finally
    {
        // Always close the writer so it is not left open when indexing fails part-way.
        indexWriter.Close();
    }
}

And searching the fields like this:

// Fields that every search term is parsed against.
string[] searchfields = { "Skill_Summary", "Title" };
string searchText = "C#";
var parser = new MultiFieldQueryParser(
    Lucene.Net.Util.Version.LUCENE_29,
    searchfields,
    new WhitespaceAnalyzer());

// Break the search text into words on spaces; empty entries from repeated spaces are dropped.
string[] terms = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string word in terms)
{
    // Remove any '*' already in the word, then append a single trailing '*' (wildcard form).
    string wildcardText = word.Replace("*", "") + "*";
    var wildcardQuery = parser.Parse(wildcardText);

    // Each word is added to the combined query as a MUST clause.
    finalQuery.Add(wildcardQuery, BooleanClause.Occur.MUST);
}
hits = searcher.Search(finalQuery);

How do I build my own analyzer using WhitespaceAnalyzer and LowerCaseFilter in my case?

1

1 Answers

8
votes

How do I build my own analyzer using WhitespaceAnalyzer and LowerCaseFilter in my case?

/// <summary>
/// Analyzer that chains a <c>WhitespaceTokenizer</c> with a <c>LowerCaseFilter</c>,
/// i.e. whitespace tokenization followed by lowercasing of each token.
/// </summary>
public class CaseInsensitiveWhitespaceAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for a field by wrapping a <c>WhitespaceTokenizer</c>
    /// over <paramref name="reader"/> in a <c>LowerCaseFilter</c>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The lowercasing filter stream built on top of the whitespace tokenizer.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        return new LowerCaseFilter(new WhitespaceTokenizer(reader));
    }
}

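PS: When you use wildcards (?, *), the query parser does not run the term through any analyzer; it only lowercases the term (depending on the value of QueryParser.LowercaseExpandedTerms). That is why a query such as C#* is searched as c#*, and only matches if the indexed tokens were lowercased as well.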