How can I restrict Lucene.Net to index only those terms whose length is greater than x? I am indexing the document as follows:
// Builds a one-document index: "path" holds the source location and "content"
// holds the text returned by ReadTextFile, with term vectors enabled so the
// indexed terms can be read back later.
String indexDirectory = @"C:\Users\user\Desktop\Index";
String dataDirectory = @"C:\Users\user\Desktop\Data";
StandardAnalyzer analyzer = new StandardAnalyzer();
IndexWriter writer = new IndexWriter(indexDirectory, analyzer);
try
{
    Document doc = new Document();
    Field fPath = new Lucene.Net.Documents.Field("path", dataDirectory, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO);
    Field fContent = new Field("content", ReadTextFile(dataDirectory), Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
    doc.Add(fPath);
    doc.Add(fContent);
    // Without this call the document is only built in memory and never reaches the index.
    writer.AddDocument(doc);
}
finally
{
    // Always close the writer so pending changes are flushed and the index is released,
    // even if reading the file or adding the document throws.
    writer.Close();
}
I am using the following code to get the indexed terms from the Lucene index:
// Reads the term vectors stored for document 0 and walks every indexed term.
IndexReader reader = IndexReader.Open(indexDirectory);
try
{
    TermFreqVector[] vectors = reader.GetTermFreqVectors(0);
    // GetTermFreqVectors may return null when the document has no stored term vectors.
    if (vectors != null)
    {
        foreach (Lucene.Net.Index.TermFreqVector vector in vectors)
        {
            String[] terms = vector.GetTerms();
            foreach (String term in terms)
            {
                // loop through indexed terms
            }
        }
    }
}
finally
{
    // Close the reader so the underlying index files are released.
    reader.Close();
}