I have a solution for this using Lucene.Net 3.0.3 and payloads. I'm not sure whether it is the best way to accomplish this in the Java version, which is currently some way ahead of the .NET port.
It works by assigning a payload (a custom byte array) to the terms that should be custom scored, together with a custom Similarity that overrides ScorePayload and parses the byte array to do the custom filtering. (This requires a query that calls that method, such as PayloadTermQuery.)
This highly contrived example code scores each term based on (id % 3), so multiples of three are scored zero. You could combine this with a PositiveScoresOnlyCollector to ignore matches that receive a zero score.
using System;
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Search.Payloads;
using Lucene.Net.Store;
/// <summary>
/// Console demo: indexes 50 small documents whose "Data" tokens carry the
/// document id as a payload, then runs a payload-scored term query and
/// prints the score and id of each hit.
/// </summary>
public static class Program {
    /// <summary>
    /// Builds the in-memory index, searches it with
    /// <see cref="ShazaamPayloadSimilarity"/> installed, and writes the
    /// results to the console. Blocks on <c>Console.ReadLine</c> before exiting.
    /// </summary>
    public static void Main() {
        // The directory, writer and searcher all hold resources; dispose them
        // deterministically instead of leaving them to the finalizer.
        using (var directory = new RAMDirectory()) {
            // Initialization; create 50 documents with payload
            using (var writer = new IndexWriter(directory, new KeywordAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED)) {
                for (var i = 0; i < 50; ++i) {
                    AddDocument(writer, i, "lorem ipsum etc blah blah");
                }
                writer.Commit();
            }

            // The writer is closed before the searcher is opened, so the
            // searcher sees the fully committed index.
            using (var searcher = new IndexSearcher(directory, readOnly: true)) {
                searcher.Similarity = new ShazaamPayloadSimilarity();

                // The term we'll be looking for. This should match all documents.
                var term = new Term("Data", "lorem");
                var query = new PayloadTermQuery(term, new MaxPayloadFunction());
                var topDocs = searcher.Search(query, 40);

                // This is a bad example of a FieldCache usage.
                var iValues = FieldCache_Fields.DEFAULT.GetStrings(searcher.IndexReader, "Id");
                foreach (var scoreDoc in topDocs.ScoreDocs) {
                    Console.WriteLine("Score: {0:0.0000} i={1}", scoreDoc.Score, iValues[scoreDoc.Doc]);
                }
            }
        }
        Console.ReadLine();
    }

    /// <summary>
    /// Adds one document to <paramref name="writer"/> with an "Id" field and a
    /// "Data" field whose tokens each carry <paramref name="id"/> as payload.
    /// </summary>
    /// <param name="writer">Index writer that receives the document.</param>
    /// <param name="id">Document id; stored as the "Id" term and, as raw bytes, as the token payload.</param>
    /// <param name="data">Text to tokenize into the "Data" field.</param>
    public static void AddDocument(IndexWriter writer, Int32 id, String data) {
        var payload = BitConverter.GetBytes(id);
        var analyzer = new ShazaamPayloadAnalyzer(payload);
        var textReader = new StringReader(data);

        var document = new Document();
        document.Add(new Field("Id", id.ToString(), Field.Store.NO, Field.Index.NOT_ANALYZED));
        // The token stream is handed to the field directly, so it is this
        // analyzer (not the writer's) that produces the "Data" tokens.
        // The reader is intentionally not disposed here: the stream is only
        // consumed once the document is added to the writer.
        document.Add(new Field("Data", analyzer.TokenStream(null, textReader)));
        writer.AddDocument(document);
    }
}
/// <summary>
/// Analyzer that runs the input through a <c>WhitespaceTokenizer</c> and then
/// a <see cref="ShazaamPayloadFilter"/> configured with a fixed byte array.
/// </summary>
public class ShazaamPayloadAnalyzer : Analyzer {
    private readonly Byte[] _payloadBytes;

    /// <summary>Creates the analyzer.</summary>
    /// <param name="value">Bytes handed to the payload filter for every token stream this analyzer builds.</param>
    public ShazaamPayloadAnalyzer(Byte[] value) {
        _payloadBytes = value;
    }

    /// <summary>Builds the tokenizer + payload filter chain over <paramref name="reader"/>.</summary>
    /// <param name="fieldName">Not used by this analyzer.</param>
    /// <param name="reader">Source text to tokenize.</param>
    /// <returns>The payload filter wrapping the tokenizer.</returns>
    public override TokenStream TokenStream(String fieldName, TextReader reader) {
        var tokenizer = new WhitespaceTokenizer(reader);
        return new ShazaamPayloadFilter(tokenizer, _payloadBytes);
    }
}
/// <summary>
/// Token filter that sets the same payload bytes on every token produced by
/// the wrapped stream.
/// </summary>
public class ShazaamPayloadFilter : TokenFilter {
    private readonly IPayloadAttribute _payloadAttribute;
    private readonly byte[] _payloadBytes;

    /// <summary>Wraps <paramref name="input"/> and registers the payload attribute.</summary>
    /// <param name="input">Upstream token stream.</param>
    /// <param name="payload">Bytes to attach to each token.</param>
    public ShazaamPayloadFilter(TokenStream input, Byte[] payload)
        : base(input) {
        _payloadAttribute = AddAttribute<IPayloadAttribute>();
        _payloadBytes = payload;
    }

    /// <summary>
    /// Advances the wrapped stream and, when a token is available, assigns a
    /// new <c>Payload</c> built from the configured bytes.
    /// </summary>
    /// <returns><c>true</c> if a token was produced; <c>false</c> once the wrapped stream is exhausted.</returns>
    public override Boolean IncrementToken() {
        if (!input.IncrementToken()) {
            return false;
        }

        _payloadAttribute.Payload = new Payload(_payloadBytes);
        return true;
    }
}
/// <summary>
/// Similarity that scores a payload by decoding it as an Int32 and taking the
/// remainder modulo 3, so values that are multiples of three score zero.
/// </summary>
public class ShazaamPayloadSimilarity : DefaultSimilarity {
    /// <summary>
    /// Decodes an Int32 from <paramref name="payload"/> at
    /// <paramref name="offset"/> and returns it modulo 3.
    /// </summary>
    /// <param name="docId">Document being scored; only forwarded to the base implementation.</param>
    /// <param name="fieldName">Field being scored; only forwarded to the base implementation.</param>
    /// <param name="start">Start position of the match; only forwarded to the base implementation.</param>
    /// <param name="end">End position of the match; only forwarded to the base implementation.</param>
    /// <param name="payload">Buffer that contains the payload bytes.</param>
    /// <param name="offset">Index in <paramref name="payload"/> where the payload starts.</param>
    /// <param name="length">Number of payload bytes available from <paramref name="offset"/>.</param>
    /// <returns>
    /// The decoded value modulo 3, or the inherited score when the payload is
    /// missing or holds fewer bytes than an Int32.
    /// </returns>
    public override Single ScorePayload(Int32 docId, String fieldName, Int32 start, Int32 end, Byte[] payload, Int32 offset, Int32 length) {
        // Without a full Int32 there is nothing to decode; defer to the
        // inherited scoring rather than throwing in the middle of a search.
        if (payload == null || length < sizeof(Int32)) {
            return base.ScorePayload(docId, fieldName, start, end, payload, offset, length);
        }

        // Honor the caller-supplied offset: the payload is not guaranteed to
        // start at index 0 of the buffer.
        var originalValue = BitConverter.ToInt32(payload, offset);

        // Advanced logic ahead!
        // NOTE: C#'s % keeps the sign of the dividend, so a negative payload
        // value yields a negative score.
        return (originalValue % 3);
    }
}