Let us have a sequence of numbers. Say {1, 2, 3, 4, 5}. And a set of subsequence substitution with weights. For example:
{1} -> {3} : 0.2 - witch means 1 could be substituted by 3 with weight 0.1
{2, 3, 4} -> {4, 3} : 0.3
{5} -> {2} : 0.4
I need to find all the sequence we can get using substitutions with a restriction to weight.
The restriction works like this: the sum of substitutions weights of any n (3) items (in a row) should less given number e<= 0.5.
For our example the result is:
{1, 2, 3, 4, 5} : substitution weights: {0, 0, 0, 0, 0}. Sum of any 3 items in a row less than 0.5
{3, 2, 3, 4, 5} : substitution weights: {0.2, 0, 0, 0, 0}
{1, 4, 3, 5} : substitution weights: {0, 0.3/3, 0.3/3, 0.3/3, 0} the sequence of 3 symbols so /3
{3, 4, 3, 5} substitution weights: {0.2, 0.3/3, 0.3/3, 0.3/3, 0}
{1, 2, 3, 4, 2} substitution weights: {0, 0, 0, 0, 0.4}
{3, 2, 3, 4, 2} substitution weights: {0.2, 0, 0, 0, 0.4}
We do not allow {1, 4, 3, 2} because substitution weights {0, 0.3/3, 0.3/3, 0.3/3, 0.4} have 3 last items with sum weight = 0.6.
In real example subsequence substitution set is big. Almost any short subsequence has a replacement. It's obvious that the task could be done using brute force. But I'm looking for a way to do it fast. Any help would be appreciated.
UPDATED
Actually, I deal with strings instead of sequence of number. So far I came up with the following agorithm (implemented in C#):
public class Substitution
{
public string SubstituteWhat { get; private set; }
public string SubstituteTo { get; private set; }
public double TotalWeight { get; private set; }
public double SymbolWeight { get; private set; }
public Substitution(string substituteWhat, string substituteTo, double totalWeight)
{
SubstituteWhat = substituteWhat;
SubstituteTo = substituteTo;
TotalWeight = totalWeight;
SymbolWeight = TotalWeight/SubstituteWhat.Length;
}
public override string ToString()
{
return string.Format("SubstituteWhat: {0}, SubstituteTo: {1}, TotalWeight: {2}", SubstituteWhat, SubstituteTo, TotalWeight);
}
}
class SubstitutedStringWindowImpl
{
public string OriginalPhrase { get; set; }
public string SubstitutedPhrase { get; set; }
private double[] weightVector;
private double windowSum;
private int addItemPointer;
private int substructItemPoiner;
public SubstitutedStringWindowImpl(string phrase, int windowLength)
{
this.OriginalPhrase = phrase;
SubstitutedPhrase = String.Empty;
weightVector = new double[phrase.Length + windowLength ];
windowSum = 0;
substructItemPoiner = 0;
addItemPointer = windowLength;
}
public bool PerformSubstitution(
Substitution substitution,
double maxWeight,
out SubstitutedStringWindowImpl substitutedString)
{
substitutedString = MemberwiseClone() as SubstitutedStringWindowImpl;
substitutedString.weightVector = weightVector.ToArray();
for (int i = 0; i < substitution.SubstituteWhat.Length; i++)
{
substitutedString.weightVector[substitutedString.addItemPointer] = substitution.SymbolWeight;
substitutedString.windowSum = substitutedString.windowSum -
substitutedString.weightVector[substitutedString.substructItemPoiner] +
substitutedString.weightVector[substitutedString.addItemPointer];
substitutedString.substructItemPoiner++;
substitutedString.addItemPointer++;
if (substitutedString.windowSum > maxWeight)
return false;
if (substitutedString.addItemPointer == substitutedString.weightVector.Length)
break;
}
substitutedString.SubstitutedPhrase = SubstitutedPhrase + substitution.SubstituteTo;
return true;
}
}
internal class SubstitutionManagerWindowImpl
{
private readonly Dictionary<string, List<Substitution>> substitutionsDict;
private readonly double maxWeight;
private readonly int maxSubstitutionLength;
private readonly int windowLength;
public SubstitutionManagerWindowImpl(
List<Substitution> substitutions,
double maxWeight,
int windowLength)
{
this.substitutionsDict = substitutions.GroupBy(x => x.SubstituteWhat)
.ToDictionary(x => x.Key, x => x.ToList());
this.maxWeight = maxWeight;
this.windowLength = windowLength;
maxSubstitutionLength = substitutions.Max(x => x.SubstituteWhat.Length);
}
private List<SubstitutedStringWindowImpl> GetAllSubstitutionsPrivate(
SubstitutedStringWindowImpl stringToHandle, int symbolCount)
{
if (stringToHandle.OriginalPhrase.Length == symbolCount)
return new List<SubstitutedStringWindowImpl> {stringToHandle};
var result = new List<SubstitutedStringWindowImpl>();
for (int i = 1;
i <= Math.Min(maxSubstitutionLength, stringToHandle.OriginalPhrase.Length - symbolCount);
i++)
{
var subphraseToSubstitute = stringToHandle.OriginalPhrase.Substring(symbolCount, i);
List<Substitution> appropriateSubstitutions;
if (!substitutionsDict.TryGetValue(subphraseToSubstitute, out appropriateSubstitutions))
continue;
foreach (var substitution in appropriateSubstitutions)
{
SubstitutedStringWindowImpl handledString;
if (!stringToHandle.PerformSubstitution(substitution, maxWeight, out handledString))
continue;
result.AddRange(GetAllSubstitutionsPrivate(handledString,
symbolCount + substitution.SubstituteWhat.Length));
}
}
return result;
}
// this is the entry function
public List<string> GetAllSubstitutions(string phrase)
{
var res = GetAllSubstitutionsPrivate(new SubstitutedStringWindowImpl(phrase,windowLength), 0);
return res.Select(x => x.SubstitutedPhrase).ToList();
}
}
But it seems that it doesn't do the work fast enoung. Any suggestions how to improve it?