Skip to content

Commit

Permalink
Use yield to enable LINQ methods, e.g. Any or First, etc.
Browse files Browse the repository at this point in the history
  • Loading branch information
vforteli committed Aug 28, 2022
1 parent c054662 commit 99b2a15
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 33 deletions.
5 changes: 4 additions & 1 deletion FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ public class BenchmarkFuzzySearch


[Benchmark]
public void LevenshteinLong() => FuzzySearch.FindLevenshtein(term2, text, 3);
public void LevenshteinLong()
{
_ = FuzzySearch.FindLevenshtein(term2, text, 3).ToList();
}
}
29 changes: 16 additions & 13 deletions FuzzySearchNet/src/FuzzySearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static IEnumerable<MatchResult> Find(string subSequence, string text, int
/// <param name="text"></param>
public static IEnumerable<MatchResult> FindExact(string subSequence, string text)
{
// ok so this whole method is a bit redundant... but the idea is to have this using a stream instead of text... later
// IndexOf would probably run circles around this...
var needlePosition = 0;
var termLength = subSequence.Length - 1;
var currentIndex = 0;
Expand Down Expand Up @@ -167,7 +167,6 @@ public static async Task<IEnumerable<MatchResult>> FindExactAsync(string subSequ
/// <param name="text"></param>
public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence, string text, int maxDistance)
{
var matches = new List<MatchResult>();
var termLengthMinusOne = subSequence.Length - 1;

for (var currentIndex = 0; currentIndex < text.Length - termLengthMinusOne; currentIndex++)
Expand All @@ -191,7 +190,7 @@ public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence,

if (candidateDistance <= maxDistance)
{
matches.Add(new MatchResult
yield return new MatchResult
{
StartIndex = currentIndex,
EndIndex = currentIndex + subSequence.Length,
Expand All @@ -200,11 +199,9 @@ public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence,
Deletions = 0,
Substitutions = candidateDistance,
Insertions = 0,
});
};
}
}

return matches;
}


Expand Down Expand Up @@ -277,13 +274,21 @@ public static async Task<IEnumerable<MatchResult>> FindSubstitutionsOnlyAsync(st


/// <summary>
/// Finds term in text with max distance
/// Finds the sub sequence in the text within the maximum Levenshtein distance.
/// </summary>
/// <param name="subSequence"></param>
/// <param name="text"></param>
public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, string text, int maxDistance)
public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, string text, int maxDistance) => Utils.GetBestMatches(FindLevenshteinAll(subSequence, text, maxDistance), maxDistance);


/// <summary>
/// Finds the sub sequence in the text within the maximum Levenshtein distance.
/// This method finds all matches and does not try to consolidate overlapping matches.
/// </summary>
/// <param name="subSequence"></param>
/// <param name="text"></param>
internal static IEnumerable<MatchResult> FindLevenshteinAll(string subSequence, string text, int maxDistance)
{
var matches = new List<MatchResult>();
var candidates = new Stack<CandidateMatch>();

for (var currentIndex = 0; currentIndex < text.Length; currentIndex++)
Expand All @@ -300,7 +305,7 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
if (candidate.TextIndex <= text.Length)
{
bestFoundDistance = candidate.Distance;
matches.Add(new MatchResult
yield return new MatchResult
{
StartIndex = candidate.StartIndex,
EndIndex = candidate.TextIndex,
Expand All @@ -309,7 +314,7 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
Deletions = candidate.Deletions,
Substitutions = candidate.Substitutions,
Insertions = candidate.Insertions,
});
};
}

// No point searching for better matches if we find a perfect match
Expand Down Expand Up @@ -369,7 +374,5 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
}
}
}

return Utils.GetBestMatches(matches.OrderBy(o => o.StartIndex).ToList(), maxDistance);
}
}
40 changes: 21 additions & 19 deletions FuzzySearchNet/src/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,40 @@ public static class Utils
/// </summary>
/// <param name="list"></param>
/// <returns></returns>
public static IEnumerable<MatchResult> GetBestMatches(List<MatchResult> list, int maxDistanece)
public static IEnumerable<MatchResult> GetBestMatches(IEnumerable<MatchResult> matches, int maxDistanece)
{
var matches = list.Distinct().ToList();
var matchesEnumerator = matches.GetEnumerator();

if (matches.Count > 1)
{
var groups = new List<List<MatchResult>>();
var group = new List<MatchResult>();

groups.Add(new List<MatchResult>());
if (matchesEnumerator.MoveNext())
{
group.Add(matchesEnumerator.Current);

var match = matches[0];
groups[0].Add(match);
var match = matchesEnumerator.Current;

for (var i = 1; i < matches.Count; i++)
while (matchesEnumerator.MoveNext())
{
var currentMatch = matches[i];
var currentMatch = matchesEnumerator.Current;

if (currentMatch.StartIndex > (match.StartIndex + maxDistanece))
if (currentMatch != null)
{
groups.Add(new List<MatchResult>());
}
if (currentMatch.StartIndex > (match.StartIndex + maxDistanece))
{
yield return group.OrderBy(o => o.Distance).ThenByDescending(o => o.Match.Length).First();
group.Clear();
}

groups.Last().Add(currentMatch);
group.Add(currentMatch);

match = currentMatch;
match = currentMatch;
}
}

return groups.Select(o => o.OrderBy(o => o.Distance).ThenByDescending(o => o.Match.Length).First()).ToList();
}
else

if (group.Any())
{
return matches;
yield return group.OrderBy(o => o.Distance).ThenByDescending(o => o.Match.Length).First();
}
}
}

0 comments on commit 99b2a15

Please sign in to comment.