Skip to content

Commit

Permalink
Merge pull request #5 from vforteli/feature/ienumerablesupport
Browse files Browse the repository at this point in the history
Feature/ienumerablesupport
  • Loading branch information
vforteli authored Aug 28, 2022
2 parents c054662 + d65dee3 commit 393172d
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 35 deletions.
5 changes: 4 additions & 1 deletion FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ public class BenchmarkFuzzySearch


[Benchmark]
public void LevenshteinLong()
{
    // FindLevenshtein returns a lazily evaluated sequence, so nothing is searched until it is enumerated.
    // ToList() forces the full enumeration; the resulting list itself is not needed and is discarded.
    _ = FuzzySearch.FindLevenshtein(term2, text, 3).ToList();
}
}
24 changes: 24 additions & 0 deletions FuzzySearchNet.Tests/Tests/FuzzySearchLevenshteinTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -356,4 +356,28 @@ public void TestLevenshteinBufferBoundaryShort3Distance(string term, string text
TestUtils.AssertMatch(results[0], expectedStartIndex, expectedMatch, expectedDistance);
});
}


[Test]
public void TestLevenshteinLinq()
{
    const string haystack = "---abcc----abc---axc--";
    const string needle = "abc";

    // Materialise the sequence once so that the individual matches can be indexed below
    var found = FuzzySearch.FindLevenshtein(needle, haystack, 2).ToList();

    Assert.Multiple(() =>
    {
        Assert.That(found.Count, Is.EqualTo(3));
        TestUtils.AssertMatch(found[0], 3, "abc", 0);
        TestUtils.AssertMatch(found[1], 11, "abc", 0);
        TestUtils.AssertMatch(found[2], 17, "axc", 1);
    });

    // LINQ operators applied directly to the returned sequence, each one on a fresh search
    Assert.Multiple(() =>
    {
        var hasAnyMatch = FuzzySearch.FindLevenshtein(needle, haystack, 3).Any();
        Assert.That(hasAnyMatch);

        var firstMatch = FuzzySearch.FindLevenshtein(needle, haystack, 3).First();
        TestUtils.AssertMatch(firstMatch, 3, "abc", 0);
    });
}
}
4 changes: 2 additions & 2 deletions FuzzySearchNet/FuzzySearchNet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

<PropertyGroup>
<PackageId>FuzzySearch.Net</PackageId>
<VersionPrefix>0.3.0</VersionPrefix>
<VersionPrefix>0.2.2</VersionPrefix>
<Title>FuzzySearch.Net</Title>
<Authors>Verner Fortelius</Authors>
<Description>Fuzzy search library for finding strings in strings. Inspired by and attempts to be somewhat compatible with fuzzysearch for python https://github.com/taleinat/fuzzysearch</Description>
Expand All @@ -22,7 +22,7 @@
<PackageProjectUrl>https://github.com/vforteli/FuzzySearch.Net</PackageProjectUrl>
<RepositoryUrl>https://github.com/vforteli/FuzzySearch.Net</RepositoryUrl>
<PackageLicense>https://github.com/vforteli/FuzzySearch.Net/blob/main/LICENSE.md</PackageLicense>
<PackageReleaseNotes>Add support for finding in streams</PackageReleaseNotes>
<PackageReleaseNotes>Add support for finding in streams for exact and substitution only. Properly use yield and ienumerable to allow usage of linq methods</PackageReleaseNotes>
<PackageTags>fuzzy search;levenshtein distance;dotnet;.net;c#;fuzzysearch.net</PackageTags>
<IncludeSymbols>true</IncludeSymbols>
<SymbolPackageFormat>snupkg</SymbolPackageFormat>
Expand Down
29 changes: 16 additions & 13 deletions FuzzySearchNet/src/FuzzySearch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public static IEnumerable<MatchResult> Find(string subSequence, string text, int
/// <param name="text"></param>
public static IEnumerable<MatchResult> FindExact(string subSequence, string text)
{
// ok so this whole method is a bit redundant... but the idea is to have this using a stream instead of text... later
// indexof would probably run circles around this...
var needlePosition = 0;
var termLength = subSequence.Length - 1;
var currentIndex = 0;
Expand Down Expand Up @@ -167,7 +167,6 @@ public static async Task<IEnumerable<MatchResult>> FindExactAsync(string subSequ
/// <param name="text"></param>
public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence, string text, int maxDistance)
{
var matches = new List<MatchResult>();
var termLengthMinusOne = subSequence.Length - 1;

for (var currentIndex = 0; currentIndex < text.Length - termLengthMinusOne; currentIndex++)
Expand All @@ -191,7 +190,7 @@ public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence,

if (candidateDistance <= maxDistance)
{
matches.Add(new MatchResult
yield return new MatchResult
{
StartIndex = currentIndex,
EndIndex = currentIndex + subSequence.Length,
Expand All @@ -200,11 +199,9 @@ public static IEnumerable<MatchResult> FindSubstitutionsOnly(string subSequence,
Deletions = 0,
Substitutions = candidateDistance,
Insertions = 0,
});
};
}
}

return matches;
}


Expand Down Expand Up @@ -277,13 +274,21 @@ public static async Task<IEnumerable<MatchResult>> FindSubstitutionsOnlyAsync(st


/// <summary>
/// Finds sub sequence in text with max Levenshtein distance.
/// All candidate matches are passed through <c>Utils.GetBestMatches</c>, which groups nearby matches
/// and returns only the best match of each group.
/// </summary>
/// <param name="subSequence">Sequence to search for</param>
/// <param name="text">Text to search in</param>
/// <param name="maxDistance">Maximum Levenshtein distance allowed for a match</param>
/// <returns>Lazily evaluated sequence of the best matches; the search runs while the result is enumerated</returns>
public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, string text, int maxDistance) =>
    Utils.GetBestMatches(FindLevenshteinAll(subSequence, text, maxDistance), maxDistance);


/// <summary>
/// Finds sub sequence in text with max levenshtein distance
/// This method finds all matches and does not try to consolidate overlapping matches
/// </summary>
/// <param name="subSequence"></param>
/// <param name="text"></param>
internal static IEnumerable<MatchResult> FindLevenshteinAll(string subSequence, string text, int maxDistance)
{
var matches = new List<MatchResult>();
var candidates = new Stack<CandidateMatch>();

for (var currentIndex = 0; currentIndex < text.Length; currentIndex++)
Expand All @@ -300,7 +305,7 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
if (candidate.TextIndex <= text.Length)
{
bestFoundDistance = candidate.Distance;
matches.Add(new MatchResult
yield return new MatchResult
{
StartIndex = candidate.StartIndex,
EndIndex = candidate.TextIndex,
Expand All @@ -309,7 +314,7 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
Deletions = candidate.Deletions,
Substitutions = candidate.Substitutions,
Insertions = candidate.Insertions,
});
};
}

// No point searching for better matches if we find a perfect match
Expand Down Expand Up @@ -369,7 +374,5 @@ public static IEnumerable<MatchResult> FindLevenshtein(string subSequence, strin
}
}
}

return Utils.GetBestMatches(matches.OrderBy(o => o.StartIndex).ToList(), maxDistance);
}
}
40 changes: 21 additions & 19 deletions FuzzySearchNet/src/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,38 +8,40 @@ public static class Utils
/// </summary>
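/// <param name="matches">Matches to consolidate; each match is compared with the one enumerated immediately before it</param>
/// <param name="maxDistanece">Largest difference in start index at which two consecutive matches still belong to the same group</param>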
/// <returns></returns>
/// <remarks>
/// Consecutive matches are collected into one group for as long as each start index is at most
/// <c>maxDistanece</c> beyond the start index of the match before it. From every group the match with the
/// lowest distance is returned, preferring the longest matched text when distances are equal.
/// Null entries are skipped. Results are produced lazily while the source sequence is being enumerated.
/// NOTE(review): grouping only looks at the previous match, so the input is presumably expected to be
/// ordered by start index - confirm against callers.
/// </remarks>
public static IEnumerable<MatchResult> GetBestMatches(IEnumerable<MatchResult> matches, int maxDistanece)
{
    var group = new List<MatchResult>();

    // foreach instead of a hand-driven enumerator guarantees that the source enumerator is disposed,
    // also when the consumer stops enumerating early, for example with First() or Any()
    foreach (var currentMatch in matches)
    {
        if (currentMatch == null)
        {
            continue;
        }

        // The last entry of the group is always the previously seen match.
        // A start index gap larger than maxDistanece closes the group and starts a new one.
        if (group.Count > 0 && currentMatch.StartIndex > (group[group.Count - 1].StartIndex + maxDistanece))
        {
            yield return SelectBestMatch(group);
            group.Clear();
        }

        group.Add(currentMatch);
    }

    // Emit the group that was still open when the source ran out
    if (group.Count > 0)
    {
        yield return SelectBestMatch(group);
    }
}

/// <summary>
/// Picks the match with the lowest distance from a non-empty group, preferring the longest matched text on ties
/// </summary>
private static MatchResult SelectBestMatch(List<MatchResult> group)
{
    return group.OrderBy(o => o.Distance).ThenByDescending(o => o.Match.Length).First();
}
}

0 comments on commit 393172d

Please sign in to comment.