diff --git a/FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs b/FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs
index e755aad..0962fcc 100644
--- a/FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs
+++ b/FuzzySearchNet.Benchmark/BenchmarkFuzzySearch.cs
@@ -8,13 +8,17 @@ public class BenchmarkFuzzySearch
private const string term2 = "fooo--foo-----fo";
private const string text = "foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--foo-----fo--foo-f--fooo--";
- [Benchmark]
- public void SubstitutionOnlyBufferingShort() => FuzzySearch.FindSubstitutionsOnlyBuffering(term, text, 1);
+ //[Benchmark]
+ //public void SubstitutionOnlyBufferingShort() => FuzzySearch.FindSubstitutionsOnlyBuffering(term, text, 1);
- [Benchmark]
- public void SubstitutionOnlyBufferingLong() => FuzzySearch.FindSubstitutionsOnlyBuffering(term2, text, 1);
+ //[Benchmark]
+ //public void SubstitutionOnlyBufferingLong() => FuzzySearch.FindSubstitutionsOnlyBuffering(term2, text, 1);
+
+
+ //[Benchmark]
+ //public void SubstitutionOnlyBufferingLong3distance() => FuzzySearch.FindSubstitutionsOnlyBuffering(term2, text, 3);
[Benchmark]
- public void SubstitutionOnlyBufferingLong3distance() => FuzzySearch.FindSubstitutionsOnlyBuffering(term2, text, 3);
+ public void LevenshteinLong() => FuzzySearch.FindBuffering(term2, text, 3);
}
\ No newline at end of file
diff --git a/FuzzySearchNet.Tests/TestUtils.cs b/FuzzySearchNet.Tests/TestUtils.cs
new file mode 100644
index 0000000..0d61d19
--- /dev/null
+++ b/FuzzySearchNet.Tests/TestUtils.cs
@@ -0,0 +1,28 @@
+namespace FuzzySearchNet.Tests;
+
+internal class TestUtils
+{
+    /// <summary>Asserts that the match spans <c>text[expectedStartIndex..expectedEndIndex]</c>; the distance is checked only when one is supplied.</summary>
+    public static void AssertMatch(MatchResult match, int expectedStartIndex, int expectedEndIndex, string text, int? expectedDistance = null)
+    {
+        Assert.That(match.StartIndex, Is.EqualTo(expectedStartIndex));
+        Assert.That(match.EndIndex, Is.EqualTo(expectedEndIndex));
+        Assert.That(match.Match, Is.EqualTo(text[expectedStartIndex..expectedEndIndex]));
+        if (expectedDistance is not null)
+        {
+            Assert.That(match.Distance, Is.EqualTo(expectedDistance));
+        }
+    }
+
+    /// <summary>Asserts that the match starts at the given index and equals the expected text; the end index is derived from the text length.</summary>
+    public static void AssertMatch(MatchResult match, int expectedStartIndex, string expectedMatch, int? expectedDistance = null)
+    {
+        Assert.That(match.StartIndex, Is.EqualTo(expectedStartIndex));
+        Assert.That(match.EndIndex, Is.EqualTo(expectedStartIndex + expectedMatch.Length));
+        Assert.That(match.Match, Is.EqualTo(expectedMatch));
+        if (expectedDistance is not null)
+        {
+            Assert.That(match.Distance, Is.EqualTo(expectedDistance));
+        }
+    }
+}
diff --git a/FuzzySearchNet.Tests/Tests/FuzzySearchExactMatchTests.cs b/FuzzySearchNet.Tests/Tests/FuzzySearchExactMatchTests.cs
index a1a2f7f..8f79140 100644
--- a/FuzzySearchNet.Tests/Tests/FuzzySearchExactMatchTests.cs
+++ b/FuzzySearchNet.Tests/Tests/FuzzySearchExactMatchTests.cs
@@ -1,4 +1,4 @@
-namespace FuzzySearchNet.Tests.Tests;
+namespace FuzzySearchNet.Tests;
public class FuzzySearchExactMatchTests
{
diff --git a/FuzzySearchNet.Tests/Tests/FuzzySearchLevenshteinTests.cs b/FuzzySearchNet.Tests/Tests/FuzzySearchLevenshteinTests.cs
index 4710d12..8422138 100644
--- a/FuzzySearchNet.Tests/Tests/FuzzySearchLevenshteinTests.cs
+++ b/FuzzySearchNet.Tests/Tests/FuzzySearchLevenshteinTests.cs
@@ -1,64 +1,56 @@
-namespace FuzzySearchNet.Tests.Tests;
+namespace FuzzySearchNet.Tests;
///
/// Testing with similar tests as https://github.com/taleinat/fuzzysearch to ensure somewhat compatible behaviour
///
public class FuzzySearchLevenshteinTests
{
- [TestCase("PATTERN", "PATTERN", 0, 0, 7, 0)]
- [TestCase("def", "abcddefg", 0, 4, 7, 0)]
- [TestCase("def", "abcdeffg", 1, 3, 6, 0)]
- [TestCase("defgh", "abcdedefghi", 3, 5, 10, 0)]
- [TestCase("cdefgh", "abcdefghghi", 3, 2, 8, 0)]
- [TestCase("bde", "abcdefg", 1, 1, 5, 1)]
- [TestCase("1234567", "--123567--", 1, 2, 8, 1)]
- [TestCase("1234567", "--1238567--", 1, 2, 9, 1)]
- [TestCase("1234567", "23567-----", 2, 0, 5, 2)]
- [TestCase("1234567", "--23567---", 2, 1, 7, 2)]
- [TestCase("1234567", "-----23567", 2, 4, 10, 2)]
- public void TestSingleMatchWithDeletions(string pattern, string text, int maxDistance, int expectedStart, int expectedEnd, int expectedDistance)
+ [TestCase("PATTERN", "PATTERN", 0, 0, "PATTERN", 0)]
+ [TestCase("def", "abcddefg", 0, 4, "def", 0)]
+ [TestCase("def", "abcdeffg", 1, 3, "def", 0)]
+ [TestCase("defgh", "abcdedefghi", 3, 5, "defgh", 0)]
+ [TestCase("cdefgh", "abcdefghghi", 3, 2, "cdefgh", 0)]
+ [TestCase("bde", "abcdefg", 1, 1, "bcde", 1)]
+ [TestCase("1234567", "--123567--", 1, 2, "123567", 1)]
+ [TestCase("1234567", "--1238567--", 1, 2, "1238567", 1)]
+ [TestCase("1234567", "23567-----", 2, 0, "23567", 2)]
+ [TestCase("1234567", "--23567---", 2, 1, "-23567", 2)]
+ [TestCase("1234567", "-----23567", 2, 4, "-23567", 2)]
+ public void TestSingleMatchWithDeletions(string pattern, string text, int maxDistance, int expectedStart, string expectedMatch, int expectedDistance)
{
var results = FuzzySearch.Find(pattern, text, maxDistance).ToList();
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(1));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(expectedStart));
- Assert.That(results[0].EndIndex, Is.EqualTo(expectedEnd));
- Assert.That(results[0].Distance, Is.EqualTo(expectedDistance));
- Assert.That(results[0].Match, Is.EqualTo(text[expectedStart..expectedEnd]));
+ TestUtils.AssertMatch(results[0], expectedStart, expectedMatch, expectedDistance);
});
}
- [TestCase("PATTERN", "----------PATT-ERN---------", 1, 10, 18, 1)]
- [TestCase("PATTERN", "----------PATT-ERN---------", 2, 10, 18, 1)]
+ [TestCase("PATTERN", "----------PATT-ERN---------", 1, 10, "PATT-ERN", 1)]
+ [TestCase("PATTERN", "----------PATT-ERN---------", 2, 10, "PATT-ERN", 1)]
- [TestCase("PATTERN", "----------PATTTERN---------", 1, 10, 18, 1)]
- [TestCase("PATTERN", "----------PATTTERN---------", 2, 10, 18, 1)]
+ [TestCase("PATTERN", "----------PATTTERN---------", 1, 10, "PATTTERN", 1)]
+ [TestCase("PATTERN", "----------PATTTERN---------", 2, 10, "PATTTERN", 1)]
- [TestCase("PATTERN", "----------PATTERNN---------", 0, 10, 17, 0)]
- [TestCase("PATTERN", "----------PATTERNN---------", 1, 10, 17, 0)]
- [TestCase("PATTERN", "----------PATTERNN---------", 2, 10, 17, 0)]
- public void TestSingleMatchWithInsertion(string pattern, string text, int maxDistance, int expectedStart, int expectedEnd, int expectedDistance)
+ [TestCase("PATTERN", "----------PATTERNN---------", 0, 10, "PATTERN", 0)]
+ [TestCase("PATTERN", "----------PATTERNN---------", 1, 10, "PATTERN", 0)]
+ [TestCase("PATTERN", "----------PATTERNN---------", 2, 10, "PATTERN", 0)]
+ public void TestSingleMatchWithInsertion(string pattern, string text, int maxDistance, int expectedStart, string expectedMatch, int expectedDistance)
{
var results = FuzzySearch.Find(pattern, text, maxDistance).ToList();
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(1));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(expectedStart));
- Assert.That(results[0].EndIndex, Is.EqualTo(expectedEnd));
- Assert.That(results[0].Distance, Is.EqualTo(expectedDistance));
- Assert.That(results[0].Match, Is.EqualTo(text[expectedStart..expectedEnd]));
+ TestUtils.AssertMatch(results[0], expectedStart, expectedMatch, expectedDistance);
});
}
[Test]
- public void TestZeroMaxDistanceMultiple2()
+ public void Test2DeletionsBufferStart()
{
var word = "pattern";
var text = "atern----";
@@ -68,10 +60,7 @@ public void TestZeroMaxDistanceMultiple2()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(1));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(0));
- Assert.That(results[0].EndIndex, Is.EqualTo(5));
- Assert.That(results[0].Match, Is.EqualTo(text[0..5]));
+ TestUtils.AssertMatch(results[0], 0, "atern", 2);
});
}
@@ -106,7 +95,7 @@ public void TestZeroMaxDistanceNoMatch2()
[Test]
- public void TestZeroMaxDistanceMultiple85()
+ public void TestSingleDeletionBufferStart()
{
var word = "pattern";
var text = "patern----";
@@ -116,15 +105,13 @@ public void TestZeroMaxDistanceMultiple85()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(1));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(0));
- Assert.That(results[0].EndIndex, Is.EqualTo(6));
- Assert.That(results[0].Match, Is.EqualTo(text[0..6]));
+ TestUtils.AssertMatch(results[0], 0, "patern", 1);
});
}
+
[Test]
- public void TestZeroMaxDistanceMultipleMiddle()
+ public void TestSingleDeletionBufferMiddle()
{
var word = "pattern";
var text = "--patern--";
@@ -134,13 +121,11 @@ public void TestZeroMaxDistanceMultipleMiddle()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(1));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(2));
- Assert.That(results[0].EndIndex, Is.EqualTo(8));
- Assert.That(results[0].Match, Is.EqualTo(text[2..8]));
+ TestUtils.AssertMatch(results[0], 2, "patern", 1);
});
}
+
[Test]
public void TestMultipleMatchesConsecutive()
{
@@ -152,17 +137,12 @@ public void TestMultipleMatchesConsecutive()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(2));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(2));
- Assert.That(results[0].EndIndex, Is.EqualTo(9));
- Assert.That(results[0].Match, Is.EqualTo(text[2..9]));
-
- Assert.That(results[1].StartIndex, Is.EqualTo(9));
- Assert.That(results[1].EndIndex, Is.EqualTo(16));
- Assert.That(results[1].Match, Is.EqualTo(text[9..16]));
+ TestUtils.AssertMatch(results[0], 2, "pattern", 0);
+ TestUtils.AssertMatch(results[1], 9, "pattern", 0);
});
}
+
[Test]
public void TestMultipleMatchesConsecutive2()
{
@@ -174,17 +154,12 @@ public void TestMultipleMatchesConsecutive2()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(2));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(2));
- Assert.That(results[0].EndIndex, Is.EqualTo(9));
- Assert.That(results[0].Match, Is.EqualTo(text[2..9]));
-
- Assert.That(results[1].StartIndex, Is.EqualTo(10));
- Assert.That(results[1].EndIndex, Is.EqualTo(17));
- Assert.That(results[1].Match, Is.EqualTo(text[10..17]));
+ TestUtils.AssertMatch(results[0], 2, "pattern", 0);
+ TestUtils.AssertMatch(results[1], 10, "pattern", 0);
});
}
+
[Test]
public void TestMultipleMatchesConsecutiveSubstitutions()
{
@@ -196,14 +171,40 @@ public void TestMultipleMatchesConsecutiveSubstitutions()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(2));
+ TestUtils.AssertMatch(results[0], 2, "patterm", 1);
+ TestUtils.AssertMatch(results[1], 9, "patyern", 1);
+ });
+ }
- Assert.That(results[0].StartIndex, Is.EqualTo(2));
- Assert.That(results[0].EndIndex, Is.EqualTo(9));
- Assert.That(results[0].Match, Is.EqualTo(text[2..9]));
- Assert.That(results[1].StartIndex, Is.EqualTo(9));
- Assert.That(results[1].EndIndex, Is.EqualTo(16));
- Assert.That(results[1].Match, Is.EqualTo(text[9..16]));
+    /// <summary>Two back-to-back occurrences, each carrying one extra character, are expected as two separate distance-1 matches.</summary>
+    [Test]
+    public void TestMultipleMatchesConsecutiveInsertion()
+    {
+        const string word = "pattern";
+        const string text = "--patyternpatxtern--";
+
+        var found = FuzzySearch.Find(word, text, 1).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(2));
+            TestUtils.AssertMatch(found[0], 2, "patytern", 1);
+            TestUtils.AssertMatch(found[1], 10, "patxtern", 1);
+        });
+    }
+
+    /// <summary>An exact occurrence embedded inside a split one is expected to be the only match reported, at distance 0.</summary>
+    [Test]
+    public void TestOverlappingMatches()
+    {
+        const string word = "pattern";
+        const string text = "--pattpatterntern--";
+
+        var found = FuzzySearch.Find(word, text, 2).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(1));
+            TestUtils.AssertMatch(found[0], 6, "pattern", 0);
});
}
@@ -218,17 +219,12 @@ public void TestMultipleMatchesConsecutiveDeletion()
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(2));
-
- Assert.That(results[0].StartIndex, Is.EqualTo(2));
- Assert.That(results[0].EndIndex, Is.EqualTo(8));
- Assert.That(results[0].Match, Is.EqualTo(text[2..8]));
-
- Assert.That(results[1].StartIndex, Is.EqualTo(8));
- Assert.That(results[1].EndIndex, Is.EqualTo(14));
- Assert.That(results[1].Match, Is.EqualTo(text[8..14]));
+ TestUtils.AssertMatch(results[0], 2, "pattrn", 1);
+ TestUtils.AssertMatch(results[1], 8, "pttern", 1);
});
}
+
[TestCase("PATTERN", "")]
[TestCase("", "sometext")]
[TestCase("", "")]
@@ -242,6 +238,7 @@ public void TestEmpty(string pattern, string text)
});
}
+
[TestCase("PATTERN", "PATERN", 1)]
public void TestShorterText(string pattern, string text, int expectedMatches)
{
@@ -250,12 +247,11 @@ public void TestShorterText(string pattern, string text, int expectedMatches)
Assert.Multiple(() =>
{
Assert.That(results.Count, Is.EqualTo(expectedMatches));
- Assert.That(results[0].StartIndex, Is.EqualTo(0));
- Assert.That(results[0].EndIndex, Is.EqualTo(text.Length));
- Assert.That(results[0].Match, Is.EqualTo(text[0..text.Length]));
+ TestUtils.AssertMatch(results[0], 0, "PATERN", 1);
});
}
+
[TestCase("PATTERN", "PAERN", 0)]
public void TestShorterTextNoMatch(string pattern, string text, int expectedMatches)
{
@@ -266,4 +262,98 @@ public void TestShorterTextNoMatch(string pattern, string text, int expectedMatc
Assert.That(results.Count, Is.EqualTo(expectedMatches));
});
}
+
+
+    /// <summary>Runs FindBuffering with max distance 3 and expects exactly one match with the given start index, matched text and distance.</summary>
+    [TestCase("pattern", "pattern---------------------", 0, "pattern", 0)]
+    [TestCase("pattern", "attern---------------------", 0, "attern", 1)]
+    [TestCase("pattern", "ttern---------------------", 0, "ttern", 2)]
+    [TestCase("pattern", "tern---------------------", 0, "tern", 3)]
+    [TestCase("pattern", "--------pattttern-------------", 8, "pattttern", 2)]
+    [TestCase("pattern", "---------pattttern------------", 9, "pattttern", 2)]
+    [TestCase("pattern", "----------pattttern-----------", 10, "pattttern", 2)]
+    [TestCase("pattern", "-----------pattttern----------", 11, "pattttern", 2)]
+    [TestCase("pattern", "------------pattttern---------", 12, "pattttern", 2)]
+    [TestCase("pattern", "-------------pattttern--------", 13, "pattttern", 2)]
+    [TestCase("pattern", "--------------pattttern-------", 14, "pattttern", 2)]
+    [TestCase("pattern", "---------------pattttern------", 15, "pattttern", 2)]
+    [TestCase("pattern", "----------------pattttern-----", 16, "pattttern", 2)]
+    [TestCase("pattern", "-----------------pattttern----", 17, "pattttern", 2)]
+    [TestCase("pattern", "------------------pattttern---", 18, "pattttern", 2)]
+    [TestCase("pattern", "-------------------pattttern--", 19, "pattttern", 2)]
+    [TestCase("pattern", "--------------------pattttern-", 20, "pattttern", 2)]
+    [TestCase("pattern", "---------------------pattttern", 21, "pattttern", 2)]
+    [TestCase("pattern", "---patter", 3, "patter", 1)]
+    [TestCase("pattern", "---patte", 3, "patte", 2)]
+    [TestCase("pattern", "---patt", 3, "patt", 3)]
+    [TestCase("pattern", "----------------------pattttern", 22, "pattttern", 2)]
+    public void TestLevenshteinBufferBoundary(string term, string text, int expectedStartIndex, string expectedMatch, int expectedDistance)
+    {
+        var found = FuzzySearch.FindBuffering(term, text, 3).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(1));
+            TestUtils.AssertMatch(found[0], expectedStartIndex, expectedMatch, expectedDistance);
+        });
+    }
+
+
+    /// <summary>Runs FindBuffering with max distance 1 on very short terms and texts and expects exactly one match per case.</summary>
+    [TestCase("ab", "-a", 1, "a", 1)]
+    [TestCase("ab", "b---", 0, "b", 1)]
+    [TestCase("ab", "-axb", 1, "axb", 1)]
+    [TestCase("ab", "axb-", 0, "axb", 1)]
+    [TestCase("ab", "--ax", 2, "ax", 1)]
+    [TestCase("ab", "ax--", 0, "ax", 1)]
+    [TestCase("ab", "--ab", 2, "ab", 0)]
+    [TestCase("ab", "ab--", 0, "ab", 0)]
+    [TestCase("ab", "ab", 0, "ab", 0)]
+    [TestCase("ab", "-ab", 1, "ab", 0)]
+    [TestCase("ab", "ab-", 0, "ab", 0)]
+    [TestCase("ab", "b", 0, "b", 1)]
+    [TestCase("ab", "a", 0, "a", 1)]
+    [TestCase("a", "a", 0, "a", 0)]
+    [TestCase("ab", "axb", 0, "axb", 1)]
+    public void TestLevenshteinBufferBoundaryShort(string term, string text, int expectedStartIndex, string expectedMatch, int expectedDistance)
+    {
+        var found = FuzzySearch.FindBuffering(term, text, 1).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(1));
+            TestUtils.AssertMatch(found[0], expectedStartIndex, expectedMatch, expectedDistance);
+        });
+    }
+
+
+    /// <summary>Runs FindBuffering with max distance 2 against single-character texts and expects exactly one match per case.</summary>
+    [TestCase("abc", "a", 0, "a", 2)]
+    [TestCase("abc", "b", 0, "b", 2)]
+    [TestCase("abc", "c", 0, "c", 2)]
+    public void TestLevenshteinBufferBoundaryShort2Distance(string term, string text, int expectedStartIndex, string expectedMatch, int expectedDistance)
+    {
+        var found = FuzzySearch.FindBuffering(term, text, 2).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(1));
+            TestUtils.AssertMatch(found[0], expectedStartIndex, expectedMatch, expectedDistance);
+        });
+    }
+
+
+    /// <summary>Runs FindBuffering with max distance 3 against two-character texts and expects exactly one match per case.</summary>
+    [TestCase("abcd", "ax", 0, "ax", 3)]
+    [TestCase("abcd", "bx", 0, "bx", 3)]
+    [TestCase("abcd", "cx", 0, "cx", 3)]
+    [TestCase("abcd", "xa", 1, "a", 3)]
+    [TestCase("abcd", "xb", 0, "xb", 3)]
+    [TestCase("abcd", "xc", 0, "xc", 3)]
+    public void TestLevenshteinBufferBoundaryShort3Distance(string term, string text, int expectedStartIndex, string expectedMatch, int expectedDistance)
+    {
+        var found = FuzzySearch.FindBuffering(term, text, 3).ToList();
+        Assert.Multiple(() =>
+        {
+            Assert.That(found.Count, Is.EqualTo(1));
+            TestUtils.AssertMatch(found[0], expectedStartIndex, expectedMatch, expectedDistance);
+        });
+    }
}
diff --git a/FuzzySearchNet.Tests/Tests/FuzzySearchSubstitutionsOnlyTests.cs b/FuzzySearchNet.Tests/Tests/FuzzySearchSubstitutionsOnlyTests.cs
index 22854d5..3275b58 100644
--- a/FuzzySearchNet.Tests/Tests/FuzzySearchSubstitutionsOnlyTests.cs
+++ b/FuzzySearchNet.Tests/Tests/FuzzySearchSubstitutionsOnlyTests.cs
@@ -1,4 +1,4 @@
-namespace FuzzySearchNet.Tests.Tests;
+namespace FuzzySearchNet.Tests;
public class FuzzySearchSubstitutionsOnlyTests
{
diff --git a/FuzzySearchNet/FuzzySearchNet.csproj b/FuzzySearchNet/FuzzySearchNet.csproj
index 0560a44..7b17160 100644
--- a/FuzzySearchNet/FuzzySearchNet.csproj
+++ b/FuzzySearchNet/FuzzySearchNet.csproj
@@ -13,7 +13,7 @@
FuzzySearch.Net
- 0.2.0
+ 0.2.1
FuzzySearch.Net
Verner Fortelius
Fuzzy search library for finding strings in strings. Inspired by and attempts to be somewhat compatible with fuzzysearch for python https://github.com/taleinat/fuzzysearch
@@ -22,7 +22,7 @@
https://github.com/vforteli/FuzzySearch.Net
https://github.com/vforteli/FuzzySearch.Net
https://github.com/vforteli/FuzzySearch.Net/blob/main/LICENSE.md
- Clean up public methods, fix some bugs
+ Fix some bugs
fuzzy search;levenshtein distance;dotnet;.net;c#;fuzzysearch.net
true
snupkg
diff --git a/FuzzySearchNet/src/CandidateMatch.cs b/FuzzySearchNet/src/CandidateMatch.cs
index 5351472..5c93fb1 100644
--- a/FuzzySearchNet/src/CandidateMatch.cs
+++ b/FuzzySearchNet/src/CandidateMatch.cs
@@ -1,3 +1,17 @@
namespace FuzzySearchNet;
-public record struct CandidateMatch(int StartIndex, int TextIndex, int PatternIndex, int Distance, int Deletions, int Substitutions, int Insertions);
\ No newline at end of file
+public record struct CandidateMatch(int StartIndex, int TextIndex, int SubSequenceIndex = 0, int Position = 0, int Offset = 0, int Distance = 0, int Deletions = 0, int Substitutions = 0, int Insertions = 0);
+// State of one candidate alignment: where it started, how far it has advanced in the text and subsequence, and its edit counts so far.
+// Using a record struct instead of the record class sketched below improves performance by around 30% in benchmarks.
+//public record CandidateMatch
+//{
+// public int StartIndex;
+// public int TextIndex => StartIndex + Position;
+// public int SubSequenceIndex => Position + Offset;
+// public int Position = 0;
+// public int Offset = 0;
+// public int Deletions = 0;
+// public int Substitutions = 0;
+// public int Insertions = 0;
+// public int Distance => Deletions + Insertions + Substitutions;
+//}
\ No newline at end of file
diff --git a/FuzzySearchNet/src/FuzzySearch.cs b/FuzzySearchNet/src/FuzzySearch.cs
index ac6c084..f273d2c 100644
--- a/FuzzySearchNet/src/FuzzySearch.cs
+++ b/FuzzySearchNet/src/FuzzySearch.cs
@@ -170,29 +170,31 @@ public static IEnumerable FindBuffering(string subSequence, string
var matches = new List();
var candidates = new Stack();
- for (var currentIndex = 0; currentIndex <= text.Length - (subSequence.Length - 1); currentIndex++)
+ for (var currentIndex = 0; currentIndex < text.Length; currentIndex++)
{
- candidates.Push(new CandidateMatch(currentIndex, currentIndex, 0, 0, 0, 0, 0));
+ candidates.Push(new CandidateMatch(currentIndex, currentIndex));
// Keep track of the best distance so far, this means we can ignore candidates with higher distance if we already have a match
var bestFoundDistance = maxDistance;
while (candidates.TryPop(out var candidate))
{
- if (candidate.PatternIndex == subSequence.Length && candidate.Distance <= bestFoundDistance)
+ if (candidate.SubSequenceIndex == subSequence.Length)
{
- matches.Add(new MatchResult
+ if (candidate.TextIndex <= text.Length)
{
- StartIndex = candidate.StartIndex,
- EndIndex = candidate.TextIndex,
- Distance = candidate.Distance,
- Match = text[candidate.StartIndex..candidate.TextIndex],
- Deletions = candidate.Deletions,
- Substitutions = candidate.Substitutions,
- Insertions = candidate.Insertions,
- });
-
- bestFoundDistance = candidate.Distance;
+ bestFoundDistance = candidate.Distance;
+ matches.Add(new MatchResult
+ {
+ StartIndex = candidate.StartIndex,
+ EndIndex = candidate.TextIndex,
+ Distance = candidate.Distance,
+ Match = text[candidate.StartIndex..candidate.TextIndex],
+ Deletions = candidate.Deletions,
+ Substitutions = candidate.Substitutions,
+ Insertions = candidate.Insertions,
+ });
+ }
// No point searching for better matches if we find a perfect match
if (candidate.Distance == 0)
@@ -204,91 +206,54 @@ public static IEnumerable FindBuffering(string subSequence, string
continue;
}
- if (candidate.TextIndex == text.Length)
- {
- continue;
- }
-
- if (text[candidate.TextIndex] == subSequence[candidate.PatternIndex])
+ if (candidate.SubSequenceIndex < subSequence.Length && candidate.TextIndex < text.Length && text[candidate.TextIndex] == subSequence[candidate.SubSequenceIndex])
{
- candidates.Push(new CandidateMatch(candidate.StartIndex, candidate.TextIndex + 1, candidate.PatternIndex + 1, candidate.Distance, candidate.Deletions, candidate.Substitutions, candidate.Insertions));
+ // match
+ candidates.Push(candidate with
+ {
+ Position = candidate.Position + 1,
+ TextIndex = candidate.TextIndex + 1,
+ SubSequenceIndex = candidate.SubSequenceIndex + 1,
+ });
if (candidate.Distance < bestFoundDistance)
{
+ // jump over one character in text
candidates.Push(candidate with
{
- PatternIndex = candidate.PatternIndex + 1,
- Distance = candidate.Distance + 1,
- Deletions = candidate.Deletions + 1,
- });
-
- candidates.Push(candidate with
- {
- TextIndex = candidate.TextIndex + 1,
- Distance = candidate.Distance + 1,
Insertions = candidate.Insertions + 1,
+ Distance = candidate.Distance + 1,
+ Position = candidate.Position + 2,
+ SubSequenceIndex = candidate.SubSequenceIndex + 1,
+ TextIndex = candidate.TextIndex + 2,
+ Offset = candidate.Offset - 1,
});
}
}
- else
+ else if (candidate.Distance < bestFoundDistance)
{
- if (candidate.Distance < bestFoundDistance)
+ // substitute one character
+ candidates.Push(candidate with
{
- candidates.Push(candidate with
- {
- TextIndex = candidate.TextIndex + 1,
- PatternIndex = candidate.PatternIndex + 1,
- Distance = candidate.Distance + 1,
- Substitutions = candidate.Substitutions + 1,
- });
-
- candidates.Push(candidate with
- {
- PatternIndex = candidate.PatternIndex + 1,
- Distance = candidate.Distance + 1,
- Deletions = candidate.Deletions + 1,
- });
+ Substitutions = candidate.Substitutions + 1,
+ Distance = candidate.Distance + 1,
+ Position = candidate.Position + 1,
+ TextIndex = candidate.TextIndex + 1,
+ SubSequenceIndex = candidate.SubSequenceIndex + 1,
+ });
- candidates.Push(candidate with
- {
- TextIndex = candidate.TextIndex + 1,
- Distance = candidate.Distance + 1,
- Insertions = candidate.Insertions + 1,
- });
- }
+ // jump over one character in subsequence
+ candidates.Push(candidate with
+ {
+ Deletions = candidate.Deletions + 1,
+ Distance = candidate.Distance + 1,
+ Offset = candidate.Offset + 1,
+ SubSequenceIndex = candidate.SubSequenceIndex + 1,
+ });
}
}
}
- matches = matches.Distinct().ToList();
-
- if (matches.Count > 1)
- {
- var groups = new List>();
-
- groups.Add(new List());
-
- var match = matches[0];
- groups[0].Add(match);
-
- for (var i = 0; i < matches.Count - 1; i++)
- {
- var currentMatch = matches[i];
- if ((currentMatch.StartIndex + currentMatch.Insertions) >= (match.EndIndex - match.Insertions))
- {
- groups.Add(new List());
- }
-
- groups.Last().Add(currentMatch);
-
- match = currentMatch;
- }
-
- return groups.Select(o => o.OrderBy(o => o.Distance).ThenByDescending(o => o.Match.Length).First()).ToList();
- }
- else
- {
- return matches;
- }
+ return Utils.GetBestMatches(matches.OrderBy(o => o.StartIndex).ToList(), maxDistance);
}
}
diff --git a/FuzzySearchNet/src/MatchResult.cs b/FuzzySearchNet/src/MatchResult.cs
index 8e9857a..b98866f 100644
--- a/FuzzySearchNet/src/MatchResult.cs
+++ b/FuzzySearchNet/src/MatchResult.cs
@@ -1,6 +1,6 @@
namespace FuzzySearchNet;
-public class MatchResult
+public record MatchResult
{
public int StartIndex { get; set; }
public int EndIndex { get; set; }
diff --git a/FuzzySearchNet/src/Utils.cs b/FuzzySearchNet/src/Utils.cs
new file mode 100644
index 0000000..10ae6fd
--- /dev/null
+++ b/FuzzySearchNet/src/Utils.cs
@@ -0,0 +1,45 @@
+namespace FuzzySearchNet;
+
+public static class Utils
+{
+    /// <summary>
+    /// Groups neighbouring matches and returns the best match of each group:
+    /// lowest distance first, longest matched text as tie-breaker.
+    /// </summary>
+    /// <remarks>
+    /// Each match is compared with the one immediately before it, so the list is expected to be ordered by start index.
+    /// </remarks>
+    /// <param name="list">Candidate matches; duplicates (by default equality) are removed before grouping.</param>
+    /// <param name="maxDistanece">A match starts a new group when its start index exceeds the previous match's start index by more than this value.</param>
+    /// <returns>One match per group, or the de-duplicated input when it contains fewer than two matches.</returns>
+    public static IEnumerable<MatchResult> GetBestMatches(List<MatchResult> list, int maxDistanece)
+    {
+        var matches = list.Distinct().ToList();
+
+        // Nothing to group when there is at most one distinct match.
+        if (matches.Count <= 1)
+        {
+            return matches;
+        }
+
+        // Seed the first group with the first match; later matches either join the newest group or open a new one.
+        var groups = new List<List<MatchResult>> { new List<MatchResult> { matches[0] } };
+
+        for (var i = 1; i < matches.Count; i++)
+        {
+            var previousMatch = matches[i - 1];
+            var currentMatch = matches[i];
+
+            if (currentMatch.StartIndex > previousMatch.StartIndex + maxDistanece)
+            {
+                groups.Add(new List<MatchResult>());
+            }
+
+            groups[^1].Add(currentMatch);
+        }
+
+        return groups
+            .Select(group => group.OrderBy(m => m.Distance).ThenByDescending(m => m.Match.Length).First())
+            .ToList();
+    }
+}
\ No newline at end of file