/**
 * Substring search based on the Knuth-Morris-Pratt (KMP) algorithm.
 */
public class KMPSubstringSearchSnippet {

  /**
   * Implements the Knuth-Morris-Pratt (KMP) algorithm to find the index of the first occurrence
   * of a substring in a given text.
   *
   * <p>A {@code null} or empty pattern is treated as trivially matching at index 0. A
   * {@code null} text cannot contain a non-empty pattern, so -1 is returned for it.
   *
   * @param text The text in which the substring is to be searched.
   * @param pattern The substring pattern to search for.
   * @return The index of the first occurrence of the pattern in the text, or -1 if the pattern
   *     is not found.
   */
  public static int kmpSearch(String text, String pattern) {
    if (pattern == null || pattern.isEmpty()) {
      return 0; // Trivial case: empty pattern
    }
    if (text == null || pattern.length() > text.length()) {
      return -1; // Nothing to search in, or the pattern cannot fit
    }

    int[] lps = computeLPSArray(pattern);
    int matched = 0; // number of pattern characters currently matched

    for (int i = 0; i < text.length(); i++) {
      char current = text.charAt(i);
      // On a mismatch, fall back to the longest border of the matched prefix
      // instead of re-reading text characters.
      while (matched > 0 && pattern.charAt(matched) != current) {
        matched = lps[matched - 1];
      }
      if (pattern.charAt(matched) == current) {
        matched++;
      }
      if (matched == pattern.length()) {
        return i - matched + 1; // start index of the match that ends at i
      }
    }
    return -1; // Pattern not found
  }

  /**
   * Computes the LPS (Longest Prefix Suffix) array for the pattern: entry {@code i} is the
   * length of the longest proper prefix of {@code pattern[0..i]} that is also a suffix of it.
   *
   * @param pattern The pattern for which the LPS array is to be computed.
   * @return The LPS array; empty when the pattern is empty.
   */
  private static int[] computeLPSArray(String pattern) {
    int[] lps = new int[pattern.length()]; // lps[0] stays 0 by default
    int length = 0; // length of the current longest prefix-suffix

    for (int i = 1; i < pattern.length(); i++) {
      while (length > 0 && pattern.charAt(i) != pattern.charAt(length)) {
        length = lps[length - 1]; // Fall back to the previous LPS value
      }
      if (pattern.charAt(i) == pattern.charAt(length)) {
        length++;
      }
      lps[i] = length;
    }
    return lps;
  }
}
/**
 * KmpSubstringSearchSnippet: substring search based on the Knuth-Morris-Pratt (KMP) algorithm.
 */
public class KmpSubstringSearchSnippet {

  /**
   * Implements the Knuth-Morris-Pratt (KMP) algorithm to find the index of the first occurrence
   * of a substring in a text.
   *
   * <p>A {@code null} or empty pattern is treated as trivially matching at index 0. A
   * {@code null} text cannot contain a non-empty pattern, so -1 is returned for it.
   *
   * @param text The text in which the substring is to be searched.
   * @param pattern The substring pattern to search for.
   * @return The index of the first occurrence, or -1 if the pattern is not found.
   */
  public static int kmpSearch(String text, String pattern) {
    if (pattern == null || pattern.isEmpty()) {
      return 0; // Trivial case: empty pattern
    }
    if (text == null || pattern.length() > text.length()) {
      return -1; // Nothing to search in, or the pattern cannot fit
    }

    int[] lps = computeLpsArray(pattern);
    int matched = 0; // number of pattern characters currently matched

    for (int i = 0; i < text.length(); i++) {
      char current = text.charAt(i);
      // On a mismatch, fall back to the longest border of the matched prefix
      // instead of re-reading text characters.
      while (matched > 0 && pattern.charAt(matched) != current) {
        matched = lps[matched - 1];
      }
      if (pattern.charAt(matched) == current) {
        matched++;
      }
      if (matched == pattern.length()) {
        return i - matched + 1; // start index of the match that ends at i
      }
    }
    return -1; // Pattern not found
  }

  /**
   * Computes the LPS (Longest Prefix Suffix) array: entry {@code i} is the length of the longest
   * proper prefix of {@code pattern[0..i]} that is also a suffix of it.
   *
   * @param pattern The pattern for which the LPS array is to be computed.
   * @return The LPS array; empty when the pattern is empty.
   */
  private static int[] computeLpsArray(String pattern) {
    int[] lps = new int[pattern.length()]; // lps[0] stays 0 by default
    int length = 0; // length of the current longest prefix-suffix

    for (int i = 1; i < pattern.length(); i++) {
      while (length > 0 && pattern.charAt(i) != pattern.charAt(length)) {
        length = lps[length - 1]; // Fall back to the previous LPS value
      }
      if (pattern.charAt(i) == pattern.charAt(length)) {
        length++;
      }
      lps[i] = length;
    }
    return lps;
  }
}
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +package string; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; + +/* +* Tests for 30 Seconds of Java code library +* +*/ +class KmpSubstringSearchSnippetTest { + + /** + * Tests for {@link KmpSubstringSearchSnippet#kmpSearch(String, String)}. + */ + @Test + void testKmpSearch() { + // Test cases for KMP substring search + assertEquals(6, KmpSubstringSearchSnippet.kmpSearch("abxabcabcaby", "abcaby")); + assertEquals(7, KmpSubstringSearchSnippet.kmpSearch("subash pandey", "pandey")); + assertEquals(-1, KmpSubstringSearchSnippet.kmpSearch("abcd", "e")); + assertEquals(0, KmpSubstringSearchSnippet.kmpSearch("aaaaa", "a")); + assertEquals(2, KmpSubstringSearchSnippet.kmpSearch("abcdabcd", "cdab")); + } +} \ No newline at end of file