From 70f99d49a8ec65c713f6b8a45c84d4d93d9f2030 Mon Sep 17 00:00:00 2001 From: Leonard Ehrenfried Date: Fri, 9 Aug 2024 10:37:08 +0200 Subject: [PATCH] Improve regexes and test names --- .../ext/geocoder/EnglishNgramAnalyzerTest.java | 6 ++++++ .../org/opentripplanner/ext/geocoder/LuceneIndexTest.java | 2 +- .../opentripplanner/ext/geocoder/EnglishNGramAnalyzer.java | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ext-test/java/org/opentripplanner/ext/geocoder/EnglishNgramAnalyzerTest.java b/src/ext-test/java/org/opentripplanner/ext/geocoder/EnglishNgramAnalyzerTest.java index 77917399647..398dfb59630 100644 --- a/src/ext-test/java/org/opentripplanner/ext/geocoder/EnglishNgramAnalyzerTest.java +++ b/src/ext-test/java/org/opentripplanner/ext/geocoder/EnglishNgramAnalyzerTest.java @@ -135,6 +135,12 @@ void numberSuffixes(String input, String expected) { assertEquals(List.of(expected), result); } + @Test + void wordBoundary() { + var result = tokenize("1stst"); + assertEquals(List.of("1sts", "1stst", "stst"), result); + } + public List tokenize(String text) { try (var analyzer = new EnglishNGramAnalyzer()) { List result; diff --git a/src/ext-test/java/org/opentripplanner/ext/geocoder/LuceneIndexTest.java b/src/ext-test/java/org/opentripplanner/ext/geocoder/LuceneIndexTest.java index f3af08f29fe..910c5080331 100644 --- a/src/ext-test/java/org/opentripplanner/ext/geocoder/LuceneIndexTest.java +++ b/src/ext-test/java/org/opentripplanner/ext/geocoder/LuceneIndexTest.java @@ -316,7 +316,7 @@ void agenciesAndFeedPublisher() { "meridian av 148", } ) - void shortTokens(String query) { + void numericAdjectives(String query) { var names = index.queryStopClusters(query).map(c -> c.primary().name()).toList(); assertEquals( Stream.of(MERIDIAN_AVE, MERIDIAN_N2, MERIDIAN_N1).map(s -> s.getName().toString()).toList(), diff --git a/src/ext/java/org/opentripplanner/ext/geocoder/EnglishNGramAnalyzer.java b/src/ext/java/org/opentripplanner/ext/geocoder/EnglishNGramAnalyzer.java index 922108427e8..a3ef8440a18 100644 --- a/src/ext/java/org/opentripplanner/ext/geocoder/EnglishNGramAnalyzer.java +++ b/src/ext/java/org/opentripplanner/ext/geocoder/EnglishNGramAnalyzer.java @@ -26,7 +26,7 @@ class EnglishNGramAnalyzer extends Analyzer { // matches one or more numbers followed by the English suffixes "st", "nd", "rd", "th" - private static final Pattern NUMBER_SUFFIX_PATTERN = Pattern.compile("(\\d+)[st|nd|rd|th]+"); + private static final Pattern NUMBER_SUFFIX_PATTERN = Pattern.compile("(\\d+)(st|nd|rd|th)\\b"); @Override protected TokenStreamComponents createComponents(String fieldName) {