Skip to content

Commit

Permalink
Improve regexes and test names
Browse files Browse the repository at this point in the history
  • Loading branch information
leonardehrenfried committed Aug 9, 2024
1 parent edea05e commit 70f99d4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ void numberSuffixes(String input, String expected) {
assertEquals(List.of(expected), result);
}

@Test
void wordBoundary() {
  // "1stst" has an ordinal-looking "1st" that is NOT followed by a word boundary;
  // the test pins the exact tokens produced for such an input.
  List<String> tokens = tokenize("1stst");
  var expected = List.of("1sts", "1stst", "stst");
  assertEquals(expected, tokens);
}

public List<String> tokenize(String text) {
try (var analyzer = new EnglishNGramAnalyzer()) {
List<String> result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ void agenciesAndFeedPublisher() {
"meridian av 148",
}
)
void shortTokens(String query) {
void numericAdjectives(String query) {
var names = index.queryStopClusters(query).map(c -> c.primary().name()).toList();
assertEquals(
Stream.of(MERIDIAN_AVE, MERIDIAN_N2, MERIDIAN_N1).map(s -> s.getName().toString()).toList(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
class EnglishNGramAnalyzer extends Analyzer {

// matches one or more digits followed by one of the English ordinal suffixes "st", "nd", "rd", "th" at a word boundary
private static final Pattern NUMBER_SUFFIX_PATTERN = Pattern.compile("(\\d+)[st|nd|rd|th]+");
private static final Pattern NUMBER_SUFFIX_PATTERN = Pattern.compile("(\\d+)(st|nd|rd|th)\\b");

@Override
protected TokenStreamComponents createComponents(String fieldName) {
Expand Down

0 comments on commit 70f99d4

Please sign in to comment.