Skip to content

Commit 2b023bc

Browse files
committed
phone-search analyzer: don't emit int'l prefix
this was an oversight in the initial implementation: if the tokenizer emits the international calling prefix in the search analyzer then all documents with the same international calling prefix will match. e.g. when searching for `+1-555-123-4567` not only documents with this number would match but also any other document with a `1` token (i.e. any other number with this prefix). thus the search functionality is currently broken for this analyzer, making it useless. the test coverage has now been extended to cover these and other use-cases. Signed-off-by: Ralph Ursprung <Ralph.Ursprung@avaloq.com>
1 parent bbcbd21 commit 2b023bc

File tree

3 files changed

+77
-7
lines changed

3 files changed

+77
-7
lines changed

plugins/analysis-phonenumber/src/main/java/org/opensearch/analysis/phone/PhoneNumberTermTokenizer.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,11 @@ private Set<String> getTokens() throws IOException {
128128
countryCode = Optional.of(String.valueOf(numberProto.getCountryCode()));
129129
input = String.valueOf(numberProto.getNationalNumber());
130130

131-
// Add Country code, extension, and the number as tokens
132-
tokens.add(countryCode.get());
131+
if (addNgrams) {
132+
// Consider the country code as an ngram - it makes no sense in the search analyzer as it'd match all values with the same country code
133+
tokens.add(countryCode.get());
134+
}
135+
// Add extension, and the number as tokens
133136
tokens.add(countryCode.get() + input);
134137
if (!Strings.isEmpty(numberProto.getExtension())) {
135138
tokens.add(numberProto.getExtension());

plugins/analysis-phonenumber/src/test/java/org/opensearch/analysis/phone/PhoneNumberAnalyzerTests.java

+5-5
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public void testEuropeDetailledSearch() throws IOException {
9090
assertTokensAreInAnyOrder(
9191
phoneSearchAnalyzer,
9292
"tel:+441344840400",
93-
Arrays.asList("tel:+441344840400", "tel:", "441344840400", "44", "1344840400")
93+
Arrays.asList("tel:+441344840400", "tel:", "441344840400", "1344840400")
9494
);
9595
}
9696

@@ -189,21 +189,21 @@ public void testLocalNumberWithCH() throws IOException {
189189
}
190190

191191
public void testSearchInternationalPrefixWithZZ() throws IOException {
192-
assertTokensInclude(phoneSearchAnalyzer, "+41583161010", Arrays.asList("41", "41583161010", "583161010"));
192+
assertTokensInclude(phoneSearchAnalyzer, "+41583161010", Arrays.asList("41583161010", "583161010"));
193193
}
194194

195195
public void testSearchInternationalPrefixWithCH() throws IOException {
196-
assertTokensInclude(phoneSearchCHAnalyzer, "+41583161010", Arrays.asList("41", "41583161010", "583161010"));
196+
assertTokensInclude(phoneSearchCHAnalyzer, "+41583161010", Arrays.asList("41583161010", "583161010"));
197197
}
198198

199199
public void testSearchNationalPrefixWithCH() throws IOException {
200200
// + is equivalent to 00 in Switzerland
201-
assertTokensInclude(phoneSearchCHAnalyzer, "0041583161010", Arrays.asList("41", "41583161010", "583161010"));
201+
assertTokensInclude(phoneSearchCHAnalyzer, "0041583161010", Arrays.asList("41583161010", "583161010"));
202202
}
203203

204204
public void testSearchLocalNumberWithCH() throws IOException {
205205
// when omitting the international prefix swiss numbers must start with '0'
206-
assertTokensInclude(phoneSearchCHAnalyzer, "0583161010", Arrays.asList("41", "41583161010", "583161010"));
206+
assertTokensInclude(phoneSearchCHAnalyzer, "0583161010", Arrays.asList("41583161010", "583161010"));
207207
}
208208

209209
/**

plugins/analysis-phonenumber/src/yamlRestTest/resources/rest-api-spec/test/analysis-phone/20_search.yml

+67
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,20 @@
3232
index: test
3333
id: 1
3434
body: { "phone": "+41 58 316 10 10", "phone-ch": "058 316 10 10" }
35+
- do:
36+
index:
37+
index: test
38+
id: 2
39+
body: { "phone": "+41 58 316 99 99", "phone-ch": "058 316 99 99" }
40+
- do:
41+
index:
42+
index: test
43+
id: 2
44+
body: { "phone": "+1-888-280-4331", "phone-ch": "+1-888-280-4331" }
3545
- do:
3646
indices.refresh: {}
3747

48+
# international format in document & search will always work
3849
- do:
3950
search:
4051
rest_total_hits_as_int: true
@@ -45,6 +56,7 @@
4556
"phone": "+41583161010"
4657
- match: { hits.total: 1 }
4758

59+
# correct national format & international format in search will always work
4860
- do:
4961
search:
5062
rest_total_hits_as_int: true
@@ -54,3 +66,58 @@
5466
match:
5567
"phone-ch": "+41583161010"
5668
- match: { hits.total: 1 }
69+
70+
# national format without country specified won't work
71+
- do:
72+
search:
73+
rest_total_hits_as_int: true
74+
index: test
75+
body:
76+
query:
77+
match:
78+
"phone": "0583161010"
79+
- match: { hits.total: 0 }
80+
81+
# correct national format with country specified in document & search will always work
82+
- do:
83+
search:
84+
rest_total_hits_as_int: true
85+
index: test
86+
body:
87+
query:
88+
match:
89+
"phone-ch": "0583161010"
90+
- match: { hits.total: 1 }
91+
92+
# international format in document & search will always work
93+
- do:
94+
search:
95+
rest_total_hits_as_int: true
96+
index: test
97+
body:
98+
query:
99+
match:
100+
"phone": "+1 888 280 4331"
101+
- match: { hits.total: 1 }
102+
103+
# international format in document & search will always work
104+
- do:
105+
search:
106+
rest_total_hits_as_int: true
107+
index: test
108+
body:
109+
query:
110+
match:
111+
"phone-ch": "+1 888 280 4331"
112+
- match: { hits.total: 1 }
113+
114+
# national format in search won't work if no country is specified
115+
- do:
116+
search:
117+
rest_total_hits_as_int: true
118+
index: test
119+
body:
120+
query:
121+
match:
122+
"phone": "888 280 4331"
123+
- match: { hits.total: 0 }

0 commit comments

Comments
 (0)