Skip to content

Commit c69d8f5

Browse files
committed
Add more unit tests
Signed-off-by: Michael Froh <froh@amazon.com>
1 parent 3cd7db9 commit c69d8f5

File tree

2 files changed

+277
-6
lines changed

2 files changed

+277
-6
lines changed

server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java

+19-6
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
1010

1111
import org.apache.lucene.analysis.Tokenizer;
1212
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
13+
import org.apache.lucene.document.Field;
14+
import org.apache.lucene.document.FieldType;
1315
import org.apache.lucene.document.SortedSetDocValuesField;
14-
import org.apache.lucene.document.TextField;
16+
import org.apache.lucene.index.IndexOptions;
1517
import org.apache.lucene.index.LeafReaderContext;
1618
import org.apache.lucene.index.Term;
1719
import org.apache.lucene.search.BooleanClause;
@@ -177,6 +179,15 @@ public int ignoreAbove() {
177179
return ignoreAbove;
178180
}
179181

182+
// Lucene field type used by parseCreateField for the tokenized wildcard field:
// postings record documents only (IndexOptions.DOCS), the value is tokenized,
// norms are omitted, and the value is not stored.
private static final FieldType FIELD_TYPE = new FieldType();
183+
static {
184+
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
185+
FIELD_TYPE.setTokenized(true);
186+
FIELD_TYPE.setOmitNorms(true);
187+
FIELD_TYPE.setStored(false);
188+
// Freeze the shared instance so it cannot be modified after class initialization.
FIELD_TYPE.freeze();
189+
}
190+
180191
@Override
181192
protected void parseCreateField(ParseContext context) throws IOException {
182193
String value;
@@ -204,7 +215,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
204215
final BytesRef binaryValue = new BytesRef(value);
205216
Tokenizer tokenizer = new WildcardFieldTokenizer();
206217
tokenizer.setReader(new StringReader(value));
207-
context.doc().add(new TextField(fieldType().name(), tokenizer));
218+
context.doc().add(new Field(fieldType().name(), tokenizer, FIELD_TYPE));
208219
if (fieldType().hasDocValues()) {
209220
context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
210221
} else {
@@ -283,13 +294,15 @@ public boolean incrementToken() throws IOException {
283294
// Two zeroes usually means we're done.
284295
if (length == 3 && charTermAttribute.buffer()[1] != 0) {
285296
// The only case where we're not done is if the input has exactly 1 character, so the buffer
286-
// contains 0, char, 0. In that case, we return char,0, and it's our last token.
297+
// contains 0, char, 0. In that case, we return char now, then return char, 0 on the next iteration
287298
charTermAttribute.buffer()[0] = charTermAttribute.buffer()[1];
288299
charTermAttribute.buffer()[1] = 0;
289-
charTermAttribute.setLength(2);
290-
} else {
291-
return false;
300+
charTermAttribute.setLength(1);
301+
length = 2;
302+
offset = 1;
303+
return true;
292304
}
305+
return false;
293306
}
294307
if (length == 3) {
295308
// Read the next character, overwriting the current offset

server/src/test/java/org/opensearch/index/mapper/WildcardFieldMapperTests.java

+258
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,41 @@
88

99
package org.opensearch.index.mapper;
1010

11+
import org.apache.lucene.analysis.TokenStream;
1112
import org.apache.lucene.analysis.Tokenizer;
13+
import org.apache.lucene.analysis.core.LowerCaseFilter;
14+
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
15+
import org.apache.lucene.analysis.standard.StandardAnalyzer;
1216
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
17+
import org.apache.lucene.document.Field;
18+
import org.apache.lucene.index.DocValuesType;
19+
import org.apache.lucene.index.IndexOptions;
20+
import org.apache.lucene.index.IndexableField;
21+
import org.apache.lucene.index.IndexableFieldType;
22+
import org.apache.lucene.util.BytesRef;
23+
import org.opensearch.Version;
24+
import org.opensearch.cluster.metadata.IndexMetadata;
25+
import org.opensearch.common.settings.Settings;
1326
import org.opensearch.core.xcontent.XContentBuilder;
27+
import org.opensearch.index.IndexSettings;
28+
import org.opensearch.index.analysis.AnalyzerScope;
29+
import org.opensearch.index.analysis.CharFilterFactory;
30+
import org.opensearch.index.analysis.CustomAnalyzer;
31+
import org.opensearch.index.analysis.IndexAnalyzers;
32+
import org.opensearch.index.analysis.LowercaseNormalizer;
33+
import org.opensearch.index.analysis.NamedAnalyzer;
34+
import org.opensearch.index.analysis.TokenFilterFactory;
35+
import org.opensearch.index.analysis.TokenizerFactory;
1436

1537
import java.io.IOException;
1638
import java.io.StringReader;
1739
import java.util.ArrayList;
40+
import java.util.Collections;
1841
import java.util.List;
42+
import java.util.Map;
43+
44+
import static java.util.Collections.singletonMap;
45+
import static org.opensearch.index.mapper.FieldTypeTestCase.fetchSourceValue;
1946

2047
public class WildcardFieldMapperTests extends MapperTestCase {
2148

@@ -71,5 +98,236 @@ public void testTokenizer() throws IOException {
7198
),
7299
terms
73100
);
101+
terms.clear();
102+
try (Tokenizer tokenizer = new WildcardFieldMapper.WildcardFieldTokenizer()) {
103+
tokenizer.setReader(new StringReader("a"));
104+
tokenizer.reset();
105+
CharTermAttribute charTermAttribute = tokenizer.getAttribute(CharTermAttribute.class);
106+
while (tokenizer.incrementToken()) {
107+
terms.add(charTermAttribute.toString());
108+
}
109+
}
110+
assertEquals(List.of(WildcardFieldTypeTests.prefixAnchored("a"), "a", WildcardFieldTypeTests.suffixAnchored("a")), terms);
111+
}
112+
113+
// Verifies that enabling doc_values on a wildcard field adds a second Lucene field carrying
// SORTED_SET doc values, while the default mapping produces a single field without doc values.
public void testEnableDocValues() throws IOException {
114+
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard").field("doc_values", true)));
115+
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
116+
IndexableField[] fields = doc.rootDoc().getFields("field");
117+
// With doc_values enabled: fields[0] is the indexed field, fields[1] is the doc-values field.
assertEquals(2, fields.length);
118+
assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
119+
assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType());
120+
121+
// Default mapping (no doc_values setting): only the indexed field is produced.
mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard")));
122+
doc = mapper.parse(source(b -> b.field("field", "1234")));
123+
fields = doc.rootDoc().getFields("field");
124+
assertEquals(1, fields.length);
125+
assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
126+
}
127+
128+
// Supplies the analyzers available to the mappers under test. The indexSettings argument is
// not read here. Three maps are passed to IndexAnalyzers:
//   1. "default" -> StandardAnalyzer
//   2. "lowercase" and "other_lowercase" -> LowercaseNormalizer
//   3. "lowercase" -> CustomAnalyzer (whitespace tokenizer + lower-case token filter)
// NOTE(review): the maps are presumably analyzers, normalizers and whitespace normalizers, in
// that order — confirm against the IndexAnalyzers constructor.
@Override
129+
protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) {
130+
return new IndexAnalyzers(
131+
singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())),
132+
Map.of(
133+
"lowercase",
134+
new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()),
135+
"other_lowercase",
136+
new NamedAnalyzer("other_lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer())
137+
),
138+
singletonMap(
139+
"lowercase",
140+
new NamedAnalyzer(
141+
"lowercase",
142+
AnalyzerScope.INDEX,
143+
new CustomAnalyzer(
144+
TokenizerFactory.newFactory("lowercase", WhitespaceTokenizer::new),
145+
new CharFilterFactory[0],
146+
// Single anonymous token filter factory that wraps the stream in a LowerCaseFilter.
new TokenFilterFactory[] { new TokenFilterFactory() {
147+
148+
@Override
149+
public String name() {
150+
return "lowercase";
151+
}
152+
153+
@Override
154+
public TokenStream create(TokenStream tokenStream) {
155+
return new LowerCaseFilter(tokenStream);
156+
}
157+
} }
158+
)
159+
)
160+
)
161+
);
162+
}
163+
164+
// Verifies that a wildcard field configured with the "lowercase" normalizer and doc_values
// indexes the input "AbC" as lower-cased terms and stores "abc" as the doc value, and that
// both resulting Lucene fields have the expected field-type settings.
public void testNormalizer() throws IOException {
165+
DocumentMapper mapper = createDocumentMapper(
166+
fieldMapping(b -> b.field("type", "wildcard").field("normalizer", "lowercase").field("doc_values", true))
167+
);
168+
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "AbC")));
169+
170+
IndexableField[] fields = doc.rootDoc().getFields("field");
171+
assertEquals(2, fields.length);
172+
173+
assertTrue(fields[0] instanceof Field);
174+
Field textField = (Field) fields[0];
175+
List<String> terms = new ArrayList<>();
176+
// Drain the token stream attached to the indexed field and collect every emitted term.
try (TokenStream tokenStream = textField.tokenStreamValue()) {
177+
tokenStream.reset();
178+
CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
179+
while (tokenStream.incrementToken()) {
180+
terms.add(charTermAttribute.toString());
181+
}
182+
}
183+
// Expected terms are derived from the normalized value "abc", including the anchored
// variants built by the WildcardFieldTypeTests helpers.
assertEquals(
184+
List.of(
185+
WildcardFieldTypeTests.prefixAnchored("a"),
186+
WildcardFieldTypeTests.prefixAnchored("ab"),
187+
"a",
188+
"ab",
189+
"abc",
190+
"b",
191+
"bc",
192+
WildcardFieldTypeTests.suffixAnchored("bc"),
193+
"c",
194+
WildcardFieldTypeTests.suffixAnchored("c")
195+
),
196+
terms
197+
);
198+
// The indexed field: docs-only postings, tokenized, no norms, not stored, no term vectors.
IndexableFieldType fieldType = fields[0].fieldType();
199+
assertTrue(fieldType.omitNorms());
200+
assertTrue(fieldType.tokenized());
201+
assertFalse(fieldType.stored());
202+
assertEquals(IndexOptions.DOCS, fieldType.indexOptions());
203+
assertFalse(fieldType.storeTermVectors());
204+
assertFalse(fieldType.storeTermVectorOffsets());
205+
assertFalse(fieldType.storeTermVectorPositions());
206+
assertFalse(fieldType.storeTermVectorPayloads());
207+
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
208+
209+
// The doc-values field: holds the normalized value and is not indexed.
assertEquals(new BytesRef("abc"), fields[1].binaryValue());
210+
fieldType = fields[1].fieldType();
211+
assertEquals(IndexOptions.NONE, fieldType.indexOptions());
212+
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
213+
}
214+
215+
// Verifies null handling: without a null_value an explicit null produces no Lucene fields;
// with null_value "uri" a missing field still produces nothing, but an explicit null is
// indexed (and stored as a doc value) as if the value were "uri".
public void testNullValue() throws IOException {
216+
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
217+
ParsedDocument doc = mapper.parse(source(b -> b.nullField("field")));
218+
assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));
219+
220+
mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard").field("null_value", "uri").field("doc_values", true)));
221+
// Field absent from the source: null_value is not applied.
doc = mapper.parse(source(b -> {}));
222+
IndexableField[] fields = doc.rootDoc().getFields("field");
223+
assertEquals(0, fields.length);
224+
// Field explicitly null: null_value is substituted.
doc = mapper.parse(source(b -> b.nullField("field")));
225+
fields = doc.rootDoc().getFields("field");
226+
assertEquals(2, fields.length);
227+
assertTrue(fields[0] instanceof Field);
228+
Field textField = (Field) fields[0];
229+
List<String> terms = new ArrayList<>();
230+
// Drain the token stream attached to the indexed field and collect every emitted term.
try (TokenStream tokenStream = textField.tokenStreamValue()) {
231+
tokenStream.reset();
232+
CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
233+
while (tokenStream.incrementToken()) {
234+
terms.add(charTermAttribute.toString());
235+
}
236+
}
237+
assertEquals(
238+
List.of(
239+
WildcardFieldTypeTests.prefixAnchored("u"),
240+
WildcardFieldTypeTests.prefixAnchored("ur"),
241+
"u",
242+
"ur",
243+
"uri",
244+
"r",
245+
"ri",
246+
WildcardFieldTypeTests.suffixAnchored("ri"),
247+
"i",
248+
WildcardFieldTypeTests.suffixAnchored("i")
249+
),
250+
terms
251+
);
252+
// The doc-values field carries the substituted null_value and is not indexed.
assertEquals(new BytesRef("uri"), fields[1].binaryValue());
253+
assertEquals(IndexOptions.NONE, fields[1].fieldType().indexOptions());
254+
assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType());
255+
}
256+
257+
// Verifies the minimal mapping: the mapping source round-trips unchanged, and indexing "1234"
// produces exactly one Lucene field whose token stream and field-type settings match the
// expected defaults.
public void testDefaults() throws Exception {
258+
XContentBuilder mapping = fieldMapping(this::minimalMapping);
259+
DocumentMapper mapper = createDocumentMapper(mapping);
260+
assertEquals(mapping.toString(), mapper.mappingSource().toString());
261+
262+
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
263+
IndexableField[] fields = doc.rootDoc().getFields("field");
264+
assertEquals(1, fields.length);
265+
266+
assertTrue(fields[0] instanceof Field);
267+
Field textField = (Field) fields[0];
268+
List<String> terms = new ArrayList<>();
269+
// Drain the token stream attached to the indexed field and collect every emitted term.
try (TokenStream tokenStream = textField.tokenStreamValue()) {
270+
tokenStream.reset();
271+
CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class);
272+
while (tokenStream.incrementToken()) {
273+
terms.add(charTermAttribute.toString());
274+
}
275+
}
276+
assertEquals(
277+
List.of(
278+
WildcardFieldTypeTests.prefixAnchored("1"),
279+
WildcardFieldTypeTests.prefixAnchored("12"),
280+
"1",
281+
"12",
282+
"123",
283+
"2",
284+
"23",
285+
"234",
286+
"3",
287+
"34",
288+
WildcardFieldTypeTests.suffixAnchored("34"),
289+
"4",
290+
WildcardFieldTypeTests.suffixAnchored("4")
291+
),
292+
terms
293+
);
294+
// The indexed field: docs-only postings, tokenized, no norms, not stored, no term vectors.
IndexableFieldType fieldType = fields[0].fieldType();
295+
assertTrue(fieldType.omitNorms());
296+
assertTrue(fieldType.tokenized());
297+
assertFalse(fieldType.stored());
298+
assertEquals(IndexOptions.DOCS, fieldType.indexOptions());
299+
assertFalse(fieldType.storeTermVectors());
300+
assertFalse(fieldType.storeTermVectorOffsets());
301+
assertFalse(fieldType.storeTermVectorPositions());
302+
assertFalse(fieldType.storeTermVectorPayloads());
303+
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
304+
}
305+
306+
// Verifies fetchSourceValue for wildcard field types built with different options: the
// default builder, an ignore_above limit, a lowercase normalizer, and a null_value.
public void testFetchSourceValue() throws IOException {
307+
Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build();
308+
Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath());
309+
310+
// Default field type: every source value is returned as its string form.
MappedFieldType mapper = new WildcardFieldMapper.Builder("field").build(context).fieldType();
311+
assertEquals(Collections.singletonList("value"), fetchSourceValue(mapper, "value"));
312+
assertEquals(Collections.singletonList("42"), fetchSourceValue(mapper, 42L));
313+
assertEquals(Collections.singletonList("true"), fetchSourceValue(mapper, true));
314+
315+
// Passing a format is rejected with a descriptive error.
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> fetchSourceValue(mapper, "value", "format"));
316+
assertEquals("Field [field] of type [wildcard] doesn't support formats.", e.getMessage());
317+
318+
// ignoreAbove(4): the five-character "value" is dropped; "42" and "true" still fit.
MappedFieldType ignoreAboveMapper = new WildcardFieldMapper.Builder("field").ignoreAbove(4).build(context).fieldType();
319+
assertEquals(Collections.emptyList(), fetchSourceValue(ignoreAboveMapper, "value"));
320+
assertEquals(Collections.singletonList("42"), fetchSourceValue(ignoreAboveMapper, 42L));
321+
assertEquals(Collections.singletonList("true"), fetchSourceValue(ignoreAboveMapper, true));
322+
323+
// Lowercase normalizer: fetched values are lower-cased.
MappedFieldType normalizerMapper = new WildcardFieldMapper.Builder("field", createIndexAnalyzers(null)).normalizer("lowercase")
324+
.build(context)
325+
.fieldType();
326+
assertEquals(Collections.singletonList("value"), fetchSourceValue(normalizerMapper, "VALUE"));
327+
assertEquals(Collections.singletonList("42"), fetchSourceValue(normalizerMapper, 42L));
328+
assertEquals(Collections.singletonList("value"), fetchSourceValue(normalizerMapper, "value"));
329+
330+
// null_value: a null source value is replaced by the configured "NULL" string.
MappedFieldType nullValueMapper = new WildcardFieldMapper.Builder("field").nullValue("NULL").build(context).fieldType();
331+
assertEquals(Collections.singletonList("NULL"), fetchSourceValue(nullValueMapper, null));
74332
}
75333
}

0 commit comments

Comments
 (0)