|
8 | 8 |
|
9 | 9 | package org.opensearch.index.mapper;
|
10 | 10 |
|
| 11 | +import org.apache.lucene.analysis.TokenStream; |
11 | 12 | import org.apache.lucene.analysis.Tokenizer;
|
| 13 | +import org.apache.lucene.analysis.core.LowerCaseFilter; |
| 14 | +import org.apache.lucene.analysis.core.WhitespaceTokenizer; |
| 15 | +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
12 | 16 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
| 17 | +import org.apache.lucene.document.Field; |
| 18 | +import org.apache.lucene.index.DocValuesType; |
| 19 | +import org.apache.lucene.index.IndexOptions; |
| 20 | +import org.apache.lucene.index.IndexableField; |
| 21 | +import org.apache.lucene.index.IndexableFieldType; |
| 22 | +import org.apache.lucene.util.BytesRef; |
| 23 | +import org.opensearch.Version; |
| 24 | +import org.opensearch.cluster.metadata.IndexMetadata; |
| 25 | +import org.opensearch.common.settings.Settings; |
13 | 26 | import org.opensearch.core.xcontent.XContentBuilder;
|
| 27 | +import org.opensearch.index.IndexSettings; |
| 28 | +import org.opensearch.index.analysis.AnalyzerScope; |
| 29 | +import org.opensearch.index.analysis.CharFilterFactory; |
| 30 | +import org.opensearch.index.analysis.CustomAnalyzer; |
| 31 | +import org.opensearch.index.analysis.IndexAnalyzers; |
| 32 | +import org.opensearch.index.analysis.LowercaseNormalizer; |
| 33 | +import org.opensearch.index.analysis.NamedAnalyzer; |
| 34 | +import org.opensearch.index.analysis.TokenFilterFactory; |
| 35 | +import org.opensearch.index.analysis.TokenizerFactory; |
14 | 36 |
|
15 | 37 | import java.io.IOException;
|
16 | 38 | import java.io.StringReader;
|
17 | 39 | import java.util.ArrayList;
|
| 40 | +import java.util.Collections; |
18 | 41 | import java.util.List;
|
| 42 | +import java.util.Map; |
| 43 | + |
| 44 | +import static java.util.Collections.singletonMap; |
| 45 | +import static org.opensearch.index.mapper.FieldTypeTestCase.fetchSourceValue; |
19 | 46 |
|
20 | 47 | public class WildcardFieldMapperTests extends MapperTestCase {
|
21 | 48 |
|
@@ -71,5 +98,236 @@ public void testTokenizer() throws IOException {
|
71 | 98 | ),
|
72 | 99 | terms
|
73 | 100 | );
|
| 101 | + terms.clear(); |
| 102 | + try (Tokenizer tokenizer = new WildcardFieldMapper.WildcardFieldTokenizer()) { |
| 103 | + tokenizer.setReader(new StringReader("a")); |
| 104 | + tokenizer.reset(); |
| 105 | + CharTermAttribute charTermAttribute = tokenizer.getAttribute(CharTermAttribute.class); |
| 106 | + while (tokenizer.incrementToken()) { |
| 107 | + terms.add(charTermAttribute.toString()); |
| 108 | + } |
| 109 | + } |
| 110 | + assertEquals(List.of(WildcardFieldTypeTests.prefixAnchored("a"), "a", WildcardFieldTypeTests.suffixAnchored("a")), terms); |
| 111 | + } |
| 112 | + |
| 113 | + public void testEnableDocValues() throws IOException { |
| 114 | + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard").field("doc_values", true))); |
| 115 | + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); |
| 116 | + IndexableField[] fields = doc.rootDoc().getFields("field"); |
| 117 | + assertEquals(2, fields.length); |
| 118 | + assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); |
| 119 | + assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType()); |
| 120 | + |
| 121 | + mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard"))); |
| 122 | + doc = mapper.parse(source(b -> b.field("field", "1234"))); |
| 123 | + fields = doc.rootDoc().getFields("field"); |
| 124 | + assertEquals(1, fields.length); |
| 125 | + assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); |
| 126 | + } |
| 127 | + |
| 128 | + @Override |
| 129 | + protected IndexAnalyzers createIndexAnalyzers(IndexSettings indexSettings) { |
| 130 | + return new IndexAnalyzers( |
| 131 | + singletonMap("default", new NamedAnalyzer("default", AnalyzerScope.INDEX, new StandardAnalyzer())), |
| 132 | + Map.of( |
| 133 | + "lowercase", |
| 134 | + new NamedAnalyzer("lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()), |
| 135 | + "other_lowercase", |
| 136 | + new NamedAnalyzer("other_lowercase", AnalyzerScope.INDEX, new LowercaseNormalizer()) |
| 137 | + ), |
| 138 | + singletonMap( |
| 139 | + "lowercase", |
| 140 | + new NamedAnalyzer( |
| 141 | + "lowercase", |
| 142 | + AnalyzerScope.INDEX, |
| 143 | + new CustomAnalyzer( |
| 144 | + TokenizerFactory.newFactory("lowercase", WhitespaceTokenizer::new), |
| 145 | + new CharFilterFactory[0], |
| 146 | + new TokenFilterFactory[] { new TokenFilterFactory() { |
| 147 | + |
| 148 | + @Override |
| 149 | + public String name() { |
| 150 | + return "lowercase"; |
| 151 | + } |
| 152 | + |
| 153 | + @Override |
| 154 | + public TokenStream create(TokenStream tokenStream) { |
| 155 | + return new LowerCaseFilter(tokenStream); |
| 156 | + } |
| 157 | + } } |
| 158 | + ) |
| 159 | + ) |
| 160 | + ) |
| 161 | + ); |
| 162 | + } |
| 163 | + |
| 164 | + public void testNormalizer() throws IOException { |
| 165 | + DocumentMapper mapper = createDocumentMapper( |
| 166 | + fieldMapping(b -> b.field("type", "wildcard").field("normalizer", "lowercase").field("doc_values", true)) |
| 167 | + ); |
| 168 | + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "AbC"))); |
| 169 | + |
| 170 | + IndexableField[] fields = doc.rootDoc().getFields("field"); |
| 171 | + assertEquals(2, fields.length); |
| 172 | + |
| 173 | + assertTrue(fields[0] instanceof Field); |
| 174 | + Field textField = (Field) fields[0]; |
| 175 | + List<String> terms = new ArrayList<>(); |
| 176 | + try (TokenStream tokenStream = textField.tokenStreamValue()) { |
| 177 | + tokenStream.reset(); |
| 178 | + CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); |
| 179 | + while (tokenStream.incrementToken()) { |
| 180 | + terms.add(charTermAttribute.toString()); |
| 181 | + } |
| 182 | + } |
| 183 | + assertEquals( |
| 184 | + List.of( |
| 185 | + WildcardFieldTypeTests.prefixAnchored("a"), |
| 186 | + WildcardFieldTypeTests.prefixAnchored("ab"), |
| 187 | + "a", |
| 188 | + "ab", |
| 189 | + "abc", |
| 190 | + "b", |
| 191 | + "bc", |
| 192 | + WildcardFieldTypeTests.suffixAnchored("bc"), |
| 193 | + "c", |
| 194 | + WildcardFieldTypeTests.suffixAnchored("c") |
| 195 | + ), |
| 196 | + terms |
| 197 | + ); |
| 198 | + IndexableFieldType fieldType = fields[0].fieldType(); |
| 199 | + assertTrue(fieldType.omitNorms()); |
| 200 | + assertTrue(fieldType.tokenized()); |
| 201 | + assertFalse(fieldType.stored()); |
| 202 | + assertEquals(IndexOptions.DOCS, fieldType.indexOptions()); |
| 203 | + assertFalse(fieldType.storeTermVectors()); |
| 204 | + assertFalse(fieldType.storeTermVectorOffsets()); |
| 205 | + assertFalse(fieldType.storeTermVectorPositions()); |
| 206 | + assertFalse(fieldType.storeTermVectorPayloads()); |
| 207 | + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); |
| 208 | + |
| 209 | + assertEquals(new BytesRef("abc"), fields[1].binaryValue()); |
| 210 | + fieldType = fields[1].fieldType(); |
| 211 | + assertEquals(IndexOptions.NONE, fieldType.indexOptions()); |
| 212 | + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); |
| 213 | + } |
| 214 | + |
| 215 | + public void testNullValue() throws IOException { |
| 216 | + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); |
| 217 | + ParsedDocument doc = mapper.parse(source(b -> b.nullField("field"))); |
| 218 | + assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field")); |
| 219 | + |
| 220 | + mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "wildcard").field("null_value", "uri").field("doc_values", true))); |
| 221 | + doc = mapper.parse(source(b -> {})); |
| 222 | + IndexableField[] fields = doc.rootDoc().getFields("field"); |
| 223 | + assertEquals(0, fields.length); |
| 224 | + doc = mapper.parse(source(b -> b.nullField("field"))); |
| 225 | + fields = doc.rootDoc().getFields("field"); |
| 226 | + assertEquals(2, fields.length); |
| 227 | + assertTrue(fields[0] instanceof Field); |
| 228 | + Field textField = (Field) fields[0]; |
| 229 | + List<String> terms = new ArrayList<>(); |
| 230 | + try (TokenStream tokenStream = textField.tokenStreamValue()) { |
| 231 | + tokenStream.reset(); |
| 232 | + CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); |
| 233 | + while (tokenStream.incrementToken()) { |
| 234 | + terms.add(charTermAttribute.toString()); |
| 235 | + } |
| 236 | + } |
| 237 | + assertEquals( |
| 238 | + List.of( |
| 239 | + WildcardFieldTypeTests.prefixAnchored("u"), |
| 240 | + WildcardFieldTypeTests.prefixAnchored("ur"), |
| 241 | + "u", |
| 242 | + "ur", |
| 243 | + "uri", |
| 244 | + "r", |
| 245 | + "ri", |
| 246 | + WildcardFieldTypeTests.suffixAnchored("ri"), |
| 247 | + "i", |
| 248 | + WildcardFieldTypeTests.suffixAnchored("i") |
| 249 | + ), |
| 250 | + terms |
| 251 | + ); |
| 252 | + assertEquals(new BytesRef("uri"), fields[1].binaryValue()); |
| 253 | + assertEquals(IndexOptions.NONE, fields[1].fieldType().indexOptions()); |
| 254 | + assertEquals(DocValuesType.SORTED_SET, fields[1].fieldType().docValuesType()); |
| 255 | + } |
| 256 | + |
| 257 | + public void testDefaults() throws Exception { |
| 258 | + XContentBuilder mapping = fieldMapping(this::minimalMapping); |
| 259 | + DocumentMapper mapper = createDocumentMapper(mapping); |
| 260 | + assertEquals(mapping.toString(), mapper.mappingSource().toString()); |
| 261 | + |
| 262 | + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); |
| 263 | + IndexableField[] fields = doc.rootDoc().getFields("field"); |
| 264 | + assertEquals(1, fields.length); |
| 265 | + |
| 266 | + assertTrue(fields[0] instanceof Field); |
| 267 | + Field textField = (Field) fields[0]; |
| 268 | + List<String> terms = new ArrayList<>(); |
| 269 | + try (TokenStream tokenStream = textField.tokenStreamValue()) { |
| 270 | + tokenStream.reset(); |
| 271 | + CharTermAttribute charTermAttribute = tokenStream.getAttribute(CharTermAttribute.class); |
| 272 | + while (tokenStream.incrementToken()) { |
| 273 | + terms.add(charTermAttribute.toString()); |
| 274 | + } |
| 275 | + } |
| 276 | + assertEquals( |
| 277 | + List.of( |
| 278 | + WildcardFieldTypeTests.prefixAnchored("1"), |
| 279 | + WildcardFieldTypeTests.prefixAnchored("12"), |
| 280 | + "1", |
| 281 | + "12", |
| 282 | + "123", |
| 283 | + "2", |
| 284 | + "23", |
| 285 | + "234", |
| 286 | + "3", |
| 287 | + "34", |
| 288 | + WildcardFieldTypeTests.suffixAnchored("34"), |
| 289 | + "4", |
| 290 | + WildcardFieldTypeTests.suffixAnchored("4") |
| 291 | + ), |
| 292 | + terms |
| 293 | + ); |
| 294 | + IndexableFieldType fieldType = fields[0].fieldType(); |
| 295 | + assertTrue(fieldType.omitNorms()); |
| 296 | + assertTrue(fieldType.tokenized()); |
| 297 | + assertFalse(fieldType.stored()); |
| 298 | + assertEquals(IndexOptions.DOCS, fieldType.indexOptions()); |
| 299 | + assertFalse(fieldType.storeTermVectors()); |
| 300 | + assertFalse(fieldType.storeTermVectorOffsets()); |
| 301 | + assertFalse(fieldType.storeTermVectorPositions()); |
| 302 | + assertFalse(fieldType.storeTermVectorPayloads()); |
| 303 | + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); |
| 304 | + } |
| 305 | + |
| 306 | + public void testFetchSourceValue() throws IOException { |
| 307 | + Settings settings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id).build(); |
| 308 | + Mapper.BuilderContext context = new Mapper.BuilderContext(settings, new ContentPath()); |
| 309 | + |
| 310 | + MappedFieldType mapper = new WildcardFieldMapper.Builder("field").build(context).fieldType(); |
| 311 | + assertEquals(Collections.singletonList("value"), fetchSourceValue(mapper, "value")); |
| 312 | + assertEquals(Collections.singletonList("42"), fetchSourceValue(mapper, 42L)); |
| 313 | + assertEquals(Collections.singletonList("true"), fetchSourceValue(mapper, true)); |
| 314 | + |
| 315 | + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> fetchSourceValue(mapper, "value", "format")); |
| 316 | + assertEquals("Field [field] of type [wildcard] doesn't support formats.", e.getMessage()); |
| 317 | + |
| 318 | + MappedFieldType ignoreAboveMapper = new WildcardFieldMapper.Builder("field").ignoreAbove(4).build(context).fieldType(); |
| 319 | + assertEquals(Collections.emptyList(), fetchSourceValue(ignoreAboveMapper, "value")); |
| 320 | + assertEquals(Collections.singletonList("42"), fetchSourceValue(ignoreAboveMapper, 42L)); |
| 321 | + assertEquals(Collections.singletonList("true"), fetchSourceValue(ignoreAboveMapper, true)); |
| 322 | + |
| 323 | + MappedFieldType normalizerMapper = new WildcardFieldMapper.Builder("field", createIndexAnalyzers(null)).normalizer("lowercase") |
| 324 | + .build(context) |
| 325 | + .fieldType(); |
| 326 | + assertEquals(Collections.singletonList("value"), fetchSourceValue(normalizerMapper, "VALUE")); |
| 327 | + assertEquals(Collections.singletonList("42"), fetchSourceValue(normalizerMapper, 42L)); |
| 328 | + assertEquals(Collections.singletonList("value"), fetchSourceValue(normalizerMapper, "value")); |
| 329 | + |
| 330 | + MappedFieldType nullValueMapper = new WildcardFieldMapper.Builder("field").nullValue("NULL").build(context).fieldType(); |
| 331 | + assertEquals(Collections.singletonList("NULL"), fetchSourceValue(nullValueMapper, null)); |
74 | 332 | }
|
75 | 333 | }
|
0 commit comments