Skip to content

Commit 7f27ddc

Browse files
Complete keyword changes for star tree (opensearch-project#16233)
--------- Signed-off-by: Bharathwaj G <bharath78910@gmail.com> Signed-off-by: bharath-techie <bharath78910@gmail.com>
1 parent b9d9729 commit 7f27ddc

File tree

45 files changed

+2120
-307
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+2120
-307
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1414
- Increase segrep pressure checkpoint default limit to 30 ([#16577](https://github.com/opensearch-project/OpenSearch/pull/16577/files))
1515
- Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483))
1616
- Make IndexStoreListener a pluggable interface ([#16583](https://github.com/opensearch-project/OpenSearch/pull/16583))
17+
- Support for keyword fields in star-tree index ([#16233](https://github.com/opensearch-project/OpenSearch/pull/16233))
1718
- Add a flag in QueryShardContext to differentiate inner hit query ([#16600](https://github.com/opensearch-project/OpenSearch/pull/16600))
1819

1920
### Dependencies

server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java

+23-9
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
5656
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
5757
.build();
5858

59-
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
59+
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
6060
try {
6161
return jsonBuilder().startObject()
6262
.startObject("composite")
@@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
6868
.endObject()
6969
.startArray("ordered_dimensions")
7070
.startObject()
71-
.field("name", getDim(invalidDim, keywordDim))
71+
.field("name", getDim(invalidDim, ipdim))
72+
.endObject()
73+
.startObject()
74+
.field("name", "keyword_dv")
7275
.endObject()
7376
.endArray()
7477
.startArray("metrics")
7578
.startObject()
76-
.field("name", getDim(invalidMetric, false))
79+
.field("name", getMetric(invalidMetric, false))
7780
.endObject()
7881
.endArray()
7982
.endObject()
@@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
99102
.field("type", "keyword")
100103
.field("doc_values", false)
101104
.endObject()
105+
.startObject("ip")
106+
.field("type", "ip")
107+
.field("doc_values", false)
108+
.endObject()
102109
.endObject()
103110
.endObject();
104111
} catch (IOException e) {
@@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
356363
}
357364

358365
private static String getDim(boolean hasDocValues, boolean isKeyword) {
366+
if (hasDocValues) {
367+
return random().nextBoolean() ? "numeric" : "keyword";
368+
} else if (isKeyword) {
369+
return "ip";
370+
}
371+
return "numeric_dv";
372+
}
373+
374+
private static String getMetric(boolean hasDocValues, boolean isKeyword) {
359375
if (hasDocValues) {
360376
return "numeric";
361377
} else if (isKeyword) {
362-
return "keyword";
378+
return "ip";
363379
}
364380
return "numeric_dv";
365381
}
@@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
398414
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
399415
}
400416
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
417+
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
401418
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
402419
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
403420
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
@@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
665682
IllegalArgumentException.class,
666683
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
667684
);
668-
assertEquals(
669-
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
670-
ex.getMessage()
671-
);
685+
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
672686
}
673687

674688
public void testMaxDimsCompositeIndex() {
@@ -734,7 +748,7 @@ public void testUnsupportedDim() {
734748
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
735749
);
736750
assertEquals(
737-
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
751+
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
738752
ex.getMessage()
739753
);
740754
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.index;
10+
11+
import org.apache.lucene.search.DocIdSetIterator;
12+
13+
/**
14+
* Base wrapper interface for DocValuesWriter.
15+
*/
16+
public interface DocValuesWriterWrapper<T extends DocIdSetIterator> {
17+
T getDocValues();
18+
}

server/src/main/java/org/apache/lucene/index/SortedNumericDocValuesWriterWrapper.java

+6-5
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
*
1919
* @opensearch.experimental
2020
*/
21-
public class SortedNumericDocValuesWriterWrapper {
21+
public class SortedNumericDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedNumericDocValues> {
2222

23-
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;
23+
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriterDelegate;
2424

2525
/**
2626
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance.
@@ -29,7 +29,7 @@ public class SortedNumericDocValuesWriterWrapper {
2929
* @param counter a counter for tracking memory usage
3030
*/
3131
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
32-
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter);
32+
sortedNumericDocValuesWriterDelegate = new SortedNumericDocValuesWriter(fieldInfo, counter);
3333
}
3434

3535
/**
@@ -39,15 +39,16 @@ public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter)
3939
* @param value the value to add
4040
*/
4141
public void addValue(int docID, long value) {
42-
sortedNumericDocValuesWriter.addValue(docID, value);
42+
sortedNumericDocValuesWriterDelegate.addValue(docID, value);
4343
}
4444

4545
/**
4646
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values
4747
*
4848
* @return the {@link SortedNumericDocValues} instance
4949
*/
50+
@Override
5051
public SortedNumericDocValues getDocValues() {
51-
return sortedNumericDocValuesWriter.getDocValues();
52+
return sortedNumericDocValuesWriterDelegate.getDocValues();
5253
}
5354
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.index;
10+
11+
import org.apache.lucene.util.ByteBlockPool;
12+
import org.apache.lucene.util.BytesRef;
13+
import org.apache.lucene.util.Counter;
14+
15+
/**
16+
* A wrapper class for writing sorted set doc values.
17+
* <p>
18+
* This class provides a convenient way to add sorted set doc values to a field
19+
* and retrieve the corresponding {@link SortedSetDocValues} instance.
20+
*
21+
* @opensearch.experimental
22+
*/
23+
public class SortedSetDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedSetDocValues> {
24+
25+
private final SortedSetDocValuesWriter sortedSetDocValuesWriterDelegate;
26+
27+
/**
28+
* Sole constructor. Constructs a new {@link SortedSetDocValuesWriterWrapper} instance.
29+
*
30+
* @param fieldInfo the field information for the field being written
31+
* @param counter a counter for tracking memory usage
32+
* @param byteBlockPool a byte block pool for allocating byte blocks
33+
* @see SortedSetDocValuesWriter
34+
*/
35+
public SortedSetDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter, ByteBlockPool byteBlockPool) {
36+
sortedSetDocValuesWriterDelegate = new SortedSetDocValuesWriter(fieldInfo, counter, byteBlockPool);
37+
}
38+
39+
/**
40+
* Adds a bytes ref value to the sorted set doc values for the specified document.
41+
*
42+
* @param docID the document ID
43+
* @param value the value to add
44+
*/
45+
public void addValue(int docID, BytesRef value) {
46+
sortedSetDocValuesWriterDelegate.addValue(docID, value);
47+
}
48+
49+
/**
50+
* Returns the {@link SortedSetDocValues} instance containing the sorted set doc values
51+
*
52+
* @return the {@link SortedSetDocValues} instance
53+
*/
54+
@Override
55+
public SortedSetDocValues getDocValues() {
56+
return sortedSetDocValuesWriterDelegate.getDocValues();
57+
}
58+
}

server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java

+12-21
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import org.apache.lucene.codecs.DocValuesProducer;
1515
import org.apache.lucene.index.BinaryDocValues;
1616
import org.apache.lucene.index.CorruptIndexException;
17-
import org.apache.lucene.index.DocValues;
17+
import org.apache.lucene.index.DocValuesType;
1818
import org.apache.lucene.index.FieldInfo;
1919
import org.apache.lucene.index.FieldInfos;
2020
import org.apache.lucene.index.IndexFileNames;
@@ -40,6 +40,7 @@
4040

4141
import java.io.IOException;
4242
import java.util.ArrayList;
43+
import java.util.HashMap;
4344
import java.util.LinkedHashMap;
4445
import java.util.List;
4546
import java.util.Map;
@@ -111,7 +112,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
111112
readState.segmentInfo.getId(),
112113
readState.segmentSuffix
113114
);
114-
115+
Map<String, DocValuesType> dimensionFieldTypeMap = new HashMap<>();
115116
while (true) {
116117

117118
// validate magic marker
@@ -155,13 +156,16 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
155156
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
156157
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);
157158

158-
List<String> dimensionFields = starTreeMetadata.getDimensionFields();
159-
159+
Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
160160
// generating star tree unique fields (fully qualified name for dimension and metrics)
161-
for (String dimensions : dimensionFields) {
162-
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
161+
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
162+
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
163+
compositeFieldName,
164+
dimensionEntry.getKey()
165+
);
166+
fields.add(dimName);
167+
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
163168
}
164-
165169
// adding metric fields
166170
for (Metric metric : starTreeMetadata.getMetrics()) {
167171
for (MetricStat metricStat : metric.getBaseMetrics()) {
@@ -184,7 +188,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
184188

185189
// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
186190
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
187-
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
191+
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields, dimensionFieldTypeMap));
188192
this.readState = new SegmentReadState(
189193
readState.directory,
190194
readState.segmentInfo,
@@ -291,17 +295,4 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp
291295

292296
}
293297

294-
/**
295-
* Returns the sorted numeric doc values for the given sorted numeric field.
296-
* If the sorted numeric field is null, it returns an empty doc id set iterator.
297-
* <p>
298-
* Sorted numeric field can be null for cases where the segment doesn't hold a particular value.
299-
*
300-
* @param sortedNumeric the sorted numeric doc values for a field
301-
* @return empty sorted numeric values if the field is not present, else sortedNumeric
302-
*/
303-
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
304-
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
305-
}
306-
307298
}

0 commit comments

Comments
 (0)