Skip to content

Commit de6b87b

Browse files
Add Star Tree unsigned-long indexing changes (#17156) (#17161)
Signed-off-by: Shailesh Singh <shaileshkumarsingh260@gmail.com>
1 parent 6b60f22 commit de6b87b

30 files changed

+1226
-251
lines changed

server/src/internalClusterTest/java/org/opensearch/index/mapper/StarTreeMapperIT.java

+10
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
7676
.startObject()
7777
.field("name", "keyword_dv")
7878
.endObject()
79+
.startObject()
80+
.field("name", "unsignedLongDimension") // UnsignedLongDimension
81+
.endObject()
7982
.endArray()
8083
.startArray("metrics")
8184
.startObject()
@@ -117,6 +120,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
117120
.field("type", "wildcard")
118121
.field("doc_values", false)
119122
.endObject()
123+
.startObject("unsignedLongDimension")
124+
.field("type", "unsigned_long")
125+
.field("doc_values", true)
126+
.endObject()
120127
.endObject()
121128
.endObject();
122129
} catch (IOException e) {
@@ -605,8 +612,11 @@ public void testValidCompositeIndex() {
605612
for (int i = 0; i < dateDim.getSortedCalendarIntervals().size(); i++) {
606613
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
607614
}
615+
assertEquals(4, starTreeFieldType.getDimensions().size());
608616
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
609617
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
618+
assertEquals("unsignedLongDimension", starTreeFieldType.getDimensions().get(3).getField());
619+
610620
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
611621
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
612622
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());

server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.opensearch.index.compositeindex.CompositeIndexMetadata;
3434
import org.opensearch.index.compositeindex.datacube.Metric;
3535
import org.opensearch.index.compositeindex.datacube.MetricStat;
36+
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.DimensionConfig;
3637
import org.opensearch.index.compositeindex.datacube.startree.fileformats.meta.StarTreeMetadata;
3738
import org.opensearch.index.compositeindex.datacube.startree.index.CompositeIndexValues;
3839
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
@@ -156,15 +157,15 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
156157
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
157158
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);
158159

159-
Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
160+
Map<String, DimensionConfig> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
160161
// generating star tree unique fields (fully qualified name for dimension and metrics)
161-
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
162+
for (Map.Entry<String, DimensionConfig> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
162163
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
163164
compositeFieldName,
164165
dimensionEntry.getKey()
165166
);
166167
fields.add(dimName);
167-
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
168+
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue().getDocValuesType());
168169
}
169170
// adding metric fields
170171
for (Metric metric : starTreeMetadata.getMetrics()) {

server/src/main/java/org/opensearch/index/compositeindex/datacube/Dimension.java

+18-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.opensearch.common.annotation.ExperimentalApi;
1313
import org.opensearch.core.xcontent.ToXContent;
1414

15+
import java.util.Comparator;
1516
import java.util.List;
1617
import java.util.function.Consumer;
1718

@@ -34,8 +35,8 @@ public interface Dimension extends ToXContent {
3435
/**
3536
* Sets the dimension values with the consumer
3637
*
37-
* @param value The value to be set
38-
* @param dimSetter Consumer which sets the dimensions
38+
* @param value The value to be set
39+
* @param dimSetter Consumer which sets the dimensions
3940
*/
4041
void setDimensionValues(final Long value, final Consumer<Long> dimSetter);
4142

@@ -45,4 +46,19 @@ public interface Dimension extends ToXContent {
4546
List<String> getSubDimensionNames();
4647

4748
DocValuesType getDocValuesType();
49+
50+
/**
51+
* Returns the dimensionDataType used for comparing and parsing dimension values. <br>
52+
* This determines how numeric values are compared and parsed: <br>
53+
* - DimensionDataType.UNSIGNED_LONG for unsigned long values <br>
54+
* - DimensionDataType.LONG for all other numeric types (DEFAULT)
55+
*/
56+
default DimensionDataType getDimensionDataType() {
57+
return DimensionDataType.LONG;
58+
}
59+
60+
default Comparator<Long> comparator() {
61+
return (a, b) -> getDimensionDataType().compare(a, b);
62+
}
63+
4864
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.compositeindex.datacube;
10+
11+
import org.opensearch.common.annotation.ExperimentalApi;
12+
13+
/**
14+
* Represents the data type of the dimension value.
15+
*
16+
* @opensearch.experimental
17+
*/
18+
@ExperimentalApi
19+
public enum DimensionDataType {
20+
LONG {
21+
@Override
22+
int compare(Long a, Long b) {
23+
if (a == null && b == null) {
24+
return 0;
25+
}
26+
if (b == null) {
27+
return -1;
28+
}
29+
if (a == null) {
30+
return 1;
31+
}
32+
return Long.compare(a, b);
33+
}
34+
},
35+
UNSIGNED_LONG {
36+
@Override
37+
int compare(Long a, Long b) {
38+
if (a == null && b == null) {
39+
return 0;
40+
}
41+
if (b == null) {
42+
return -1;
43+
}
44+
if (a == null) {
45+
return 1;
46+
}
47+
return Long.compareUnsigned(a, b);
48+
}
49+
};
50+
51+
abstract int compare(Long a, Long b);
52+
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionFactory.java

+4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ public static Dimension parseAndCreateDimension(
4545
return parseAndCreateDateDimension(name, dimensionMap, c);
4646
case NumericDimension.NUMERIC:
4747
return new NumericDimension(name);
48+
case UnsignedLongDimension.UNSIGNED_LONG:
49+
return new UnsignedLongDimension(name);
4850
case ORDINAL:
4951
return new OrdinalDimension(name);
5052
case IP:
@@ -72,6 +74,8 @@ public static Dimension parseAndCreateDimension(
7274
return parseAndCreateDateDimension(name, dimensionMap, c);
7375
case NUMERIC:
7476
return new NumericDimension(name);
77+
case UNSIGNED_LONG:
78+
return new UnsignedLongDimension(name);
7579
case ORDINAL:
7680
return new OrdinalDimension(name);
7781
case IP:

server/src/main/java/org/opensearch/index/compositeindex/datacube/DimensionType.java

+6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,12 @@ public enum DimensionType {
2323
*/
2424
NUMERIC,
2525

26+
/**
27+
* Represents an unsigned long dimension type.
28+
* This is used for dimensions that contain numerical values of type unsigned long.
29+
*/
30+
UNSIGNED_LONG,
31+
2632
/**
2733
* Represents a date dimension type.
2834
* This is used for dimensions that contain date or timestamp values.

server/src/main/java/org/opensearch/index/compositeindex/datacube/ReadDimension.java

+15
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,24 @@ public class ReadDimension implements Dimension {
2626
public static final String READ = "read";
2727
private final String field;
2828
private final DocValuesType docValuesType;
29+
private final DimensionDataType dimensionDataType;
2930

3031
public ReadDimension(String field) {
3132
this.field = field;
3233
this.docValuesType = DocValuesType.SORTED_NUMERIC;
34+
this.dimensionDataType = DimensionDataType.LONG;
3335
}
3436

3537
public ReadDimension(String field, DocValuesType docValuesType) {
3638
this.field = field;
3739
this.docValuesType = docValuesType;
40+
this.dimensionDataType = DimensionDataType.LONG;
41+
}
42+
43+
public ReadDimension(String field, DocValuesType docValuesType, DimensionDataType dimensionDataType) {
44+
this.field = field;
45+
this.docValuesType = docValuesType;
46+
this.dimensionDataType = dimensionDataType;
3847
}
3948

4049
public String getField() {
@@ -82,4 +91,10 @@ public boolean equals(Object o) {
8291
public int hashCode() {
8392
return Objects.hash(field);
8493
}
94+
95+
@Override
96+
public DimensionDataType getDimensionDataType() {
97+
return dimensionDataType;
98+
}
99+
85100
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.compositeindex.datacube;
10+
11+
import org.opensearch.core.xcontent.XContentBuilder;
12+
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
13+
14+
import java.io.IOException;
15+
16+
/**
17+
* Unsigned Long dimension class
18+
*
19+
* @opensearch.experimental
20+
*/
21+
public class UnsignedLongDimension extends NumericDimension {
22+
23+
public static final String UNSIGNED_LONG = "unsigned_long";
24+
25+
public UnsignedLongDimension(String field) {
26+
super(field);
27+
}
28+
29+
@Override
30+
public DimensionDataType getDimensionDataType() {
31+
return DimensionDataType.UNSIGNED_LONG;
32+
}
33+
34+
@Override
35+
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
36+
builder.startObject();
37+
builder.field(CompositeDataCubeFieldType.NAME, getField());
38+
builder.field(CompositeDataCubeFieldType.TYPE, UNSIGNED_LONG);
39+
builder.endObject();
40+
return builder;
41+
}
42+
43+
}

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java

+6
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import java.util.ArrayList;
5656
import java.util.Arrays;
5757
import java.util.Collections;
58+
import java.util.Comparator;
5859
import java.util.HashMap;
5960
import java.util.HashSet;
6061
import java.util.Iterator;
@@ -112,6 +113,8 @@ public abstract class BaseStarTreeBuilder implements StarTreeBuilder {
112113
// This should be true for merge flows
113114
protected boolean isMerge = false;
114115

116+
protected final List<Comparator<Long>> dimensionComparators = new ArrayList<>();
117+
115118
/**
116119
* Reads all the configuration related to dimensions and metrics, builds a star-tree based on the different construction parameters.
117120
*
@@ -136,6 +139,9 @@ protected BaseStarTreeBuilder(
136139
int numDims = 0;
137140
for (Dimension dim : starTreeField.getDimensionsOrder()) {
138141
numDims += dim.getNumSubDimensions();
142+
for (int i = 0; i < dim.getNumSubDimensions(); i++) {
143+
dimensionComparators.add(dim.comparator());
144+
}
139145
dimensionsSplitOrder.add(dim);
140146
}
141147
this.numDimensions = numDims;

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OffHeapStarTreeBuilder.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ private Iterator<StarTreeDocument> sortAndReduceDocuments(int[] sortedDocIds, in
235235
} catch (IOException e) {
236236
throw new UncheckedIOException(e);
237237
}
238-
});
238+
}, dimensionComparators);
239239
} catch (UncheckedIOException ex) {
240240
// Unwrap UncheckedIOException and throw as IOException
241241
if (ex.getCause() != null) {
@@ -308,6 +308,7 @@ public List<StarTreeDocument> getStarTreeDocuments() throws IOException {
308308
@Override
309309
public Long getDimensionValue(int docId, int dimensionId) throws IOException {
310310
return starTreeDocumentFileManager.getDimensionValue(docId, dimensionId);
311+
311312
}
312313

313314
/**
@@ -334,7 +335,8 @@ public Iterator<StarTreeDocument> generateStarTreeDocumentsForStarNode(int start
334335
} catch (IOException e) {
335336
throw new RuntimeException(e);
336337
}
337-
});
338+
}, dimensionComparators);
339+
338340
// Create an iterator for aggregated documents
339341
return new Iterator<StarTreeDocument>() {
340342
boolean hasNext = true;

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/OnHeapStarTreeBuilder.java

+10-20
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ public class OnHeapStarTreeBuilder extends BaseStarTreeBuilder {
4242
/**
4343
* Constructor for OnHeapStarTreeBuilder
4444
*
45-
* @param metaOut an index output to write star-tree metadata
46-
* @param dataOut an index output to write star-tree data
45+
* @param metaOut an index output to write star-tree metadata
46+
* @param dataOut an index output to write star-tree data
4747
* @param starTreeField star-tree field
4848
* @param segmentWriteState segment write state
4949
* @param mapperService helps with the numeric type of field
@@ -82,9 +82,8 @@ public Long getDimensionValue(int docId, int dimensionId) {
8282
* Sorts and aggregates all the documents of the segment based on dimension and metrics configuration
8383
*
8484
* @param dimensionReaders List of docValues readers to read dimensions from the segment
85-
* @param metricReaders List of docValues readers to read metrics from the segment
85+
* @param metricReaders List of docValues readers to read metrics from the segment
8686
* @return Iterator of star-tree documents
87-
*
8887
*/
8988
@Override
9089
public Iterator<StarTreeDocument> sortAndAggregateSegmentDocuments(
@@ -161,7 +160,7 @@ StarTreeDocument[] getSegmentsStarTreeDocuments(List<StarTreeValues> starTreeVal
161160
Iterator<StarTreeDocument> sortAndAggregateStarTreeDocuments(StarTreeDocument[] starTreeDocuments, boolean isMerge) {
162161

163162
// sort all the documents
164-
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, 0);
163+
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, -1);
165164

166165
// merge the documents
167166
return mergeStarTreeDocuments(starTreeDocuments, isMerge);
@@ -222,7 +221,7 @@ public Iterator<StarTreeDocument> generateStarTreeDocumentsForStarNode(int start
222221
}
223222

224223
// sort star tree documents starting from the dimension after the given dimension id (as previous dimension ids have already been processed)
225-
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, dimensionId + 1);
224+
sortStarTreeDocumentsFromDimensionId(starTreeDocuments, dimensionId);
226225

227226
return new Iterator<StarTreeDocument>() {
228227
boolean hasNext = true;
@@ -267,22 +266,13 @@ public StarTreeDocument next() {
267266
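* Sorts the star-tree documents on the dimensions that follow the given dimension id
268267
*
269268
* @param starTreeDocuments star-tree documents
270-
* @param dimensionId id of the dimension
269+
* @param dimensionId id of the dimension after which sorting starts, i.e. dimensions dimensionId + 1 onwards are compared (pass -1 to sort on all dimensions)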
271270
*/
272271
private void sortStarTreeDocumentsFromDimensionId(StarTreeDocument[] starTreeDocuments, int dimensionId) {
273-
Arrays.sort(starTreeDocuments, (o1, o2) -> {
274-
for (int i = dimensionId; i < numDimensions; i++) {
275-
if (!Objects.equals(o1.dimensions[i], o2.dimensions[i])) {
276-
if (o1.dimensions[i] == null && o2.dimensions[i] == null) {
277-
return 0;
278-
}
279-
if (o1.dimensions[i] == null) {
280-
return 1;
281-
}
282-
if (o2.dimensions[i] == null) {
283-
return -1;
284-
}
285-
return Long.compare(o1.dimensions[i], o2.dimensions[i]);
272+
Arrays.sort(starTreeDocuments, (doc1, doc2) -> {
273+
for (int i = dimensionId + 1; i < numDimensions; i++) {
274+
if (!Objects.equals(doc1.dimensions[i], doc2.dimensions[i])) {
275+
return dimensionComparators.get(i).compare(doc1.dimensions[i], doc2.dimensions[i]);
286276
}
287277
}
288278
return 0;

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/fileformats/StarTreeWriter.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,11 @@ public class StarTreeWriter {
2727
/** Initial version for the star tree writer */
2828
public static final int VERSION_START = 0;
2929

30+
/** Version of the star tree writer whose metadata format was updated to handle unsigned long dimensions */
31+
public static final int VERSION_DIMENSION_DATA_TYPE = 1;
32+
3033
/** Current version for the star tree writer */
31-
public static final int VERSION_CURRENT = VERSION_START;
34+
public static final int VERSION_CURRENT = VERSION_DIMENSION_DATA_TYPE;
3235

3336
public StarTreeWriter() {}
3437

0 commit comments

Comments
 (0)