Skip to content

Commit ad7f9e7

Browse files
Handle delete cases for star tree (#16380)
Signed-off-by: Bharathwaj G <bharath78910@gmail.com>
1 parent c4a9cc1 commit ad7f9e7

File tree

5 files changed

+158
-23
lines changed

5 files changed

+158
-23
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.apache.lucene.index;
10+
11+
import org.apache.lucene.codecs.DocValuesProducer;
12+
13+
import java.util.Collections;
14+
import java.util.Set;
15+
16+
/**
17+
* Utility class for DocValuesProducers
18+
* @opensearch.internal
19+
*/
20+
public class DocValuesProducerUtil {
21+
/**
22+
* Returns the segment doc values producers for the given doc values producer.
23+
* If the given doc values producer is not a segment doc values producer, an empty set is returned.
24+
* @param docValuesProducer the doc values producer
25+
* @return the segment doc values producers
26+
*/
27+
public static Set<DocValuesProducer> getSegmentDocValuesProducers(DocValuesProducer docValuesProducer) {
28+
if (docValuesProducer instanceof SegmentDocValuesProducer) {
29+
return (((SegmentDocValuesProducer) docValuesProducer).dvProducers);
30+
}
31+
return Collections.emptySet();
32+
}
33+
}

server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesReader.java

+7-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,13 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
185185
// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
186186
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
187187
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
188-
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);
188+
this.readState = new SegmentReadState(
189+
readState.directory,
190+
readState.segmentInfo,
191+
fieldInfos,
192+
readState.context,
193+
readState.segmentSuffix
194+
);
189195

190196
// initialize star-tree doc values producer
191197

server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java

+18-21
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import org.apache.lucene.codecs.DocValuesConsumer;
1313
import org.apache.lucene.codecs.DocValuesProducer;
1414
import org.apache.lucene.index.DocValues;
15+
import org.apache.lucene.index.DocValuesProducerUtil;
1516
import org.apache.lucene.index.DocValuesType;
1617
import org.apache.lucene.index.EmptyDocValuesProducer;
1718
import org.apache.lucene.index.FieldInfo;
@@ -35,7 +36,6 @@
3536
import org.opensearch.index.mapper.CompositeMappedFieldType;
3637
import org.opensearch.index.mapper.DocCountFieldMapper;
3738
import org.opensearch.index.mapper.MapperService;
38-
import org.opensearch.index.mapper.StarTreeMapper;
3939

4040
import java.io.IOException;
4141
import java.util.ArrayList;
@@ -221,12 +221,8 @@ private void createCompositeIndicesIfPossible(DocValuesProducer valuesProducer,
221221
}
222222
// we have all the required fields to build composite fields
223223
if (compositeFieldSet.isEmpty()) {
224-
for (CompositeMappedFieldType mappedType : compositeMappedFieldTypes) {
225-
if (mappedType instanceof StarTreeMapper.StarTreeFieldType) {
226-
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
227-
starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer);
228-
}
229-
}
224+
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
225+
starTreesBuilder.build(metaOut, dataOut, fieldProducerMap, compositeDocValuesConsumer);
230226
}
231227
}
232228
}
@@ -285,27 +281,27 @@ private void mergeStarTreeFields(MergeState mergeState) throws IOException {
285281
if (mergeState.docValuesProducers[i] instanceof CompositeIndexReader) {
286282
reader = (CompositeIndexReader) mergeState.docValuesProducers[i];
287283
} else {
288-
continue;
284+
Set<DocValuesProducer> docValuesProducers = DocValuesProducerUtil.getSegmentDocValuesProducers(
285+
mergeState.docValuesProducers[i]
286+
);
287+
for (DocValuesProducer docValuesProducer : docValuesProducers) {
288+
if (docValuesProducer instanceof CompositeIndexReader) {
289+
reader = (CompositeIndexReader) docValuesProducer;
290+
List<CompositeIndexFieldInfo> compositeFieldInfo = reader.getCompositeIndexFields();
291+
if (compositeFieldInfo.isEmpty() == false) {
292+
break;
293+
}
294+
}
295+
}
289296
}
290-
297+
if (reader == null) continue;
291298
List<CompositeIndexFieldInfo> compositeFieldInfo = reader.getCompositeIndexFields();
292299
for (CompositeIndexFieldInfo fieldInfo : compositeFieldInfo) {
293300
if (fieldInfo.getType().equals(CompositeMappedFieldType.CompositeFieldType.STAR_TREE)) {
294301
CompositeIndexValues compositeIndexValues = reader.getCompositeIndexValues(fieldInfo);
295302
if (compositeIndexValues instanceof StarTreeValues) {
296303
StarTreeValues starTreeValues = (StarTreeValues) compositeIndexValues;
297304
List<StarTreeValues> fieldsList = starTreeSubsPerField.getOrDefault(fieldInfo.getField(), new ArrayList<>());
298-
if (starTreeField == null) {
299-
starTreeField = starTreeValues.getStarTreeField();
300-
}
301-
// assert star tree configuration is same across segments
302-
else {
303-
if (starTreeField.equals(starTreeValues.getStarTreeField()) == false) {
304-
throw new IllegalArgumentException(
305-
"star tree field configuration must match the configuration of the field being merged"
306-
);
307-
}
308-
}
309305
fieldsList.add(starTreeValues);
310306
starTreeSubsPerField.put(fieldInfo.getField(), fieldsList);
311307
}
@@ -340,7 +336,8 @@ private static SegmentWriteState getSegmentWriteState(SegmentWriteState segmentW
340336
segmentInfo,
341337
segmentWriteState.fieldInfos,
342338
segmentWriteState.segUpdates,
343-
segmentWriteState.context
339+
segmentWriteState.context,
340+
segmentWriteState.segmentSuffix
344341
);
345342
}
346343

server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/builder/BaseStarTreeBuilder.java

-1
Original file line numberDiff line numberDiff line change
@@ -735,7 +735,6 @@ private SequentialDocValuesIterator getIteratorForNumericField(
735735
* @throws IOException throws an exception if we are unable to add the doc
736736
*/
737737
private void appendToStarTree(StarTreeDocument starTreeDocument) throws IOException {
738-
739738
appendStarTreeDocument(starTreeDocument);
740739
numStarTreeDocs++;
741740
}

server/src/test/java/org/opensearch/index/codec/composite912/datacube/startree/StarTreeDocValuesFormatTests.java

+100
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515
import org.apache.lucene.codecs.Codec;
1616
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
1717
import org.apache.lucene.document.Document;
18+
import org.apache.lucene.document.Field;
1819
import org.apache.lucene.document.SortedNumericDocValuesField;
20+
import org.apache.lucene.document.StringField;
1921
import org.apache.lucene.index.DirectoryReader;
2022
import org.apache.lucene.index.IndexWriterConfig;
2123
import org.apache.lucene.index.LeafReaderContext;
2224
import org.apache.lucene.index.SegmentReader;
25+
import org.apache.lucene.index.Term;
2326
import org.apache.lucene.store.Directory;
2427
import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase;
2528
import org.apache.lucene.tests.index.RandomIndexWriter;
@@ -58,9 +61,12 @@
5861
import java.util.ArrayList;
5962
import java.util.Collection;
6063
import java.util.Collections;
64+
import java.util.HashMap;
6165
import java.util.List;
66+
import java.util.Map;
6267

6368
import static org.opensearch.common.util.FeatureFlags.STAR_TREE_INDEX;
69+
import static org.opensearch.index.compositeindex.CompositeIndexConstants.STAR_TREE_DOCS_COUNT;
6470
import static org.opensearch.index.compositeindex.datacube.startree.StarTreeTestUtils.assertStarTreeDocuments;
6571

6672
/**
@@ -207,6 +213,100 @@ public void testStarTreeDocValues() throws IOException {
207213
directory.close();
208214
}
209215

216+
public void testStarTreeDocValuesWithDeletions() throws IOException {
217+
Directory directory = newDirectory();
218+
IndexWriterConfig conf = newIndexWriterConfig(null);
219+
conf.setMergePolicy(newLogMergePolicy());
220+
RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf);
221+
222+
int iterations = 3;
223+
Map<String, Integer> map = new HashMap<>();
224+
List<String> allIds = new ArrayList<>();
225+
for (int iter = 0; iter < iterations; iter++) {
226+
// Add 10 documents
227+
for (int i = 0; i < 10; i++) {
228+
String id = String.valueOf(random().nextInt() + i);
229+
allIds.add(id);
230+
Document doc = new Document();
231+
doc.add(new StringField("_id", id, Field.Store.YES));
232+
int fieldValue = random().nextInt(5) + 1;
233+
doc.add(new SortedNumericDocValuesField("field", fieldValue));
234+
235+
int sndvValue = random().nextInt(3);
236+
237+
doc.add(new SortedNumericDocValuesField("sndv", sndvValue));
238+
int dvValue = random().nextInt(3);
239+
240+
doc.add(new SortedNumericDocValuesField("dv", dvValue));
241+
map.put(sndvValue + "-" + dvValue, fieldValue + map.getOrDefault(sndvValue + "-" + dvValue, 0));
242+
iw.addDocument(doc);
243+
}
244+
iw.flush();
245+
}
246+
iw.commit();
247+
// Delete random number of documents
248+
int docsToDelete = random().nextInt(9); // Delete up to 9 documents
249+
for (int i = 0; i < docsToDelete; i++) {
250+
if (!allIds.isEmpty()) {
251+
String idToDelete = allIds.remove(random().nextInt(allIds.size() - 1));
252+
iw.deleteDocuments(new Term("_id", idToDelete));
253+
allIds.remove(idToDelete);
254+
}
255+
}
256+
iw.flush();
257+
iw.commit();
258+
iw.forceMerge(1);
259+
iw.close();
260+
261+
DirectoryReader ir = DirectoryReader.open(directory);
262+
TestUtil.checkReader(ir);
263+
assertEquals(1, ir.leaves().size());
264+
265+
// Assert star tree documents
266+
for (LeafReaderContext context : ir.leaves()) {
267+
SegmentReader reader = Lucene.segmentReader(context.reader());
268+
CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader();
269+
List<CompositeIndexFieldInfo> compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields();
270+
271+
for (CompositeIndexFieldInfo compositeIndexFieldInfo : compositeIndexFields) {
272+
StarTreeValues starTreeValues = (StarTreeValues) starTreeDocValuesReader.getCompositeIndexValues(compositeIndexFieldInfo);
273+
StarTreeDocument[] actualStarTreeDocuments = StarTreeTestUtils.getSegmentsStarTreeDocuments(
274+
List.of(starTreeValues),
275+
List.of(
276+
NumberFieldMapper.NumberType.DOUBLE,
277+
NumberFieldMapper.NumberType.LONG,
278+
NumberFieldMapper.NumberType.DOUBLE,
279+
NumberFieldMapper.NumberType.DOUBLE,
280+
NumberFieldMapper.NumberType.DOUBLE,
281+
NumberFieldMapper.NumberType.LONG,
282+
NumberFieldMapper.NumberType.DOUBLE,
283+
NumberFieldMapper.NumberType.DOUBLE,
284+
NumberFieldMapper.NumberType.LONG
285+
),
286+
Integer.parseInt(starTreeValues.getAttributes().get(STAR_TREE_DOCS_COUNT))
287+
);
288+
for (StarTreeDocument starDoc : actualStarTreeDocuments) {
289+
Long sndvVal = null;
290+
if (starDoc.dimensions[0] != null) {
291+
sndvVal = starDoc.dimensions[0];
292+
}
293+
Long dvVal = null;
294+
if (starDoc.dimensions[1] != null) {
295+
dvVal = starDoc.dimensions[1];
296+
}
297+
if (starDoc.metrics[0] != null) {
298+
double metric = (double) starDoc.metrics[0];
299+
if (map.containsKey(sndvVal + "-" + dvVal)) {
300+
assertEquals((long) map.get(sndvVal + "-" + dvVal), (long) metric);
301+
}
302+
}
303+
}
304+
}
305+
}
306+
ir.close();
307+
directory.close();
308+
}
309+
210310
private XContentBuilder getExpandedMapping() throws IOException {
211311
return topMapping(b -> {
212312
b.startObject("composite");

0 commit comments

Comments
 (0)