Skip to content

Commit 19c9470

Browse files
Added the fix for NPE happening during merges when all vector fields docs are deleted in the segments getting merged (opensearch-project#2365) (opensearch-project#2371)
Signed-off-by: Navneet Verma <navneev@amazon.com> (cherry picked from commit 9fb7a5a) Co-authored-by: Navneet Verma <navneev@amazon.com>
1 parent 12398c4 commit 19c9470

File tree

8 files changed

+78
-48
lines changed

8 files changed

+78
-48
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2525
- Add check to directly use ANN Search when filters match all docs. (#2320)[https://github.com/opensearch-project/k-NN/pull/2320]
2626
### Bug Fixes
2727
* Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282]
28+
* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365]
2829
* Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315]
2930
* Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346]
3031
* Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352]

src/main/java/org/opensearch/knn/common/KNNVectorUtil.java

-15
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77

88
import lombok.AccessLevel;
99
import lombok.NoArgsConstructor;
10-
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
1110

12-
import java.io.IOException;
1311
import java.util.List;
1412
import java.util.Objects;
1513

@@ -62,17 +60,4 @@ public static int[] intListToArray(final List<Integer> integerList) {
6260
}
6361
return intArray;
6462
}
65-
66-
/**
67-
* Iterates vector values once if it is not at start of the location,
68-
* Intended to be done to make sure dimension and bytesPerVector are available
69-
* @param vectorValues
70-
* @throws IOException
71-
*/
72-
public static void iterateVectorValuesOnce(final KNNVectorValues<?> vectorValues) throws IOException {
73-
if (vectorValues.docId() == -1) {
74-
vectorValues.nextDoc();
75-
vectorValues.getVector();
76-
}
77-
}
7863
}

src/main/java/org/opensearch/knn/index/codec/nativeindex/DefaultIndexBuildStrategy.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2424
import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
2525
import static org.opensearch.knn.common.KNNVectorUtil.intListToArray;
26-
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
26+
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
2727
import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer;
2828

2929
/**
@@ -52,7 +52,7 @@ public static DefaultIndexBuildStrategy getInstance() {
5252
public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException {
5353
final KNNVectorValues<?> knnVectorValues = indexInfo.getVectorValues();
5454
// Needed to make sure we don't get 0 dimensions while initializing index
55-
iterateVectorValuesOnce(knnVectorValues);
55+
initializeVectorValues(knnVectorValues);
5656
IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo);
5757

5858
try (

src/main/java/org/opensearch/knn/index/codec/nativeindex/MemOptimizedNativeIndexBuildStrategy.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
2424
import static org.opensearch.knn.common.KNNVectorUtil.intListToArray;
25-
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
25+
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
2626
import static org.opensearch.knn.index.codec.transfer.OffHeapVectorTransferFactory.getVectorTransfer;
2727

2828
/**
@@ -53,7 +53,7 @@ public static MemOptimizedNativeIndexBuildStrategy getInstance() {
5353
public void buildAndWriteIndex(final BuildIndexParams indexInfo) throws IOException {
5454
final KNNVectorValues<?> knnVectorValues = indexInfo.getVectorValues();
5555
// Needed to make sure we don't get 0 dimensions while initializing index
56-
iterateVectorValuesOnce(knnVectorValues);
56+
initializeVectorValues(knnVectorValues);
5757
KNNEngine engine = indexInfo.getKnnEngine();
5858
Map<String, Object> indexParameters = indexInfo.getParameters();
5959
IndexBuildSetup indexBuildSetup = QuantizationIndexUtils.prepareIndexBuild(knnVectorValues, indexInfo);

src/main/java/org/opensearch/knn/index/codec/nativeindex/NativeIndexWriter.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
import static org.opensearch.knn.common.FieldInfoExtractor.extractVectorDataType;
4444
import static org.opensearch.knn.common.KNNConstants.MODEL_ID;
4545
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
46-
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
46+
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
4747
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.buildEngineFileName;
4848
import static org.opensearch.knn.index.engine.faiss.Faiss.FAISS_BINARY_INDEX_DESCRIPTION_PREFIX;
4949

@@ -100,7 +100,7 @@ public static NativeIndexWriter getWriter(
100100
* @throws IOException
101101
*/
102102
public void flushIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDocs) throws IOException {
103-
iterateVectorValuesOnce(knnVectorValues);
103+
initializeVectorValues(knnVectorValues);
104104
buildAndWriteIndex(knnVectorValues, totalLiveDocs);
105105
recordRefreshStats();
106106
}
@@ -111,7 +111,7 @@ public void flushIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDo
111111
* @throws IOException
112112
*/
113113
public void mergeIndex(final KNNVectorValues<?> knnVectorValues, int totalLiveDocs) throws IOException {
114-
iterateVectorValuesOnce(knnVectorValues);
114+
initializeVectorValues(knnVectorValues);
115115
if (knnVectorValues.docId() == NO_MORE_DOCS) {
116116
// This is in place so we do not add metrics
117117
log.debug("Skipping mergeIndex, vector values are already iterated for {}", fieldInfo.name);

src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java

+28
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
import org.apache.lucene.index.BinaryDocValues;
1010
import org.apache.lucene.index.FieldInfo;
1111
import org.apache.lucene.index.SegmentInfo;
12+
import org.apache.lucene.search.DocIdSetIterator;
1213
import org.opensearch.knn.common.FieldInfoExtractor;
1314
import org.opensearch.knn.common.KNNConstants;
1415
import org.opensearch.knn.index.VectorDataType;
1516
import org.opensearch.knn.index.codec.KNN80Codec.KNN80BinaryDocValues;
1617
import org.opensearch.knn.index.engine.KNNEngine;
18+
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
1719

20+
import java.io.IOException;
1821
import java.util.Comparator;
1922
import java.util.List;
2023
import java.util.stream.Collectors;
@@ -116,6 +119,31 @@ public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentIn
116119
}
117120
}
118121

122+
/**
123+
* Positions the vectorValuesIterator to the first vector document ID if not already positioned there.
124+
* This initialization is crucial for setting up vector dimensions and other properties in VectorValues.
125+
* <p>
126+
* If the VectorValues contains no vector documents, the iterator will be positioned at
127+
* {@link DocIdSetIterator#NO_MORE_DOCS}
128+
*
129+
* @param vectorValues {@link KNNVectorValues}
130+
* @throws IOException if there is an error while accessing the vector values
131+
*/
132+
public static void initializeVectorValues(final KNNVectorValues<?> vectorValues) throws IOException {
133+
// The docId will be set to -1 if next doc has never been called yet. If it has already been called,
134+
// no need to advance the vector values
135+
if (vectorValues.docId() != -1) {
136+
return;
137+
}
138+
// Ensure that we are not getting the next vector if there are no more docs
139+
vectorValues.nextDoc();
140+
if (vectorValues.docId() == DocIdSetIterator.NO_MORE_DOCS) {
141+
// Ensure that we are not getting the vector if there are no more docs
142+
return;
143+
}
144+
vectorValues.getVector();
145+
}
146+
119147
/**
120148
* Get KNNEngine From FieldInfo
121149
*

src/test/java/org/opensearch/knn/common/KNNVectorUtilTests.java

-26
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,10 @@
1111

1212
package org.opensearch.knn.common;
1313

14-
import lombok.SneakyThrows;
1514
import org.opensearch.knn.KNNTestCase;
16-
import org.opensearch.knn.index.VectorDataType;
17-
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
18-
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
19-
import org.opensearch.knn.index.vectorvalues.TestVectorValues;
2015

2116
import java.util.List;
2217

23-
import static org.opensearch.knn.common.KNNVectorUtil.iterateVectorValuesOnce;
24-
2518
public class KNNVectorUtilTests extends KNNTestCase {
2619
public void testByteZeroVector() {
2720
assertTrue(KNNVectorUtil.isZeroVector(new byte[] { 0, 0, 0 }));
@@ -38,23 +31,4 @@ public void testIntListToArray() {
3831
assertNull(KNNVectorUtil.intListToArray(List.of()));
3932
assertNull(KNNVectorUtil.intListToArray(null));
4033
}
41-
42-
@SneakyThrows
43-
public void testInit() {
44-
// Give
45-
final List<float[]> floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 });
46-
final int dimension = floatArray.get(0).length;
47-
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
48-
floatArray
49-
);
50-
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);
51-
52-
// When
53-
iterateVectorValuesOnce(knnVectorValues);
54-
55-
// Then
56-
assertNotEquals(-1, knnVectorValues.docId());
57-
assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f);
58-
assertEquals(dimension, knnVectorValues.dimension());
59-
}
6034
}

src/test/java/org/opensearch/knn/index/codec/util/KNNCodecUtilTests.java

+42
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,24 @@
66
package org.opensearch.knn.index.codec.util;
77

88
import junit.framework.TestCase;
9+
import lombok.SneakyThrows;
910
import org.apache.lucene.index.SegmentInfo;
11+
import org.apache.lucene.search.DocIdSetIterator;
12+
import org.junit.Assert;
1013
import org.opensearch.knn.index.VectorDataType;
1114
import org.opensearch.knn.index.engine.KNNEngine;
15+
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
16+
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
17+
import org.opensearch.knn.index.vectorvalues.TestVectorValues;
1218

19+
import java.util.Collections;
1320
import java.util.List;
1421
import java.util.Set;
1522

1623
import static org.mockito.Mockito.mock;
1724
import static org.mockito.Mockito.when;
1825
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.calculateArraySize;
26+
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
1927

2028
public class KNNCodecUtilTests extends TestCase {
2129

@@ -46,4 +54,38 @@ public void testGetKNNEngines() {
4654
assertEquals(engineFiles.size(), 2);
4755
assertTrue(engineFiles.get(0).equals("_0_2011_target_field.faissc"));
4856
}
57+
58+
@SneakyThrows
59+
public void testInitializeVectorValues_whenValidVectorValues_thenSuccess() {
60+
// Give
61+
final List<float[]> floatArray = List.of(new float[] { 1, 2 }, new float[] { 2, 3 });
62+
final int dimension = floatArray.get(0).length;
63+
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
64+
floatArray
65+
);
66+
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);
67+
68+
// When
69+
initializeVectorValues(knnVectorValues);
70+
71+
// Then
72+
Assert.assertNotEquals(-1, knnVectorValues.docId());
73+
Assert.assertArrayEquals(floatArray.get(0), knnVectorValues.getVector(), 0.001f);
74+
assertEquals(dimension, knnVectorValues.dimension());
75+
}
76+
77+
@SneakyThrows
78+
public void testInitializeVectorValues_whenNoDocs_thenSuccess() {
79+
// Give
80+
final List<float[]> floatArray = Collections.emptyList();
81+
final TestVectorValues.PreDefinedFloatVectorValues randomVectorValues = new TestVectorValues.PreDefinedFloatVectorValues(
82+
floatArray
83+
);
84+
final KNNVectorValues<float[]> knnVectorValues = KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, randomVectorValues);
85+
86+
// When
87+
initializeVectorValues(knnVectorValues);
88+
// Then
89+
Assert.assertEquals(DocIdSetIterator.NO_MORE_DOCS, knnVectorValues.docId());
90+
}
4991
}

0 commit comments

Comments
 (0)