Skip to content

Commit 9baf3e4

Browse files
authored
Backport to main (opensearch-project#2520)
* Remove skip building graph check for quantization use case (opensearch-project#2430) For quantization indices, we don't have to apply building graph check since it is already faster, this is now only applied for fp32/16 indices and where threshold is configured. Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com> * Update default to 0 to always build graph as default behavior (opensearch-project#2452) Signed-off-by: Balasubramanian <balasvij@amazon.com> * Update changelog Signed-off-by: Balasubramanian <balasvij@amazon.com> --------- Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com> Signed-off-by: Balasubramanian <balasvij@amazon.com>
1 parent 359a37b commit 9baf3e4

File tree

4 files changed

+10
-15
lines changed

4 files changed

+10
-15
lines changed

CHANGELOG.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5757
* Added Lucene BWC tests (#2313)[https://github.com/opensearch-project/k-NN/pull/2313]
5858
* Upgrade jsonpath from 2.8.0 to 2.9.0[2325](https://github.com/opensearch-project/k-NN/pull/2325)
5959
* Bump Faiss commit from 1f42e81 to 0cbc2a8 to accelerate hamming distance calculation using _mm512_popcnt_epi64 intrinsic and also add avx512-fp16 instructions to boost performance [#2381](https://github.com/opensearch-project/k-NN/pull/2381)
60-
* Enabled indices.breaker.total.use_real_memory setting via build.gradle for integTest Cluster to catch heap CB in local ITs and github CI actions [#2395](https://github.com/opensearch-project/k-NN/pull/2395/)
60+
* Enabled indices.breaker.total.use_real_memory setting via build.gradle for integTest Cluster to catch heap CB in local ITs and github CI actions [#2395](https://github.com/opensearch-project/k-NN/pull/2395/)
6161
* Enabled idempotency of local builds when using `./gradlew clean` and nest `jni/release` directory under `jni/build` for easier cleanup [#2516](https://github.com/opensearch-project/k-NN/pull/2516)
6262
### Refactoring

src/main/java/org/opensearch/knn/index/KNNSettings.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public class KNNSettings {
103103
public static final boolean KNN_DEFAULT_FAISS_AVX512_DISABLED_VALUE = false;
104104
public static final boolean KNN_DEFAULT_FAISS_AVX512_SPR_DISABLED_VALUE = false;
105105
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2";
106-
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 15_000;
106+
public static final Integer INDEX_KNN_ADVANCED_APPROXIMATE_THRESHOLD_DEFAULT_VALUE = 0;
107107
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MIN = -1;
108108
public static final Integer INDEX_KNN_BUILD_VECTOR_DATA_STRUCTURE_THRESHOLD_MAX = Integer.MAX_VALUE - 2;
109109
public static final String INDEX_KNN_DEFAULT_SPACE_TYPE_FOR_BINARY = "hamming";

src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java

+4-6
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,8 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
104104
field.getVectors()
105105
);
106106
final QuantizationState quantizationState = train(field.getFieldInfo(), knnVectorValuesSupplier, totalLiveDocs);
107-
// Check only after quantization state writer finish writing its state, since it is required
108-
// even if there are no graph files in segment, which will be later used by exact search
109-
if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) {
107+
// should skip graph building only for non quantization use case and if threshold is met
108+
if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) {
110109
log.info(
111110
"Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during flush",
112111
fieldInfo.name,
@@ -144,9 +143,8 @@ public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState
144143
}
145144

146145
final QuantizationState quantizationState = train(fieldInfo, knnVectorValuesSupplier, totalLiveDocs);
147-
// Check only after quantization state writer finish writing its state, since it is required
148-
// even if there are no graph files in segment, which will be later used by exact search
149-
if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) {
146+
// should skip graph building only for non quantization use case and if threshold is met
147+
if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) {
150148
log.info(
151149
"Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during merge",
152150
fieldInfo.name,

src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java

+4-7
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,7 @@ public void testFlush_whenThresholdIsEqualToFixedValue_thenRelevantNativeIndexWr
630630
}
631631
}
632632

633-
public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenSkipBuildingGraph()
634-
throws IOException {
633+
public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenStillBuildGraph() throws IOException {
635634
// Given
636635
List<KNNVectorValues<float[]>> expectedVectorValues = new ArrayList<>();
637636
final Map<Integer, Integer> sizeMap = new HashMap<>();
@@ -714,7 +713,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres
714713
} else {
715714
assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size());
716715
}
717-
verifyNoInteractions(nativeIndexWriter);
718716
IntStream.range(0, vectorsPerField.size()).forEach(i -> {
719717
try {
720718
if (vectorsPerField.get(i).isEmpty()) {
@@ -729,12 +727,12 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres
729727
final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count();
730728
knnVectorValuesFactoryMockedStatic.verify(
731729
() -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()),
732-
times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled))
730+
times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2)
733731
);
734732
}
735733
}
736734

737-
public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenSkipBuildingGraph()
735+
public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenStillBuildGraph()
738736
throws IOException {
739737
// Given
740738
List<KNNVectorValues<float[]>> expectedVectorValues = new ArrayList<>();
@@ -817,7 +815,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres
817815
} else {
818816
assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size());
819817
}
820-
verifyNoInteractions(nativeIndexWriter);
821818
IntStream.range(0, vectorsPerField.size()).forEach(i -> {
822819
try {
823820
if (vectorsPerField.get(i).isEmpty()) {
@@ -832,7 +829,7 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres
832829
final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count();
833830
knnVectorValuesFactoryMockedStatic.verify(
834831
() -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()),
835-
times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled))
832+
times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2)
836833
);
837834
}
838835
}

0 commit comments

Comments
 (0)