Skip to content

Commit 7e93b7f

Browse files
jed326Jay Deng
authored and
Jay Deng
committed
Introduce RemoteIndexBuildStrategy, refactor NativeIndexBuildStrategy to accept vector value supplier
Signed-off-by: Jay Deng <jayd0104@gmail.com>
1 parent 45ecb5b commit 7e93b7f

21 files changed

+541
-228
lines changed

src/main/java/org/opensearch/knn/common/featureflags/KNNFeatureFlags.java

+16-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ public class KNNFeatureFlags {
2626

2727
// Feature flags
2828
private static final String KNN_FORCE_EVICT_CACHE_ENABLED = "knn.feature.cache.force_evict.enabled";
29+
private static final String KNN_REMOTE_VECTOR_BUILD = "knn.feature.remote_index_build.enabled";
2930

3031
@VisibleForTesting
3132
public static final Setting<Boolean> KNN_FORCE_EVICT_CACHE_ENABLED_SETTING = Setting.boolSetting(
@@ -35,8 +36,15 @@ public class KNNFeatureFlags {
3536
Dynamic
3637
);
3738

39+
public static final Setting<Boolean> KNN_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
40+
KNN_REMOTE_VECTOR_BUILD,
41+
false,
42+
NodeScope,
43+
Dynamic
44+
);
45+
3846
public static List<Setting<?>> getFeatureFlags() {
39-
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING);
47+
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING, KNN_REMOTE_VECTOR_BUILD_SETTING);
4048
}
4149

4250
/**
@@ -46,4 +54,11 @@ public static List<Setting<?>> getFeatureFlags() {
4654
public static boolean isForceEvictCacheEnabled() {
4755
return Booleans.parseBoolean(KNNSettings.state().getSettingValue(KNN_FORCE_EVICT_CACHE_ENABLED).toString(), false);
4856
}
57+
58+
/**
59+
* @return true if remote vector index build feature flag is enabled
60+
*/
61+
public static boolean isKNNRemoteVectorBuildEnabled() {
62+
return Booleans.parseBooleanStrict(KNNSettings.state().getSettingValue(KNN_REMOTE_VECTOR_BUILD).toString(), false);
63+
}
4964
}

src/main/java/org/opensearch/knn/index/KNNSettings.java

+23-2
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141

4242
import static java.util.stream.Collectors.toUnmodifiableMap;
4343
import static org.opensearch.common.settings.Setting.Property.Dynamic;
44+
import static org.opensearch.common.settings.Setting.Property.Final;
4445
import static org.opensearch.common.settings.Setting.Property.IndexScope;
4546
import static org.opensearch.common.settings.Setting.Property.NodeScope;
46-
import static org.opensearch.common.settings.Setting.Property.Final;
4747
import static org.opensearch.common.settings.Setting.Property.UnmodifiableOnRestore;
4848
import static org.opensearch.common.unit.MemorySizeValue.parseBytesSizeValueOrHeapRatio;
4949
import static org.opensearch.core.common.unit.ByteSizeValue.parseBytesSizeValue;
@@ -94,6 +94,8 @@ public class KNNSettings {
9494
public static final String KNN_FAISS_AVX512_SPR_DISABLED = "knn.faiss.avx512_spr.disabled";
9595
public static final String KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED = "index.knn.disk.vector.shard_level_rescoring_disabled";
9696
public static final String KNN_DERIVED_SOURCE_ENABLED = "index.knn.derived_source.enabled";
97+
public static final String KNN_INDEX_REMOTE_VECTOR_BUILD = "index.knn.remote_index_build.enabled";
98+
public static final String KNN_REMOTE_VECTOR_REPO = "knn.remote_index_build.vector_repo";
9799

98100
/**
99101
* Default setting values
@@ -371,6 +373,15 @@ public class KNNSettings {
371373
NodeScope
372374
);
373375

376+
public static final Setting<Boolean> KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
377+
KNN_INDEX_REMOTE_VECTOR_BUILD,
378+
false,
379+
Dynamic,
380+
IndexScope
381+
);
382+
383+
public static final Setting<String> KNN_REMOTE_VECTOR_REPO_SETTING = Setting.simpleString(KNN_REMOTE_VECTOR_REPO, Dynamic, NodeScope);
384+
374385
/**
375386
* Dynamic settings
376387
*/
@@ -525,6 +536,14 @@ private Setting<?> getSetting(String key) {
525536
return KNN_DERIVED_SOURCE_ENABLED_SETTING;
526537
}
527538

539+
if (KNN_INDEX_REMOTE_VECTOR_BUILD.equals(key)) {
540+
return KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
541+
}
542+
543+
if (KNN_REMOTE_VECTOR_REPO.equals(key)) {
544+
return KNN_REMOTE_VECTOR_REPO_SETTING;
545+
}
546+
528547
throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
529548
}
530549

@@ -550,7 +569,9 @@ public List<Setting<?>> getSettings() {
550569
QUANTIZATION_STATE_CACHE_SIZE_LIMIT_SETTING,
551570
QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING,
552571
KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING,
553-
KNN_DERIVED_SOURCE_ENABLED_SETTING
572+
KNN_DERIVED_SOURCE_ENABLED_SETTING,
573+
KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING,
574+
KNN_REMOTE_VECTOR_REPO_SETTING
554575
);
555576
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
556577
.collect(Collectors.toList());

src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java

+25-6
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import org.opensearch.knn.index.engine.KNNMethodContext;
2222
import org.opensearch.knn.index.mapper.KNNMappingConfig;
2323
import org.opensearch.knn.index.mapper.KNNVectorFieldType;
24+
import org.opensearch.repositories.RepositoriesService;
2425

2526
import java.util.Map;
2627
import java.util.Optional;
@@ -44,6 +45,7 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor
4445
private final Supplier<KnnVectorsFormat> defaultFormatSupplier;
4546
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
4647
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier;
48+
private final Supplier<RepositoriesService> repositoriesServiceSupplier;
4749
private static final String MAX_CONNECTIONS = "max_connections";
4850
private static final String BEAM_WIDTH = "beam_width";
4951

@@ -54,11 +56,26 @@ public BasePerFieldKnnVectorsFormat(
5456
Supplier<KnnVectorsFormat> defaultFormatSupplier,
5557
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
5658
) {
57-
this.mapperService = mapperService;
58-
this.defaultMaxConnections = defaultMaxConnections;
59-
this.defaultBeamWidth = defaultBeamWidth;
60-
this.defaultFormatSupplier = defaultFormatSupplier;
61-
this.vectorsFormatSupplier = vectorsFormatSupplier;
59+
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null, null);
60+
}
61+
62+
public BasePerFieldKnnVectorsFormat(
63+
Optional<MapperService> mapperService,
64+
int defaultMaxConnections,
65+
int defaultBeamWidth,
66+
Supplier<KnnVectorsFormat> defaultFormatSupplier,
67+
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier,
68+
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier
69+
) {
70+
this(
71+
mapperService,
72+
defaultMaxConnections,
73+
defaultBeamWidth,
74+
defaultFormatSupplier,
75+
vectorsFormatSupplier,
76+
scalarQuantizedVectorsFormatSupplier,
77+
null
78+
);
6279
}
6380

6481
@Override
@@ -141,7 +158,9 @@ private NativeEngines990KnnVectorsFormat nativeEngineVectorsFormat() {
141158
int approximateThreshold = getApproximateThresholdValue();
142159
return new NativeEngines990KnnVectorsFormat(
143160
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()),
144-
approximateThreshold
161+
approximateThreshold,
162+
repositoriesServiceSupplier,
163+
mapperService.get().getIndexSettings()
145164
);
146165
}
147166

src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer,
7272
// For BDV it is fine to use knnVectorValues.totalLiveDocs() as we already run the full loop to calculate total
7373
// live docs
7474
if (isMerge) {
75-
NativeIndexWriter.getWriter(field, state).mergeIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
75+
NativeIndexWriter.getWriter(field, state).mergeIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
7676
} else {
77-
NativeIndexWriter.getWriter(field, state).flushIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
77+
NativeIndexWriter.getWriter(field, state).flushIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
7878
}
7979
}
8080

src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java

+11-1
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
import org.opensearch.knn.index.SpaceType;
1414
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
1515
import org.opensearch.knn.index.engine.KNNEngine;
16+
import org.opensearch.repositories.RepositoriesService;
1617

1718
import java.util.Optional;
1819
import java.util.concurrent.ExecutorService;
1920
import java.util.concurrent.Executors;
21+
import java.util.function.Supplier;
2022

2123
/**
2224
* Class provides per field format implementation for Lucene Knn vector type
@@ -25,6 +27,13 @@ public class KNN9120PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsForma
2527
private static final Tuple<Integer, ExecutorService> DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE = Tuple.tuple(1, null);
2628

2729
public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
30+
this(mapperService, null);
31+
}
32+
33+
public KNN9120PerFieldKnnVectorsFormat(
34+
final Optional<MapperService> mapperService,
35+
Supplier<RepositoriesService> repositoriesServiceSupplier
36+
) {
2837
super(
2938
mapperService,
3039
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
@@ -67,7 +76,8 @@ public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServi
6776
// Executor service
6877
mergeThreadCountAndExecutorService.v2()
6978
);
70-
}
79+
},
80+
repositoriesServiceSupplier
7181
);
7282
}
7383

src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java

+23-1
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@
1919
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
2020
import org.apache.lucene.index.SegmentReadState;
2121
import org.apache.lucene.index.SegmentWriteState;
22+
import org.opensearch.index.IndexSettings;
2223
import org.opensearch.knn.index.KNNSettings;
2324
import org.opensearch.knn.index.engine.KNNEngine;
25+
import org.opensearch.repositories.RepositoriesService;
2426

2527
import java.io.IOException;
28+
import java.util.function.Supplier;
2629

2730
/**
2831
* This is a Vector format that will be used for Native engines like Faiss and Nmslib for reading and writing vector
@@ -33,6 +36,8 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat {
3336
private static FlatVectorsFormat flatVectorsFormat;
3437
private static final String FORMAT_NAME = "NativeEngines990KnnVectorsFormat";
3538
private static int approximateThreshold;
39+
private final Supplier<RepositoriesService> repositoriesServiceSupplier;
40+
private final IndexSettings indexSettings;
3641

3742
public NativeEngines990KnnVectorsFormat() {
3843
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
@@ -47,9 +52,20 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
4752
}
4853

4954
public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat, int approximateThreshold) {
55+
this(flatVectorsFormat, approximateThreshold, null, null);
56+
}
57+
58+
public NativeEngines990KnnVectorsFormat(
59+
final FlatVectorsFormat flatVectorsFormat,
60+
int approximateThreshold,
61+
Supplier<RepositoriesService> repositoriesServiceSupplier,
62+
IndexSettings indexSettings
63+
) {
5064
super(FORMAT_NAME);
5165
NativeEngines990KnnVectorsFormat.flatVectorsFormat = flatVectorsFormat;
5266
NativeEngines990KnnVectorsFormat.approximateThreshold = approximateThreshold;
67+
this.repositoriesServiceSupplier = repositoriesServiceSupplier;
68+
this.indexSettings = indexSettings;
5369
}
5470

5571
/**
@@ -59,7 +75,13 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
5975
*/
6076
@Override
6177
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException {
62-
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), approximateThreshold);
78+
return new NativeEngines990KnnVectorsWriter(
79+
state,
80+
flatVectorsFormat.fieldsWriter(state),
81+
approximateThreshold,
82+
repositoriesServiceSupplier,
83+
indexSettings
84+
);
6385
}
6486

6587
/**

0 commit comments

Comments
 (0)