Skip to content

Commit f9d8b53

Browse files
authored
Introduce RemoteIndexBuildStrategy, refactor NativeIndexBuildStrategy to accept vector value supplier (#2525)
Signed-off-by: Jay Deng <jayd0104@gmail.com>
1 parent 6203ba1 commit f9d8b53

30 files changed

+898
-330
lines changed

qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/AbstractRestartUpgradeTestCase.java

+5-1
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,10 @@
1313
import java.util.Locale;
1414
import java.util.Optional;
1515

16-
import static org.opensearch.knn.TestUtils.*;
16+
import static org.opensearch.knn.TestUtils.BWC_VERSION;
17+
import static org.opensearch.knn.TestUtils.CLIENT_TIMEOUT_VALUE;
18+
import static org.opensearch.knn.TestUtils.KNN_BWC_PREFIX;
19+
import static org.opensearch.knn.TestUtils.RESTART_UPGRADE_OLD_CLUSTER;
1720

1821
public abstract class AbstractRestartUpgradeTestCase extends KNNRestTestCase {
1922
protected String testIndex;
@@ -58,6 +61,7 @@ protected static final boolean isRunningAgainstOldCluster() {
5861
return Boolean.parseBoolean(System.getProperty(RESTART_UPGRADE_OLD_CLUSTER));
5962
}
6063

64+
@Override
6165
protected final Optional<String> getBWCVersion() {
6266
return Optional.ofNullable(System.getProperty(BWC_VERSION, null));
6367
}

qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/AbstractRollingUpgradeTestCase.java

+10-2
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,21 @@
66
package org.opensearch.knn.bwc;
77

88
import org.junit.Before;
9-
import org.opensearch.knn.KNNRestTestCase;
109
import org.opensearch.common.settings.Settings;
10+
import org.opensearch.knn.KNNRestTestCase;
1111
import org.opensearch.test.rest.OpenSearchRestTestCase;
1212

1313
import java.util.Locale;
1414
import java.util.Optional;
1515

16-
import static org.opensearch.knn.TestUtils.*;
16+
import static org.opensearch.knn.TestUtils.BWCSUITE_CLUSTER;
17+
import static org.opensearch.knn.TestUtils.BWC_VERSION;
18+
import static org.opensearch.knn.TestUtils.CLIENT_TIMEOUT_VALUE;
19+
import static org.opensearch.knn.TestUtils.KNN_BWC_PREFIX;
20+
import static org.opensearch.knn.TestUtils.MIXED_CLUSTER;
21+
import static org.opensearch.knn.TestUtils.OLD_CLUSTER;
22+
import static org.opensearch.knn.TestUtils.ROLLING_UPGRADE_FIRST_ROUND;
23+
import static org.opensearch.knn.TestUtils.UPGRADED_CLUSTER;
1724

1825
public abstract class AbstractRollingUpgradeTestCase extends KNNRestTestCase {
1926
protected String testIndex;
@@ -81,6 +88,7 @@ protected final boolean isFirstMixedRound() {
8188
return Boolean.parseBoolean(System.getProperty(ROLLING_UPGRADE_FIRST_ROUND, "false"));
8289
}
8390

91+
@Override
8492
protected final Optional<String> getBWCVersion() {
8593
return Optional.ofNullable(System.getProperty(BWC_VERSION, null));
8694
}

src/main/java/org/opensearch/knn/common/featureflags/KNNFeatureFlags.java

+19-1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ public class KNNFeatureFlags {
2626

2727
// Feature flags
2828
private static final String KNN_FORCE_EVICT_CACHE_ENABLED = "knn.feature.cache.force_evict.enabled";
29+
private static final String KNN_REMOTE_VECTOR_BUILD = "knn.feature.remote_index_build.enabled";
2930

3031
@VisibleForTesting
3132
public static final Setting<Boolean> KNN_FORCE_EVICT_CACHE_ENABLED_SETTING = Setting.boolSetting(
@@ -35,8 +36,18 @@ public class KNNFeatureFlags {
3536
Dynamic
3637
);
3738

39+
/**
40+
* Feature flag to control remote index build at the cluster level
41+
*/
42+
public static final Setting<Boolean> KNN_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
43+
KNN_REMOTE_VECTOR_BUILD,
44+
false,
45+
NodeScope,
46+
Dynamic
47+
);
48+
3849
public static List<Setting<?>> getFeatureFlags() {
39-
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING);
50+
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING, KNN_REMOTE_VECTOR_BUILD_SETTING);
4051
}
4152

4253
/**
@@ -46,4 +57,11 @@ public static List<Setting<?>> getFeatureFlags() {
4657
public static boolean isForceEvictCacheEnabled() {
4758
return Booleans.parseBoolean(KNNSettings.state().getSettingValue(KNN_FORCE_EVICT_CACHE_ENABLED).toString(), false);
4859
}
60+
61+
/**
62+
* @return true if remote vector index build feature flag is enabled
63+
*/
64+
public static boolean isKNNRemoteVectorBuildEnabled() {
65+
return Booleans.parseBooleanStrict(KNNSettings.state().getSettingValue(KNN_REMOTE_VECTOR_BUILD).toString(), false);
66+
}
4967
}

src/main/java/org/opensearch/knn/index/KNNSettings.java

+29-2
Original file line number | Diff line number | Diff line change
@@ -41,9 +41,9 @@
4141

4242
import static java.util.stream.Collectors.toUnmodifiableMap;
4343
import static org.opensearch.common.settings.Setting.Property.Dynamic;
44+
import static org.opensearch.common.settings.Setting.Property.Final;
4445
import static org.opensearch.common.settings.Setting.Property.IndexScope;
4546
import static org.opensearch.common.settings.Setting.Property.NodeScope;
46-
import static org.opensearch.common.settings.Setting.Property.Final;
4747
import static org.opensearch.common.settings.Setting.Property.UnmodifiableOnRestore;
4848
import static org.opensearch.common.unit.MemorySizeValue.parseBytesSizeValueOrHeapRatio;
4949
import static org.opensearch.core.common.unit.ByteSizeValue.parseBytesSizeValue;
@@ -94,6 +94,8 @@ public class KNNSettings {
9494
public static final String KNN_FAISS_AVX512_SPR_DISABLED = "knn.faiss.avx512_spr.disabled";
9595
public static final String KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED = "index.knn.disk.vector.shard_level_rescoring_disabled";
9696
public static final String KNN_DERIVED_SOURCE_ENABLED = "index.knn.derived_source.enabled";
97+
public static final String KNN_INDEX_REMOTE_VECTOR_BUILD = "index.knn.remote_index_build.enabled";
98+
public static final String KNN_REMOTE_VECTOR_REPO = "knn.remote_index_build.vector_repo";
9799

98100
/**
99101
* Default setting values
@@ -371,6 +373,21 @@ public class KNNSettings {
371373
NodeScope
372374
);
373375

376+
/**
377+
* Index level setting to control whether remote index build is enabled or not.
378+
*/
379+
public static final Setting<Boolean> KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
380+
KNN_INDEX_REMOTE_VECTOR_BUILD,
381+
false,
382+
Dynamic,
383+
IndexScope
384+
);
385+
386+
/**
387+
* Cluster level setting which indicates the repository that the remote index build should write to.
388+
*/
389+
public static final Setting<String> KNN_REMOTE_VECTOR_REPO_SETTING = Setting.simpleString(KNN_REMOTE_VECTOR_REPO, Dynamic, NodeScope);
390+
374391
/**
375392
* Dynamic settings
376393
*/
@@ -525,6 +542,14 @@ private Setting<?> getSetting(String key) {
525542
return KNN_DERIVED_SOURCE_ENABLED_SETTING;
526543
}
527544

545+
if (KNN_INDEX_REMOTE_VECTOR_BUILD.equals(key)) {
546+
return KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
547+
}
548+
549+
if (KNN_REMOTE_VECTOR_REPO.equals(key)) {
550+
return KNN_REMOTE_VECTOR_REPO_SETTING;
551+
}
552+
528553
throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
529554
}
530555

@@ -550,7 +575,9 @@ public List<Setting<?>> getSettings() {
550575
QUANTIZATION_STATE_CACHE_SIZE_LIMIT_SETTING,
551576
QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING,
552577
KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING,
553-
KNN_DERIVED_SOURCE_ENABLED_SETTING
578+
KNN_DERIVED_SOURCE_ENABLED_SETTING,
579+
KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING,
580+
KNN_REMOTE_VECTOR_REPO_SETTING
554581
);
555582
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
556583
.collect(Collectors.toList());

src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java

+24-6
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
import org.opensearch.index.mapper.MapperService;
1616
import org.opensearch.knn.index.KNNSettings;
1717
import org.opensearch.knn.index.codec.KNN990Codec.NativeEngines990KnnVectorsFormat;
18+
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory;
1819
import org.opensearch.knn.index.codec.params.KNNScalarQuantizedVectorsFormatParams;
1920
import org.opensearch.knn.index.codec.params.KNNVectorsFormatParams;
2021
import org.opensearch.knn.index.engine.KNNEngine;
@@ -44,6 +45,7 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor
4445
private final Supplier<KnnVectorsFormat> defaultFormatSupplier;
4546
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
4647
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier;
48+
private final NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory;
4749
private static final String MAX_CONNECTIONS = "max_connections";
4850
private static final String BEAM_WIDTH = "beam_width";
4951

@@ -54,11 +56,26 @@ public BasePerFieldKnnVectorsFormat(
5456
Supplier<KnnVectorsFormat> defaultFormatSupplier,
5557
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
5658
) {
57-
this.mapperService = mapperService;
58-
this.defaultMaxConnections = defaultMaxConnections;
59-
this.defaultBeamWidth = defaultBeamWidth;
60-
this.defaultFormatSupplier = defaultFormatSupplier;
61-
this.vectorsFormatSupplier = vectorsFormatSupplier;
59+
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null);
60+
}
61+
62+
public BasePerFieldKnnVectorsFormat(
63+
Optional<MapperService> mapperService,
64+
int defaultMaxConnections,
65+
int defaultBeamWidth,
66+
Supplier<KnnVectorsFormat> defaultFormatSupplier,
67+
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier,
68+
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier
69+
) {
70+
this(
71+
mapperService,
72+
defaultMaxConnections,
73+
defaultBeamWidth,
74+
defaultFormatSupplier,
75+
vectorsFormatSupplier,
76+
scalarQuantizedVectorsFormatSupplier,
77+
new NativeIndexBuildStrategyFactory()
78+
);
6279
}
6380

6481
@Override
@@ -141,7 +158,8 @@ private NativeEngines990KnnVectorsFormat nativeEngineVectorsFormat() {
141158
int approximateThreshold = getApproximateThresholdValue();
142159
return new NativeEngines990KnnVectorsFormat(
143160
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()),
144-
approximateThreshold
161+
approximateThreshold,
162+
nativeIndexBuildStrategyFactory
145163
);
146164
}
147165

src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumer.java

+7-7
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,6 @@
66
package org.opensearch.knn.index.codec.KNN80Codec;
77

88
import lombok.extern.log4j.Log4j2;
9-
import org.opensearch.common.StopWatch;
10-
import org.opensearch.knn.index.VectorDataType;
11-
import org.opensearch.knn.index.engine.KNNEngine;
12-
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
13-
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
149
import org.apache.logging.log4j.LogManager;
1510
import org.apache.logging.log4j.Logger;
1611
import org.apache.lucene.codecs.DocValuesConsumer;
@@ -19,8 +14,13 @@
1914
import org.apache.lucene.index.FieldInfo;
2015
import org.apache.lucene.index.MergeState;
2116
import org.apache.lucene.index.SegmentWriteState;
17+
import org.opensearch.common.StopWatch;
18+
import org.opensearch.knn.index.VectorDataType;
2219
import org.opensearch.knn.index.codec.nativeindex.NativeIndexWriter;
20+
import org.opensearch.knn.index.engine.KNNEngine;
2321
import org.opensearch.knn.index.mapper.KNNVectorFieldMapper;
22+
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
23+
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
2424
import org.opensearch.knn.plugin.stats.KNNGraphValue;
2525

2626
import java.io.IOException;
@@ -72,9 +72,9 @@ public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer,
7272
// For BDV it is fine to use knnVectorValues.totalLiveDocs() as we already run the full loop to calculate total
7373
// live docs
7474
if (isMerge) {
75-
NativeIndexWriter.getWriter(field, state).mergeIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
75+
NativeIndexWriter.getWriter(field, state).mergeIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
7676
} else {
77-
NativeIndexWriter.getWriter(field, state).flushIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
77+
NativeIndexWriter.getWriter(field, state).flushIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
7878
}
7979
}
8080

src/main/java/org/opensearch/knn/index/codec/KNN9120Codec/KNN9120PerFieldKnnVectorsFormat.java

+10-1
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
import org.opensearch.knn.index.KNNSettings;
1313
import org.opensearch.knn.index.SpaceType;
1414
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
15+
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory;
1516
import org.opensearch.knn.index.engine.KNNEngine;
1617

1718
import java.util.Optional;
@@ -25,6 +26,13 @@ public class KNN9120PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsForma
2526
private static final Tuple<Integer, ExecutorService> DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE = Tuple.tuple(1, null);
2627

2728
public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
29+
this(mapperService, new NativeIndexBuildStrategyFactory());
30+
}
31+
32+
public KNN9120PerFieldKnnVectorsFormat(
33+
final Optional<MapperService> mapperService,
34+
NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory
35+
) {
2836
super(
2937
mapperService,
3038
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
@@ -67,7 +75,8 @@ public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServi
6775
// Executor service
6876
mergeThreadCountAndExecutorService.v2()
6977
);
70-
}
78+
},
79+
nativeIndexBuildStrategyFactory
7180
);
7281
}
7382

src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsFormat.java

+17-1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import org.apache.lucene.index.SegmentReadState;
2121
import org.apache.lucene.index.SegmentWriteState;
2222
import org.opensearch.knn.index.KNNSettings;
23+
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory;
2324
import org.opensearch.knn.index.engine.KNNEngine;
2425

2526
import java.io.IOException;
@@ -33,6 +34,7 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat {
3334
private static FlatVectorsFormat flatVectorsFormat;
3435
private static final String FORMAT_NAME = "NativeEngines990KnnVectorsFormat";
3536
private static int approximateThreshold;
37+
private final NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory;
3638

3739
public NativeEngines990KnnVectorsFormat() {
3840
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
@@ -47,9 +49,18 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
4749
}
4850

4951
public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat, int approximateThreshold) {
52+
this(flatVectorsFormat, approximateThreshold, new NativeIndexBuildStrategyFactory());
53+
}
54+
55+
public NativeEngines990KnnVectorsFormat(
56+
final FlatVectorsFormat flatVectorsFormat,
57+
int approximateThreshold,
58+
final NativeIndexBuildStrategyFactory nativeIndexBuildStrategyFactory
59+
) {
5060
super(FORMAT_NAME);
5161
NativeEngines990KnnVectorsFormat.flatVectorsFormat = flatVectorsFormat;
5262
NativeEngines990KnnVectorsFormat.approximateThreshold = approximateThreshold;
63+
this.nativeIndexBuildStrategyFactory = nativeIndexBuildStrategyFactory;
5364
}
5465

5566
/**
@@ -59,7 +70,12 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
5970
*/
6071
@Override
6172
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException {
62-
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), approximateThreshold);
73+
return new NativeEngines990KnnVectorsWriter(
74+
state,
75+
flatVectorsFormat.fieldsWriter(state),
76+
approximateThreshold,
77+
nativeIndexBuildStrategyFactory
78+
);
6379
}
6480

6581
/**

0 commit comments

Comments
 (0)