Skip to content

Commit b77b6b6

Browse files
authored
[Remote Vector Index Build] Introduce RemoteIndexClient skeleton and Build Request construction (#2560)
Add RemoteIndexClient initial implementation, its accompanying dependencies, and Build Request, Retry Strategy, and test files Signed-off-by: owenhalpert <ohalpert@gmail.com>
1 parent 0618f03 commit b77b6b6

26 files changed

+1312
-34
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
99
* [Remote Vector Index Build] Introduce Remote Native Index Build feature flag, settings, and initial skeleton [#2525](https://github.com/opensearch-project/k-NN/pull/2525)
1010
* [Remote Vector Index Build] Implement vector data upload and vector data size threshold setting [#2550](https://github.com/opensearch-project/k-NN/pull/2550)
1111
* [Remote Vector Index Build] Implement data download and IndexOutput write functionality [#2554](https://github.com/opensearch-project/k-NN/pull/2554)
12+
* [Remote Vector Index Build] Introduce Client Skeleton + basic Build Request implementation [#2560](https://github.com/opensearch-project/k-NN/pull/2560)
1213
* Add concurrency optimizations with native memory graph loading and force eviction (#2265) [https://github.com/opensearch-project/k-NN/pull/2345]
1314
### Enhancements
1415
* Introduce node level circuit breakers for k-NN [#2509](https://github.com/opensearch-project/k-NN/pull/2509)

build.gradle

+3-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,9 @@ dependencies {
321321
api "net.java.dev.jna:jna-platform:5.13.0"
322322
// OpenSearch core is using slf4j 1.7.36. Therefore, we cannot change the version here.
323323
implementation 'org.slf4j:slf4j-api:1.7.36'
324-
324+
api "org.apache.httpcomponents.client5:httpclient5:${versions.httpclient5}"
325+
api "org.apache.httpcomponents.core5:httpcore5:${versions.httpcore5}"
326+
api "org.apache.httpcomponents.core5:httpcore5-h2:${versions.httpcore5}"
325327
zipArchive group: 'org.opensearch.plugin', name:'opensearch-security', version: "${opensearch_build}"
326328
}
327329

src/main/java/org/opensearch/knn/index/KNNSettings.java

+86-1
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
import org.opensearch.cluster.metadata.IndexMetadata;
1616
import org.opensearch.cluster.service.ClusterService;
1717
import org.opensearch.common.Booleans;
18+
import org.opensearch.common.settings.SecureSetting;
1819
import org.opensearch.common.settings.Setting;
1920
import org.opensearch.common.settings.Settings;
2021
import org.opensearch.common.unit.TimeValue;
2122
import org.opensearch.core.action.ActionListener;
23+
import org.opensearch.core.common.settings.SecureString;
2224
import org.opensearch.core.common.unit.ByteSizeUnit;
2325
import org.opensearch.core.common.unit.ByteSizeValue;
2426
import org.opensearch.index.IndexModule;
@@ -100,6 +102,11 @@ public class KNNSettings {
100102
public static final String KNN_INDEX_REMOTE_VECTOR_BUILD = "index.knn.remote_index_build.enabled";
101103
public static final String KNN_REMOTE_VECTOR_REPO = "knn.remote_index_build.vector_repo";
102104
public static final String KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD = "index.knn.remote_index_build.size_threshold";
105+
public static final String KNN_REMOTE_BUILD_SERVICE_ENDPOINT = "knn.remote_index_build.client.endpoint";
106+
public static final String KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL = "knn.remote_index_build.client.poll_interval";
107+
public static final String KNN_REMOTE_BUILD_CLIENT_TIMEOUT = "knn.remote_index_build.client.timeout";
108+
public static final String KNN_REMOTE_BUILD_CLIENT_USERNAME = "knn.remote_index_build.client.username";
109+
public static final String KNN_REMOTE_BUILD_CLIENT_PASSWORD = "knn.remote_index_build.client.password";
103110

104111
/**
105112
* Default setting values
@@ -133,6 +140,10 @@ public class KNNSettings {
133140
// TODO: Tune this default value based on benchmarking
134141
public static final ByteSizeValue KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_DEFAULT_VALUE = new ByteSizeValue(50, ByteSizeUnit.MB);
135142

143+
// TODO: Tune these default values based on benchmarking
144+
public static final Integer KNN_DEFAULT_REMOTE_BUILD_CLIENT_TIMEOUT_MINUTES = 60;
145+
public static final Integer KNN_DEFAULT_REMOTE_BUILD_CLIENT_POLL_INTERVAL_SECONDS = 30;
146+
136147
/**
137148
* Settings Definition
138149
*/
@@ -409,6 +420,47 @@ public class KNNSettings {
409420
Dynamic,
410421
IndexScope
411422
);
423+
/**
424+
* Remote build service endpoint to be used for remote index build.
425+
*/
426+
public static final Setting<String> KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING = Setting.simpleString(
427+
KNN_REMOTE_BUILD_SERVICE_ENDPOINT,
428+
NodeScope,
429+
Dynamic
430+
);
431+
432+
/**
433+
* Time the remote build service client will wait before falling back to CPU index build.
434+
*/
435+
public static final Setting<TimeValue> KNN_REMOTE_BUILD_CLIENT_TIMEOUT_SETTING = Setting.timeSetting(
436+
KNN_REMOTE_BUILD_CLIENT_TIMEOUT,
437+
TimeValue.timeValueMinutes(KNN_DEFAULT_REMOTE_BUILD_CLIENT_TIMEOUT_MINUTES),
438+
NodeScope,
439+
Dynamic
440+
);
441+
442+
/**
443+
* Setting to control how often the remote build service client polls the build service for the status of the job.
444+
*/
445+
public static final Setting<TimeValue> KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL_SETTING = Setting.timeSetting(
446+
KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL,
447+
TimeValue.timeValueSeconds(KNN_DEFAULT_REMOTE_BUILD_CLIENT_POLL_INTERVAL_SECONDS),
448+
NodeScope,
449+
Dynamic
450+
);
451+
452+
/**
453+
* Keystore settings for build service HTTP authorization
454+
*/
455+
public static final Setting<SecureString> KNN_REMOTE_BUILD_CLIENT_USERNAME_SETTING = SecureSetting.secureString(
456+
KNN_REMOTE_BUILD_CLIENT_USERNAME,
457+
null
458+
);
459+
public static final Setting<SecureString> KNN_REMOTE_BUILD_CLIENT_PASSWORD_SETTING = SecureSetting.secureString(
460+
KNN_REMOTE_BUILD_CLIENT_PASSWORD,
461+
null
462+
);
463+
412464
/**
413465
* Dynamic settings
414466
*/
@@ -600,6 +652,26 @@ private Setting<?> getSetting(String key) {
600652
return KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_SETTING;
601653
}
602654

655+
if (KNN_REMOTE_BUILD_SERVICE_ENDPOINT.equals(key)) {
656+
return KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING;
657+
}
658+
659+
if (KNN_REMOTE_BUILD_CLIENT_TIMEOUT.equals(key)) {
660+
return KNN_REMOTE_BUILD_CLIENT_TIMEOUT_SETTING;
661+
}
662+
663+
if (KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL.equals(key)) {
664+
return KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL_SETTING;
665+
}
666+
667+
if (KNN_REMOTE_BUILD_CLIENT_USERNAME.equals(key)) {
668+
return KNN_REMOTE_BUILD_CLIENT_USERNAME_SETTING;
669+
}
670+
671+
if (KNN_REMOTE_BUILD_CLIENT_PASSWORD.equals(key)) {
672+
return KNN_REMOTE_BUILD_CLIENT_PASSWORD_SETTING;
673+
}
674+
603675
throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
604676
}
605677

@@ -628,7 +700,12 @@ public List<Setting<?>> getSettings() {
628700
KNN_DERIVED_SOURCE_ENABLED_SETTING,
629701
KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING,
630702
KNN_REMOTE_VECTOR_REPO_SETTING,
631-
KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_SETTING
703+
KNN_INDEX_REMOTE_VECTOR_BUILD_THRESHOLD_SETTING,
704+
KNN_REMOTE_BUILD_SERVICE_ENDPOINT_SETTING,
705+
KNN_REMOTE_BUILD_CLIENT_TIMEOUT_SETTING,
706+
KNN_REMOTE_BUILD_CLIENT_POLL_INTERVAL_SETTING,
707+
KNN_REMOTE_BUILD_CLIENT_USERNAME_SETTING,
708+
KNN_REMOTE_BUILD_CLIENT_PASSWORD_SETTING
632709
);
633710
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
634711
.collect(Collectors.toList());
@@ -748,6 +825,14 @@ public static double getCircuitBreakerUnsetPercentage() {
748825
return KNNSettings.state().getSettingValue(KNNSettings.KNN_CIRCUIT_BREAKER_UNSET_PERCENTAGE);
749826
}
750827

828+
/**
829+
* Gets the remote build service endpoint.
830+
* @return String representation of the remote build service endpoint URL
831+
*/
832+
public static String getRemoteBuildServiceEndpoint() {
833+
return KNNSettings.state().getSettingValue(KNNSettings.KNN_REMOTE_BUILD_SERVICE_ENDPOINT);
834+
}
835+
751836
public static boolean isFaissAVX2Disabled() {
752837
try {
753838
return KNNSettings.state().getSettingValue(KNNSettings.KNN_FAISS_AVX2_DISABLED);

src/main/java/org/opensearch/knn/index/codec/nativeindex/remote/DefaultVectorRepositoryAccessor.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
import java.util.concurrent.atomic.AtomicReference;
3434
import java.util.function.Supplier;
3535

36-
import static org.opensearch.knn.index.codec.nativeindex.remote.RemoteIndexBuildStrategy.DOC_ID_FILE_EXTENSION;
37-
import static org.opensearch.knn.index.codec.nativeindex.remote.RemoteIndexBuildStrategy.VECTORS_PATH;
38-
import static org.opensearch.knn.index.codec.nativeindex.remote.RemoteIndexBuildStrategy.VECTOR_BLOB_FILE_EXTENSION;
3936
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues;
37+
import static org.opensearch.knn.index.remote.KNNRemoteConstants.DOC_ID_FILE_EXTENSION;
38+
import static org.opensearch.knn.index.remote.KNNRemoteConstants.VECTORS_PATH;
39+
import static org.opensearch.knn.index.remote.KNNRemoteConstants.VECTOR_BLOB_FILE_EXTENSION;
4040

4141
@Log4j2
4242
@AllArgsConstructor

src/main/java/org/opensearch/knn/index/codec/nativeindex/remote/RemoteIndexBuildStrategy.java

+10-24
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,18 @@
66
package org.opensearch.knn.index.codec.nativeindex.remote;
77

88
import lombok.extern.log4j.Log4j2;
9-
import org.apache.commons.lang.NotImplementedException;
109
import org.opensearch.common.StopWatch;
1110
import org.opensearch.common.UUIDs;
1211
import org.opensearch.common.annotation.ExperimentalApi;
1312
import org.opensearch.index.IndexSettings;
1413
import org.opensearch.knn.index.KNNSettings;
1514
import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategy;
1615
import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
16+
import org.opensearch.knn.index.remote.RemoteBuildRequest;
17+
import org.opensearch.knn.index.remote.RemoteBuildResponse;
18+
import org.opensearch.knn.index.remote.RemoteIndexClient;
19+
import org.opensearch.knn.index.remote.RemoteIndexClientFactory;
20+
import org.opensearch.knn.index.remote.RemoteStatusResponse;
1721
import org.opensearch.repositories.RepositoriesService;
1822
import org.opensearch.repositories.Repository;
1923
import org.opensearch.repositories.RepositoryMissingException;
@@ -38,10 +42,6 @@ public class RemoteIndexBuildStrategy implements NativeIndexBuildStrategy {
3842
private final NativeIndexBuildStrategy fallbackStrategy;
3943
private final IndexSettings indexSettings;
4044

41-
static final String VECTOR_BLOB_FILE_EXTENSION = ".knnvec";
42-
static final String DOC_ID_FILE_EXTENSION = ".knndid";
43-
static final String VECTORS_PATH = "_vectors";
44-
4545
/**
4646
* Public constructor, intended to be called by {@link org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategyFactory} based in
4747
* part on the return value from {@link RemoteIndexBuildStrategy#shouldBuildIndexRemotely}
@@ -125,18 +125,20 @@ public void buildAndWriteIndex(BuildIndexParams indexInfo) throws IOException {
125125
time_in_millis = stopWatch.stop().totalTime().millis();
126126
log.debug("Repository write took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
127127

128+
RemoteIndexClient client = RemoteIndexClientFactory.getRemoteIndexClient();
129+
RemoteBuildRequest request = new RemoteBuildRequest(indexSettings, indexInfo, getRepository().getMetadata(), blobName);
128130
stopWatch = new StopWatch().start();
129-
submitVectorBuild();
131+
RemoteBuildResponse remoteBuildResponse = client.submitVectorBuild(request);
130132
time_in_millis = stopWatch.stop().totalTime().millis();
131133
log.debug("Submit vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
132134

133135
stopWatch = new StopWatch().start();
134-
String downloadPath = awaitVectorBuild();
136+
RemoteStatusResponse remoteStatusResponse = client.awaitVectorBuild(remoteBuildResponse);
135137
time_in_millis = stopWatch.stop().totalTime().millis();
136138
log.debug("Await vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
137139

138140
stopWatch = new StopWatch().start();
139-
vectorRepositoryAccessor.readFromRepository(downloadPath, indexInfo.getIndexOutputWithBuffer());
141+
vectorRepositoryAccessor.readFromRepository(remoteStatusResponse.getIndexPath(), indexInfo.getIndexOutputWithBuffer());
140142
time_in_millis = stopWatch.stop().totalTime().millis();
141143
log.debug("Repository read took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
142144
} catch (Exception e) {
@@ -163,20 +165,4 @@ private BlobStoreRepository getRepository() throws RepositoryMissingException {
163165
assert repository instanceof BlobStoreRepository : "Repository should be instance of BlobStoreRepository";
164166
return (BlobStoreRepository) repository;
165167
}
166-
167-
/**
168-
* Submit vector build request to remote vector build service
169-
*
170-
*/
171-
private void submitVectorBuild() {
172-
throw new NotImplementedException();
173-
}
174-
175-
/**
176-
* Wait on remote vector build to complete
177-
* @return String The path from which we should perform download, delimited by "/"
178-
*/
179-
private String awaitVectorBuild() throws NotImplementedException {
180-
throw new NotImplementedException();
181-
}
182168
}

src/main/java/org/opensearch/knn/index/engine/KNNEngine.java

+6
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.knn.index.engine.faiss.Faiss;
1212
import org.opensearch.knn.index.engine.lucene.Lucene;
1313
import org.opensearch.knn.index.engine.nmslib.Nmslib;
14+
import org.opensearch.knn.index.remote.RemoteIndexParameters;
1415

1516
import java.util.List;
1617
import java.util.Map;
@@ -216,4 +217,9 @@ public ResolvedMethodContext resolveMethod(
216217
public boolean supportsRemoteIndexBuild() {
217218
return knnLibrary.supportsRemoteIndexBuild();
218219
}
220+
221+
@Override
222+
public RemoteIndexParameters createRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
223+
return knnLibrary.createRemoteIndexingParameters(indexInfoParameters);
224+
}
219225
}

src/main/java/org/opensearch/knn/index/engine/KNNLibrary.java

+6
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77

88
import org.opensearch.common.ValidationException;
99
import org.opensearch.knn.index.SpaceType;
10+
import org.opensearch.knn.index.remote.RemoteIndexParameters;
1011

1112
import java.util.Collections;
1213
import java.util.List;
14+
import java.util.Map;
1315

1416
/**
1517
* KNNLibrary is an interface that helps the plugin communicate with k-NN libraries
@@ -147,4 +149,8 @@ default List<String> mmapFileExtensions() {
147149
default boolean supportsRemoteIndexBuild() {
148150
return false;
149151
}
152+
153+
default RemoteIndexParameters createRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
154+
throw new UnsupportedOperationException("Remote build service does not support this engine");
155+
}
150156
}

src/main/java/org/opensearch/knn/index/engine/faiss/Faiss.java

+10
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
import org.opensearch.knn.index.engine.MethodResolver;
1515
import org.opensearch.knn.index.engine.NativeLibrary;
1616
import org.opensearch.knn.index.engine.ResolvedMethodContext;
17+
import org.opensearch.knn.index.remote.RemoteIndexParameters;
1718

1819
import java.util.Map;
1920
import java.util.function.Function;
2021

2122
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
2223
import static org.opensearch.knn.common.KNNConstants.METHOD_IVF;
24+
import static org.opensearch.knn.common.KNNConstants.NAME;
2325

2426
/**
2527
* Implements NativeLibrary for the faiss native library
@@ -123,4 +125,12 @@ public ResolvedMethodContext resolveMethod(
123125
public boolean supportsRemoteIndexBuild() {
124126
return true;
125127
}
128+
129+
@Override
130+
public RemoteIndexParameters createRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
131+
if (METHOD_HNSW.equals(indexInfoParameters.get(NAME))) {
132+
return FaissHNSWMethod.createRemoteIndexingParameters(indexInfoParameters);
133+
}
134+
throw new IllegalArgumentException("Unsupported method for remote indexing");
135+
}
126136
}

src/main/java/org/opensearch/knn/index/engine/faiss/FaissHNSWMethod.java

+41-1
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
import org.opensearch.knn.index.engine.AbstractKNNMethod;
1414
import org.opensearch.knn.index.engine.DefaultHnswSearchContext;
1515
import org.opensearch.knn.index.engine.Encoder;
16+
import org.opensearch.knn.index.engine.KNNMethodContext;
1617
import org.opensearch.knn.index.engine.MethodComponent;
1718
import org.opensearch.knn.index.engine.MethodComponentContext;
1819
import org.opensearch.knn.index.engine.Parameter;
19-
import org.opensearch.knn.index.engine.KNNMethodContext;
2020
import org.opensearch.knn.index.engine.TrainingConfigValidationInput;
2121
import org.opensearch.knn.index.engine.TrainingConfigValidationOutput;
22+
import org.opensearch.knn.index.remote.RemoteFaissHNSWIndexParameters;
23+
import org.opensearch.knn.index.remote.RemoteIndexParameters;
2224

2325
import java.util.Arrays;
2426
import java.util.Collections;
@@ -29,11 +31,14 @@
2931
import java.util.stream.Collectors;
3032

3133
import static org.opensearch.knn.common.KNNConstants.FAISS_HNSW_DESCRIPTION;
34+
import static org.opensearch.knn.common.KNNConstants.INDEX_DESCRIPTION_PARAMETER;
3235
import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER;
3336
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;
3437
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION;
3538
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH;
3639
import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M;
40+
import static org.opensearch.knn.common.KNNConstants.PARAMETERS;
41+
import static org.opensearch.knn.common.KNNConstants.SPACE_TYPE;
3742

3843
/**
3944
* Faiss HNSW method implementation
@@ -147,4 +152,39 @@ protected Function<TrainingConfigValidationInput, TrainingConfigValidationOutput
147152
return encoder.validateEncoderConfig(trainingConfigValidationInput);
148153
};
149154
}
155+
156+
/**
157+
* Get the parameters that need to be passed to the remote build service for training
158+
*
159+
* @param indexInfoParameters result of indexInfo.getParameters() to parse
160+
* @return Map of parameters to be used as "index_parameters"
161+
*/
162+
public static RemoteIndexParameters createRemoteIndexingParameters(Map<String, Object> indexInfoParameters) {
163+
RemoteFaissHNSWIndexParameters.RemoteFaissHNSWIndexParametersBuilder<?, ?> builder = RemoteFaissHNSWIndexParameters.builder();
164+
assert (indexInfoParameters.get(SPACE_TYPE) instanceof String);
165+
String spaceType = (String) indexInfoParameters.get(SPACE_TYPE);
166+
builder.algorithm(METHOD_HNSW).spaceType(spaceType);
167+
168+
Object innerParams = indexInfoParameters.get(PARAMETERS);
169+
assert (innerParams instanceof Map);
170+
Map<String, Object> innerMap = (Map<String, Object>) innerParams;
171+
assert (innerMap.get(METHOD_PARAMETER_EF_CONSTRUCTION) instanceof Integer);
172+
builder.efConstruction((Integer) innerMap.get(METHOD_PARAMETER_EF_CONSTRUCTION));
173+
assert (innerMap.get(METHOD_PARAMETER_EF_SEARCH) instanceof Integer);
174+
builder.efSearch((Integer) innerMap.get(METHOD_PARAMETER_EF_SEARCH));
175+
Object indexDescription = indexInfoParameters.get(INDEX_DESCRIPTION_PARAMETER);
176+
assert indexDescription instanceof String;
177+
builder.m(getMFromIndexDescription((String) indexDescription));
178+
179+
return builder.build();
180+
}
181+
182+
private static int getMFromIndexDescription(String indexDescription) {
183+
int commaIndex = indexDescription.indexOf(",");
184+
if (commaIndex == -1) {
185+
throw new IllegalArgumentException("Invalid index description: " + indexDescription);
186+
}
187+
String hnswPart = indexDescription.substring(0, commaIndex);
188+
return Integer.parseInt(hnswPart.substring(4));
189+
}
150190
}

0 commit comments

Comments
 (0)