|
11 | 11 | import org.opensearch.common.StopWatch;
|
12 | 12 | import org.opensearch.common.annotation.ExperimentalApi;
|
13 | 13 | import org.opensearch.index.IndexSettings;
|
| 14 | +import org.opensearch.knn.common.KNNConstants; |
14 | 15 | import org.opensearch.knn.common.featureflags.KNNFeatureFlags;
|
15 | 16 | import org.opensearch.knn.index.KNNSettings;
|
16 | 17 | import org.opensearch.knn.index.codec.nativeindex.NativeIndexBuildStrategy;
|
17 | 18 | import org.opensearch.knn.index.codec.nativeindex.model.BuildIndexParams;
|
| 19 | +import org.opensearch.knn.index.remote.RemoteBuildRequest; |
| 20 | +import org.opensearch.knn.index.remote.RemoteIndexClient; |
18 | 21 | import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
|
19 | 22 | import org.opensearch.repositories.RepositoriesService;
|
20 | 23 | import org.opensearch.repositories.Repository;
|
21 | 24 | import org.opensearch.repositories.RepositoryMissingException;
|
22 | 25 | import org.opensearch.repositories.blobstore.BlobStoreRepository;
|
23 | 26 |
|
24 | 27 | import java.io.IOException;
|
| 28 | +import java.util.HashMap; |
| 29 | +import java.util.Map; |
25 | 30 | import java.util.function.Supplier;
|
26 | 31 |
|
27 | 32 | import static org.opensearch.knn.index.KNNSettings.KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
|
28 | 33 | import static org.opensearch.knn.index.KNNSettings.KNN_REMOTE_VECTOR_REPO_SETTING;
|
| 34 | +import static org.opensearch.knn.index.KNNSettings.KNN_SPACE_TYPE; |
| 35 | +import static org.opensearch.knn.index.KNNSettings.state; |
29 | 36 |
|
30 | 37 | /**
|
31 | 38 | * This class orchestrates building vector indices. It handles uploading data to a repository, submitting a remote
|
@@ -54,7 +61,7 @@ public RemoteIndexBuildStrategy(Supplier<RepositoriesService> repositoriesServic
|
54 | 61 | * @return whether to use the remote build feature
|
55 | 62 | */
|
56 | 63 | public static boolean shouldBuildIndexRemotely(IndexSettings indexSettings) {
|
57 |
| - String vectorRepo = KNNSettings.state().getSettingValue(KNN_REMOTE_VECTOR_REPO_SETTING.getKey()); |
| 64 | + String vectorRepo = state().getSettingValue(KNN_REMOTE_VECTOR_REPO_SETTING.getKey()); |
58 | 65 | return KNNFeatureFlags.isKNNRemoteVectorBuildEnabled()
|
59 | 66 | && indexSettings.getValue(KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING)
|
60 | 67 | && vectorRepo != null
|
@@ -88,17 +95,18 @@ public void buildAndWriteIndex(BuildIndexParams indexInfo) throws IOException {
|
88 | 95 | log.debug("Repository write took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
|
89 | 96 |
|
90 | 97 | stopWatch = new StopWatch().start();
|
91 |
| - submitVectorBuild(); |
| 98 | + RemoteBuildRequest buildRequest = constructBuildRequest(indexInfo); |
| 99 | + String jobId = RemoteIndexClient.getInstance().submitVectorBuild(buildRequest); |
92 | 100 | time_in_millis = stopWatch.stop().totalTime().millis();
|
93 | 101 | log.debug("Submit vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
|
94 | 102 |
|
95 | 103 | stopWatch = new StopWatch().start();
|
96 |
| - awaitVectorBuild(); |
| 104 | + String indexPath = awaitVectorBuild(jobId); |
97 | 105 | time_in_millis = stopWatch.stop().totalTime().millis();
|
98 | 106 | log.debug("Await vector build took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
|
99 | 107 |
|
100 | 108 | stopWatch = new StopWatch().start();
|
101 |
| - readFromRepository(); |
| 109 | + readFromRepository(indexPath); |
102 | 110 | time_in_millis = stopWatch.stop().totalTime().millis();
|
103 | 111 | log.debug("Repository read took {} ms for vector field [{}]", time_in_millis, indexInfo.getFieldName());
|
104 | 112 | } catch (Exception e) {
|
@@ -156,14 +164,58 @@ private void submitVectorBuild() {
|
156 | 164 | /**
|
157 | 165 | * Wait on remote vector build to complete
|
158 | 166 | */
|
159 |
| - private void awaitVectorBuild() { |
| 167 | + private String awaitVectorBuild(String jobId) { |
160 | 168 | throw new NotImplementedException();
|
161 | 169 | }
|
162 | 170 |
|
163 | 171 | /**
|
164 | 172 | * Read constructed vector file from remote repository and write to IndexOutput
|
165 | 173 | */
|
166 |
| - private void readFromRepository() { |
| 174 | + private void readFromRepository(String indexPath) { |
167 | 175 | throw new NotImplementedException();
|
168 | 176 | }
|
| 177 | + |
| 178 | + /** |
| 179 | + * Construct the JSON request body and HTTP request for the index build request |
| 180 | + * @return HttpExecuteRequest for the index build request with parameters set |
| 181 | + */ |
| 182 | + public RemoteBuildRequest constructBuildRequest(BuildIndexParams indexInfo) throws IOException { |
| 183 | + String repositoryType = getRepository().getMetadata().type(); |
| 184 | + String containerName = switch (repositoryType) { |
| 185 | + case "s3" -> getRepository().getMetadata().settings().get("bucket"); |
| 186 | + case "fs" -> getRepository().getMetadata().settings().get("location"); |
| 187 | + default -> throw new IllegalStateException("Unexpected value: " + repositoryType); |
| 188 | + }; |
| 189 | + String vectorPath = null; // blobName + VECTOR_BLOB_FILE_EXTENSION |
| 190 | + String docIdPath = null; // blobName + DOC_ID_FILE_EXTENSION |
| 191 | + String tenantId = null; // indexSettings.getSettings().get(ClusterName.CLUSTER_NAME_SETTING.getKey()); |
| 192 | + int dimension = 0; // TODO |
| 193 | + int docCount = indexInfo.getTotalLiveDocs(); |
| 194 | + String dataType = indexInfo.getVectorDataType().getValue(); // TODO need to fetch encoder param to get fp16 vs fp32 |
| 195 | + String engine = indexInfo.getKnnEngine().getName(); |
| 196 | + |
| 197 | + String spaceType = indexInfo.getParameters().get(KNNConstants.SPACE_TYPE).toString(); // OR |
| 198 | + String spaceType2 = KNNSettings.state().getSettingValue(KNN_SPACE_TYPE); |
| 199 | + |
| 200 | + Map<String, Object> algorithmParams = new HashMap<>(); |
| 201 | + algorithmParams.put("ef_construction", 100); |
| 202 | + algorithmParams.put("m", 16); |
| 203 | + |
| 204 | + Map<String, Object> indexParameters = new HashMap<>(); |
| 205 | + indexParameters.put("algorithm", "hnsw"); |
| 206 | + indexParameters.put("algorithm_parameters", algorithmParams); |
| 207 | + |
| 208 | + return RemoteBuildRequest.builder() |
| 209 | + .repositoryType(repositoryType) |
| 210 | + .containerName(containerName) |
| 211 | + .vectorPath(vectorPath) |
| 212 | + .docIdPath(docIdPath) |
| 213 | + .tenantId(tenantId) |
| 214 | + .dimension(dimension) |
| 215 | + .docCount(docCount) |
| 216 | + .dataType(dataType) |
| 217 | + .engine(engine) |
| 218 | + .indexParameters(indexParameters) |
| 219 | + .build(); |
| 220 | + } |
169 | 221 | }
|
0 commit comments