5
5
6
6
package org .opensearch .knn .index .codec .nativeindex .remote ;
7
7
8
+ import com .google .common .annotations .VisibleForTesting ;
8
9
import lombok .extern .log4j .Log4j2 ;
9
10
import org .apache .commons .lang .NotImplementedException ;
11
+ import org .apache .lucene .store .IndexOutput ;
10
12
import org .opensearch .action .LatchedActionListener ;
11
13
import org .opensearch .common .CheckedTriFunction ;
12
14
import org .opensearch .common .StopWatch ;
25
27
import org .opensearch .knn .index .VectorDataType ;
26
28
import org .opensearch .knn .index .codec .nativeindex .NativeIndexBuildStrategy ;
27
29
import org .opensearch .knn .index .codec .nativeindex .model .BuildIndexParams ;
30
+ import org .opensearch .knn .index .engine .KNNEngine ;
28
31
import org .opensearch .knn .index .vectorvalues .KNNVectorValues ;
29
32
import org .opensearch .repositories .RepositoriesService ;
30
33
import org .opensearch .repositories .Repository ;
@@ -143,7 +146,8 @@ public void buildAndWriteIndex(BuildIndexParams indexInfo) throws IOException {
143
146
log .debug ("Await vector build took {} ms for vector field [{}]" , time_in_millis , indexInfo .getFieldName ());
144
147
145
148
stopWatch = new StopWatch ().start ();
146
- readFromRepository ();
149
+ // TODO: This blob will be retrieved from the remote vector build service status response
150
+ readFromRepository (blobName + KNNEngine .FAISS .getExtension (), indexInfo .getIndexOutputWithBuffer ().getIndexOutput ());
147
151
time_in_millis = stopWatch .stop ().totalTime ().millis ();
148
152
log .debug ("Repository read took {} ms for vector field [{}]" , time_in_millis , indexInfo .getFieldName ());
149
153
} catch (Exception e ) {
@@ -171,6 +175,14 @@ private BlobStoreRepository getRepository() throws RepositoryMissingException {
171
175
return (BlobStoreRepository ) repository ;
172
176
}
173
177
178
+ /**
179
+ * @return The blob container to read/write from, determined from the repository base path and index settings. This container is where all blobs will be written to.
180
+ */
181
+ private BlobContainer getBlobContainer () {
182
+ BlobPath path = getRepository ().basePath ().add (indexSettings .getUUID () + VECTORS_PATH );
183
+ return getRepository ().blobStore ().blobContainer (path );
184
+ }
185
+
174
186
/**
175
187
* This method is responsible for writing both the vector blobs and doc ids provided by {@code knnVectorValuesSupplier} to the vector repository configured by {@link KNN_REMOTE_VECTOR_REPO_SETTING}.
176
188
* If the repository implements {@link AsyncMultiStreamBlobContainer}, then parallel uploads will be used. Parallel uploads are backed by a {@link WriteContext}, for which we have a custom
@@ -192,9 +204,7 @@ private void writeToRepository(
192
204
VectorDataType vectorDataType ,
193
205
Supplier <KNNVectorValues <?>> knnVectorValuesSupplier
194
206
) throws IOException , InterruptedException {
195
- // Get the blob container based on blobName and the repo base path. This is where the blobs will be written to.
196
- BlobPath path = getRepository ().basePath ().add (indexSettings .getUUID () + VECTORS_PATH );
197
- BlobContainer blobContainer = getRepository ().blobStore ().blobContainer (path );
207
+ BlobContainer blobContainer = getBlobContainer ();
198
208
199
209
KNNVectorValues <?> knnVectorValues = knnVectorValuesSupplier .get ();
200
210
initializeVectorValues (knnVectorValues );
@@ -343,7 +353,27 @@ private void awaitVectorBuild() {
343
353
/**
344
354
* Read constructed vector file from remote repository and write to IndexOutput
345
355
*/
346
- private void readFromRepository () {
347
- throw new NotImplementedException ();
356
+ @ VisibleForTesting
357
+ void readFromRepository (String blobName , IndexOutput indexOutput ) throws IOException {
358
+ BlobContainer blobContainer = getBlobContainer ();
359
+ // TODO: We are using the sequential download API as multi-part parallel download is difficult for us to implement today and
360
+ // requires some changes in core. For more details, see: https://github.com/opensearch-project/k-NN/issues/2464
361
+ InputStream graphStream = blobContainer .readBlob (blobName );
362
+
363
+ // Allocate buffer of 64KB, same as used for CPU builds, see: IndexOutputWithBuffer
364
+ int CHUNK_SIZE = 64 * 1024 ;
365
+ byte [] buffer = new byte [CHUNK_SIZE ];
366
+
367
+ int bytesRead = 0 ;
368
+ // InputStream uses -1 indicates there are no more bytes to be read
369
+ while (bytesRead != -1 ) {
370
+ // Try to read CHUNK_SIZE into the buffer. The actual amount read may be less.
371
+ bytesRead = graphStream .read (buffer , 0 , CHUNK_SIZE );
372
+ assert bytesRead <= CHUNK_SIZE ;
373
+ // However many bytes we read, write it to the IndexOutput if != -1
374
+ if (bytesRead != -1 ) {
375
+ indexOutput .writeBytes (buffer , 0 , bytesRead );
376
+ }
377
+ }
348
378
}
349
379
}
0 commit comments