@@ -74,50 +74,36 @@ public void writeToRepository(
        initializeVectorValues(knnVectorValues);
        long vectorBlobLength = (long) knnVectorValues.bytesPerVector() * totalLiveDocs;

-       if (blobContainer instanceof AsyncMultiStreamBlobContainer) {
+       if (blobContainer instanceof AsyncMultiStreamBlobContainer asyncBlobContainer) {
            // First initiate vectors upload
            log.debug("Repository {} Supports Parallel Blob Upload", repository);
            // WriteContext is the main entry point into asyncBlobUpload. It stores all of our upload configurations, analogous to
            // BuildIndexParams
-           WriteContext writeContext = new WriteContext.Builder().fileName(blobName + VECTOR_BLOB_FILE_EXTENSION)
-               .streamContextSupplier((partSize) -> getStreamContext(partSize, vectorBlobLength, knnVectorValuesSupplier, vectorDataType))
-               .fileSize(vectorBlobLength)
-               .failIfAlreadyExists(true)
-               .writePriority(WritePriority.NORMAL)
-               // TODO: Checksum implementations -- It is difficult to calculate a checksum on the knnVectorValues as
-               // there is no underlying file upon which we can create the checksum. We should be able to create a
-               // checksum still by iterating through once, however this will be an expensive operation.
-               .uploadFinalizer((bool) -> {})
-               .doRemoteDataIntegrityCheck(false)
-               .expectedChecksum(null)
-               .build();
+           WriteContext writeContext = createWriteContext(blobName, vectorBlobLength, knnVectorValuesSupplier, vectorDataType);

            AtomicReference<Exception> exception = new AtomicReference<>();
            final CountDownLatch latch = new CountDownLatch(1);
-           ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(
-               writeContext,
-               new LatchedActionListener<>(new ActionListener<>() {
-                   @Override
-                   public void onResponse(Void unused) {
-                       log.debug(
-                           "Parallel vector upload succeeded for blob {} with size {}",
-                           blobName + VECTOR_BLOB_FILE_EXTENSION,
-                           vectorBlobLength
-                       );
-                   }
-
-                   @Override
-                   public void onFailure(Exception e) {
-                       log.error(
-                           "Parallel vector upload failed for blob {} with size {}",
-                           blobName + VECTOR_BLOB_FILE_EXTENSION,
-                           vectorBlobLength,
-                           e
-                       );
-                       exception.set(e);
-                   }
-               }, latch)
-           );
+           asyncBlobContainer.asyncBlobUpload(writeContext, new LatchedActionListener<>(new ActionListener<>() {
+               @Override
+               public void onResponse(Void unused) {
+                   log.debug(
+                       "Parallel vector upload succeeded for blob {} with size {}",
+                       blobName + VECTOR_BLOB_FILE_EXTENSION,
+                       vectorBlobLength
+                   );
+               }
+
+               @Override
+               public void onFailure(Exception e) {
+                   log.error(
+                       "Parallel vector upload failed for blob {} with size {}",
+                       blobName + VECTOR_BLOB_FILE_EXTENSION,
+                       vectorBlobLength,
+                       e
+                   );
+                   exception.set(e);
+               }
+           }, latch));

            // Then upload doc id blob before waiting on vector uploads
            // TODO: We wrap with a BufferedInputStream to support retries. We can tune this buffer size to optimize performance.
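(Note, not part of the diff: the TODO above refers to BufferedInputStream's default 8 KiB buffer. If tuning is pursued, the JDK constructor that takes an explicit buffer size is the natural knob; the size below is an illustrative assumption, not a value this PR sets.)

    // Illustrative sketch only: same wrapping as in the hunk above, with an explicit buffer size.
    InputStream vectorStream = new BufferedInputStream(
        new VectorValuesInputStream(knnVectorValuesSupplier.get(), vectorDataType),
        1 << 16 // 64 KiB; assumed value for illustration, the PR keeps the 8 KiB default
    );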
@@ -130,9 +116,14 @@ public void onFailure(Exception e) {
        } else {
            log.debug("Repository {} Does Not Support Parallel Blob Upload", repository);
            // Write Vectors
-           InputStream vectorStream = new BufferedInputStream(new VectorValuesInputStream(knnVectorValuesSupplier.get(), vectorDataType));
-           log.debug("Writing {} bytes for {} docs to {}", vectorBlobLength, totalLiveDocs, blobName + VECTOR_BLOB_FILE_EXTENSION);
-           blobContainer.writeBlob(blobName + VECTOR_BLOB_FILE_EXTENSION, vectorStream, vectorBlobLength, true);
+           try (
+               InputStream vectorStream = new BufferedInputStream(
+                   new VectorValuesInputStream(knnVectorValuesSupplier.get(), vectorDataType)
+               )
+           ) {
+               log.debug("Writing {} bytes for {} docs to {}", vectorBlobLength, totalLiveDocs, blobName + VECTOR_BLOB_FILE_EXTENSION);
+               blobContainer.writeBlob(blobName + VECTOR_BLOB_FILE_EXTENSION, vectorStream, vectorBlobLength, true);
+           }
            // Then write doc ids
            writeDocIds(knnVectorValuesSupplier.get(), vectorBlobLength, totalLiveDocs, blobName, blobContainer);
        }
@@ -154,14 +145,15 @@ private void writeDocIds(
        String blobName,
        BlobContainer blobContainer
    ) throws IOException {
-       InputStream docStream = new BufferedInputStream(new DocIdInputStream(knnVectorValues));
-       log.debug(
-           "Writing {} bytes for {} docs ids to {}",
-           vectorBlobLength,
-           totalLiveDocs * Integer.BYTES,
-           blobName + DOC_ID_FILE_EXTENSION
-       );
-       blobContainer.writeBlob(blobName + DOC_ID_FILE_EXTENSION, docStream, totalLiveDocs * Integer.BYTES, true);
+       try (InputStream docStream = new BufferedInputStream(new DocIdInputStream(knnVectorValues))) {
+           log.debug(
+               "Writing {} bytes for {} docs ids to {}",
+               vectorBlobLength,
+               totalLiveDocs * Integer.BYTES,
+               blobName + DOC_ID_FILE_EXTENSION
+           );
+           blobContainer.writeBlob(blobName + DOC_ID_FILE_EXTENSION, docStream, totalLiveDocs * Integer.BYTES, true);
+       }
    }

    /**
@@ -215,6 +207,30 @@ private CheckedTriFunction<Integer, Long, Long, InputStreamContainer, IOExceptio
        });
    }

+   /**
+    * Creates a {@link WriteContext} meant to be used by {@link AsyncMultiStreamBlobContainer#asyncBlobUpload}.
+    * Note: Integrity checking is left up to the vendor repository and SDK implementations.
+    * @param blobName
+    * @param vectorBlobLength
+    * @param knnVectorValuesSupplier
+    * @param vectorDataType
+    * @return
+    */
+   private WriteContext createWriteContext(
+       String blobName,
+       long vectorBlobLength,
+       Supplier<KNNVectorValues<?>> knnVectorValuesSupplier,
+       VectorDataType vectorDataType
+   ) {
+       return new WriteContext.Builder().fileName(blobName + VECTOR_BLOB_FILE_EXTENSION)
+           .streamContextSupplier((partSize) -> getStreamContext(partSize, vectorBlobLength, knnVectorValuesSupplier, vectorDataType))
+           .fileSize(vectorBlobLength)
+           .failIfAlreadyExists(true)
+           .writePriority(WritePriority.NORMAL)
+           .uploadFinalizer((bool) -> {})
+           .build();
+   }
+
    @Override
    public void readFromRepository(String path, IndexOutputWithBuffer indexOutputWithBuffer) throws IOException {
        if (path == null || path.isEmpty()) {
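For readers unfamiliar with the LatchedActionListener pattern used in the parallel branch, here is a condensed sketch of the overall flow ("first initiate vectors upload, then upload doc id blob before waiting on vector uploads", per the comments in the first hunk). Only the asyncBlobUpload call, WriteContext, CountDownLatch, and AtomicReference usage mirror the diff; the await and error-propagation details are assumptions for illustration, not code from this PR.

    // Sketch only -- not code from this PR.
    AtomicReference<Exception> exception = new AtomicReference<>();
    CountDownLatch latch = new CountDownLatch(1);

    // 1. Kick off the vector blob upload; the latch counts down when the listener fires.
    asyncBlobContainer.asyncBlobUpload(writeContext, new LatchedActionListener<>(new ActionListener<>() {
        @Override
        public void onResponse(Void unused) { /* success, nothing to record */ }

        @Override
        public void onFailure(Exception e) { exception.set(e); }
    }, latch));

    // 2. Upload the doc id blob on the current thread while the vectors stream in parallel.
    writeDocIds(knnVectorValuesSupplier.get(), vectorBlobLength, totalLiveDocs, blobName, blobContainer);

    // 3. Wait for the async upload and surface any captured failure.
    latch.await(); // InterruptedException handling elided in this sketch
    if (exception.get() != null) {
        throw new IOException("Vector blob upload failed", exception.get());
    }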