|
| 1 | +/* |
| 2 | + * Copyright OpenSearch Contributors |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + */ |
| 5 | + |
| 6 | +package org.opensearch.knn.index.codec.nativeindex.remote; |
| 7 | + |
| 8 | +import lombok.extern.log4j.Log4j2; |
| 9 | +import org.apache.lucene.search.DocIdSetIterator; |
| 10 | +import org.opensearch.knn.index.vectorvalues.KNNVectorValues; |
| 11 | + |
| 12 | +import java.io.IOException; |
| 13 | +import java.io.InputStream; |
| 14 | +import java.nio.ByteBuffer; |
| 15 | +import java.nio.ByteOrder; |
| 16 | + |
| 17 | +import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; |
| 18 | + |
| 19 | +/** |
| 20 | + * {@link InputStream} implementation of doc ids backed by {@link KNNVectorValues} rather than any file. Intended for use by {@link RemoteIndexBuildStrategy} |
| 21 | + */ |
| 22 | +@Log4j2 |
| 23 | +class DocIdInputStream extends InputStream { |
| 24 | + private final KNNVectorValues<?> knnVectorValues; |
| 25 | + // Doc ids are 4 byte integers, byte read() only returns a single byte, so we will need to track the byte position within a doc id. |
| 26 | + // For simplicity, and to maintain the byte ordering, we use a buffer with size of 1 int. |
| 27 | + private ByteBuffer currentBuffer; |
| 28 | + |
| 29 | + /** |
| 30 | + * Use to represent the doc ids of a {@link KNNVectorValues} as an {@link InputStream}. Expected to be used only with {@link org.opensearch.common.blobstore.BlobContainer#writeBlob}. |
| 31 | + * @param knnVectorValues |
| 32 | + * @throws IOException |
| 33 | + * @see VectorValuesInputStream |
| 34 | + */ |
| 35 | + public DocIdInputStream(KNNVectorValues<?> knnVectorValues) throws IOException { |
| 36 | + this.currentBuffer = ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN); |
| 37 | + this.knnVectorValues = knnVectorValues; |
| 38 | + initializeVectorValues(this.knnVectorValues); |
| 39 | + reloadBuffer(); |
| 40 | + } |
| 41 | + |
| 42 | + @Override |
| 43 | + public int read() throws IOException { |
| 44 | + if (currentBuffer == null) { |
| 45 | + return -1; |
| 46 | + } |
| 47 | + |
| 48 | + if (!currentBuffer.hasRemaining()) { |
| 49 | + advanceAndReloadBuffer(); |
| 50 | + if (currentBuffer == null) { |
| 51 | + return -1; |
| 52 | + } |
| 53 | + } |
| 54 | + |
| 55 | + // Unsigned byte conversion is not technically needed as we are using a ByteBuffer, however we perform this operation still just in |
| 56 | + // case. |
| 57 | + return currentBuffer.get() & 0xFF; |
| 58 | + } |
| 59 | + |
| 60 | + @Override |
| 61 | + public int read(byte[] b, int off, int len) throws IOException { |
| 62 | + if (currentBuffer == null) { |
| 63 | + return -1; |
| 64 | + } |
| 65 | + |
| 66 | + int available = currentBuffer.remaining(); |
| 67 | + if (available <= 0) { |
| 68 | + advanceAndReloadBuffer(); |
| 69 | + if (currentBuffer == null) { |
| 70 | + return -1; |
| 71 | + } |
| 72 | + available = currentBuffer.remaining(); |
| 73 | + } |
| 74 | + |
| 75 | + int bytesToRead = Math.min(available, len); |
| 76 | + currentBuffer.get(b, off, bytesToRead); |
| 77 | + return bytesToRead; |
| 78 | + } |
| 79 | + |
| 80 | + /** |
| 81 | + * Advances to the next doc, and then refills the buffer with the new doc. |
| 82 | + * @throws IOException |
| 83 | + */ |
| 84 | + private void advanceAndReloadBuffer() throws IOException { |
| 85 | + int docId = knnVectorValues.nextDoc(); |
| 86 | + if (docId != -1 && docId != DocIdSetIterator.NO_MORE_DOCS) { |
| 87 | + reloadBuffer(); |
| 88 | + } else { |
| 89 | + // Reset buffer to null to indicate that there are no more docs to be read |
| 90 | + currentBuffer = null; |
| 91 | + } |
| 92 | + } |
| 93 | + |
| 94 | + /** |
| 95 | + * Reload {@link currentBuffer} with the current doc id that {@link knnVectorValues} is pointing to |
| 96 | + * @throws IOException |
| 97 | + */ |
| 98 | + private void reloadBuffer() throws IOException { |
| 99 | + currentBuffer.clear(); |
| 100 | + currentBuffer.putInt(knnVectorValues.docId()); |
| 101 | + currentBuffer.position(0); |
| 102 | + } |
| 103 | +} |
0 commit comments