|
| 1 | +/* |
| 2 | + * Copyright OpenSearch Contributors |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + */ |
| 5 | + |
| 6 | +package org.opensearch.knn.index.codec.KNN10010Codec; |
| 7 | + |
| 8 | +import org.apache.lucene.codecs.KnnVectorsFormat; |
| 9 | +import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; |
| 10 | +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; |
| 11 | +import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat; |
| 12 | +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; |
| 13 | +import org.opensearch.common.Nullable; |
| 14 | +import org.opensearch.common.collect.Tuple; |
| 15 | +import org.opensearch.index.mapper.MapperService; |
| 16 | +import org.opensearch.knn.index.KNNSettings; |
| 17 | +import org.opensearch.knn.index.SpaceType; |
| 18 | +import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat; |
| 19 | +import org.opensearch.knn.index.codec.KNN9120Codec.KNN9120HnswBinaryVectorsFormat; |
| 20 | +import org.opensearch.knn.index.engine.KNNEngine; |
| 21 | +import org.opensearch.knn.index.remote.RemoteIndexBuilder; |
| 22 | + |
| 23 | +import java.util.Optional; |
| 24 | +import java.util.concurrent.ExecutorService; |
| 25 | +import java.util.concurrent.Executors; |
| 26 | + |
| 27 | +/** |
| 28 | + * Class provides per field format implementation for Lucene Knn vector type |
| 29 | + */ |
| 30 | +public class KNN10010PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat { |
| 31 | + private static final Tuple<Integer, ExecutorService> DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE = Tuple.tuple(1, null); |
| 32 | + @Nullable |
| 33 | + private RemoteIndexBuilder remoteIndexBuilder; |
| 34 | + |
| 35 | + public KNN10010PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService, final RemoteIndexBuilder remoteIndexBuilder) { |
| 36 | + super( |
| 37 | + mapperService, |
| 38 | + Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN, |
| 39 | + Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH, |
| 40 | + Lucene99HnswVectorsFormat::new, |
| 41 | + knnVectorsFormatParams -> { |
| 42 | + final Tuple<Integer, ExecutorService> mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService(); |
| 43 | + // There is an assumption here that hamming space will only be used for binary vectors. This will need to be fixed if that |
| 44 | + // changes in the future. |
| 45 | + if (knnVectorsFormatParams.getSpaceType() == SpaceType.HAMMING) { |
| 46 | + return new KNN9120HnswBinaryVectorsFormat( |
| 47 | + knnVectorsFormatParams.getMaxConnections(), |
| 48 | + knnVectorsFormatParams.getBeamWidth(), |
| 49 | + // number of merge threads |
| 50 | + mergeThreadCountAndExecutorService.v1(), |
| 51 | + // executor service |
| 52 | + mergeThreadCountAndExecutorService.v2() |
| 53 | + ); |
| 54 | + } else { |
| 55 | + return new Lucene99HnswVectorsFormat( |
| 56 | + knnVectorsFormatParams.getMaxConnections(), |
| 57 | + knnVectorsFormatParams.getBeamWidth(), |
| 58 | + // number of merge threads |
| 59 | + mergeThreadCountAndExecutorService.v1(), |
| 60 | + // executor service |
| 61 | + mergeThreadCountAndExecutorService.v2() |
| 62 | + ); |
| 63 | + } |
| 64 | + }, |
| 65 | + knnScalarQuantizedVectorsFormatParams -> { |
| 66 | + final Tuple<Integer, ExecutorService> mergeThreadCountAndExecutorService = getMergeThreadCountAndExecutorService(); |
| 67 | + return new Lucene99HnswScalarQuantizedVectorsFormat( |
| 68 | + knnScalarQuantizedVectorsFormatParams.getMaxConnections(), |
| 69 | + knnScalarQuantizedVectorsFormatParams.getBeamWidth(), |
| 70 | + // Number of merge threads |
| 71 | + mergeThreadCountAndExecutorService.v1(), |
| 72 | + knnScalarQuantizedVectorsFormatParams.getBits(), |
| 73 | + knnScalarQuantizedVectorsFormatParams.isCompressFlag(), |
| 74 | + knnScalarQuantizedVectorsFormatParams.getConfidenceInterval(), |
| 75 | + // Executor service |
| 76 | + mergeThreadCountAndExecutorService.v2() |
| 77 | + ); |
| 78 | + } |
| 79 | + ); |
| 80 | + this.remoteIndexBuilder = remoteIndexBuilder; |
| 81 | + } |
| 82 | + |
| 83 | + public KNN10010PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) { |
| 84 | + this(mapperService, null); |
| 85 | + } |
| 86 | + |
| 87 | + /** |
| 88 | + * This method returns the maximum dimension allowed from KNNEngine for Lucene codec |
| 89 | + * |
| 90 | + * @param fieldName Name of the field, ignored |
| 91 | + * @return Maximum constant dimension set by KNNEngine |
| 92 | + */ |
| 93 | + @Override |
| 94 | + public int getMaxDimensions(String fieldName) { |
| 95 | + return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE); |
| 96 | + } |
| 97 | + |
| 98 | + private static Tuple<Integer, ExecutorService> getMergeThreadCountAndExecutorService() { |
| 99 | + // To ensure that only once we are fetching the settings per segment, we are fetching the num threads once while |
| 100 | + // creating the executors |
| 101 | + int mergeThreadCount = KNNSettings.getIndexThreadQty(); |
| 102 | + // We need to return null whenever the merge threads are <=1, as lucene assumes that if number of threads are 1 |
| 103 | + // then we should be giving a null value of the executor |
| 104 | + if (mergeThreadCount <= 1) { |
| 105 | + return DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE; |
| 106 | + } else { |
| 107 | + return Tuple.tuple(mergeThreadCount, Executors.newFixedThreadPool(mergeThreadCount)); |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + @Override |
| 112 | + protected KnnVectorsFormat nativeEngineVectorsFormat() { |
| 113 | + int approximateThreshold = getApproximateThresholdValue(); |
| 114 | + return new NativeEngines10010KnnVectorsFormat( |
| 115 | + new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()), |
| 116 | + approximateThreshold, |
| 117 | + remoteIndexBuilder |
| 118 | + ); |
| 119 | + } |
| 120 | +} |
0 commit comments