Skip to content

Commit 7900dbb

Browse files
Bump lucene codec to 99 (#1383) (#1386)
* Add Lucene Codec 9.9
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
* Fix import statements for Lucene95 Codec
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
* Fix SegmentInfo Constructor in Test
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
* Temporarily Ignore Old Codec Tests
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
* Add CHANGELOG
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
* Delete Old Codec Tests
  Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
---------
Signed-off-by: Naveen Tatikonda <navtat@amazon.com>
(cherry picked from commit 45e9e54)
1 parent 7c65643 commit 7900dbb

File tree

13 files changed

+142
-90
lines changed

13 files changed

+142
-90
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
3434
* Upgrade urllib to 1.26.17 [#1278](https://github.com/opensearch-project/k-NN/pull/1278)
3535
* Upgrade urllib to 1.26.18 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
3636
* Upgrade guava to 32.1.3 [#1319](https://github.com/opensearch-project/k-NN/pull/1319)
37+
* Bump lucene codec to 99 [#1383](https://github.com/opensearch-project/k-NN/pull/1383)
3738
### Refactoring

src/main/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950PerFieldKnnVectorsFormat.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
package org.opensearch.knn.index.codec.KNN950Codec;
77

8-
import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
8+
import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsFormat;
99
import org.opensearch.index.mapper.MapperService;
1010
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
1111
import org.opensearch.knn.index.util.KNNEngine;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.knn.index.codec.KNN990Codec;
7+
8+
import lombok.Builder;
9+
import org.apache.lucene.codecs.Codec;
10+
import org.apache.lucene.codecs.CompoundFormat;
11+
import org.apache.lucene.codecs.DocValuesFormat;
12+
import org.apache.lucene.codecs.FilterCodec;
13+
import org.apache.lucene.codecs.KnnVectorsFormat;
14+
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
15+
import org.opensearch.knn.index.codec.KNNCodecVersion;
16+
import org.opensearch.knn.index.codec.KNNFormatFacade;
17+
18+
/**
19+
* KNN codec for Lucene 9.9: a FilterCodec that forwards to a delegate codec while supplying its own doc values, compound and KNN vectors formats
20+
*/
21+
public class KNN990Codec extends FilterCodec {
22+
private static final KNNCodecVersion VERSION = KNNCodecVersion.V_9_9_0;
23+
private final KNNFormatFacade knnFormatFacade;
24+
private final PerFieldKnnVectorsFormat perFieldKnnVectorsFormat;
25+
26+
/**
27+
* No arg constructor that uses the default delegate codec (Lucene99) and the default per field vectors format of VERSION
28+
*/
29+
public KNN990Codec() {
30+
this(VERSION.getDefaultCodecDelegate(), VERSION.getPerFieldKnnVectorsFormat());
31+
}
32+
33+
/**
34+
* Constructor taking an explicit delegate codec and per field vectors format; the codec name is not a
35+
* parameter, it is read from VERSION. The Lombok builder ({@code KNN990Codec.builder()}) is generated from this ctor.
36+
*
37+
* @param delegate codec that will perform all operations this codec does not override; also used to create the KNNFormatFacade
38+
* @param knnVectorsFormat per field format for KnnVector, returned as-is by {@link #knnVectorsFormat()}
39+
*/
40+
@Builder
41+
protected KNN990Codec(Codec delegate, PerFieldKnnVectorsFormat knnVectorsFormat) {
42+
super(VERSION.getCodecName(), delegate);
43+
knnFormatFacade = VERSION.getKnnFormatFacadeSupplier().apply(delegate);
44+
perFieldKnnVectorsFormat = knnVectorsFormat;
45+
}
46+
47+
@Override
48+
public DocValuesFormat docValuesFormat() {
49+
return knnFormatFacade.docValuesFormat();
50+
}
51+
52+
@Override
53+
public CompoundFormat compoundFormat() {
54+
return knnFormatFacade.compoundFormat();
55+
}
56+
57+
@Override
58+
public KnnVectorsFormat knnVectorsFormat() {
59+
return perFieldKnnVectorsFormat;
60+
}
61+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.knn.index.codec.KNN990Codec;
7+
8+
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
9+
import org.opensearch.index.mapper.MapperService;
10+
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
11+
import org.opensearch.knn.index.util.KNNEngine;
12+
13+
import java.util.Optional;
14+
15+
/**
16+
* Per field KNN vectors format for Lucene Knn vector fields, using Lucene99HnswVectorsFormat and its default max connections and beam width
17+
*/
18+
public class KNN990PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsFormat {
19+
20+
public KNN990PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
21+
super(
22+
mapperService,
23+
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
24+
Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
25+
() -> new Lucene99HnswVectorsFormat(),
26+
(maxConn, beamWidth) -> new Lucene99HnswVectorsFormat(maxConn, beamWidth)
27+
);
28+
}
29+
30+
/**
31+
* Returns the maximum dimension allowed from KNNEngine for Lucene codec
32+
*
33+
* @param fieldName Name of the field, ignored
34+
* @return Maximum constant dimension set by KNNEngine
35+
*/
36+
@Override
37+
public int getMaxDimensions(String fieldName) {
38+
return KNNEngine.getMaxDimensionByEngine(KNNEngine.LUCENE);
39+
}
40+
}

src/main/java/org/opensearch/knn/index/codec/KNNCodecVersion.java

+20-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
import org.apache.lucene.backward_codecs.lucene92.Lucene92Codec;
1212
import org.apache.lucene.codecs.Codec;
1313
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
14-
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
14+
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
15+
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
1516
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
1617
import org.opensearch.index.mapper.MapperService;
1718
import org.opensearch.knn.index.codec.KNN80Codec.KNN80CompoundFormat;
@@ -23,6 +24,8 @@
2324
import org.opensearch.knn.index.codec.KNN940Codec.KNN940PerFieldKnnVectorsFormat;
2425
import org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec;
2526
import org.opensearch.knn.index.codec.KNN950Codec.KNN950PerFieldKnnVectorsFormat;
27+
import org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec;
28+
import org.opensearch.knn.index.codec.KNN990Codec.KNN990PerFieldKnnVectorsFormat;
2629

2730
import java.util.Optional;
2831
import java.util.function.BiFunction;
@@ -92,9 +95,24 @@ public enum KNNCodecVersion {
9295
.knnVectorsFormat(new KNN950PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
9396
.build(),
9497
KNN950Codec::new
98+
),
99+
100+
V_9_9_0(
101+
"KNN990Codec",
102+
new Lucene99Codec(),
103+
new KNN990PerFieldKnnVectorsFormat(Optional.empty()),
104+
(delegate) -> new KNNFormatFacade(
105+
new KNN80DocValuesFormat(delegate.docValuesFormat()),
106+
new KNN80CompoundFormat(delegate.compoundFormat())
107+
),
108+
(userCodec, mapperService) -> KNN990Codec.builder()
109+
.delegate(userCodec)
110+
.knnVectorsFormat(new KNN990PerFieldKnnVectorsFormat(Optional.ofNullable(mapperService)))
111+
.build(),
112+
KNN990Codec::new
95113
);
96114

97-
private static final KNNCodecVersion CURRENT = V_9_5_0;
115+
private static final KNNCodecVersion CURRENT = V_9_9_0;
98116

99117
private final String codecName;
100118
private final Codec defaultCodecDelegate;

src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ org.opensearch.knn.index.codec.KNN87Codec.KNN87Codec
55
org.opensearch.knn.index.codec.KNN910Codec.KNN910Codec
66
org.opensearch.knn.index.codec.KNN920Codec.KNN920Codec
77
org.opensearch.knn.index.codec.KNN940Codec.KNN940Codec
8-
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
8+
org.opensearch.knn.index.codec.KNN950Codec.KNN950Codec
9+
org.opensearch.knn.index.codec.KNN990Codec.KNN990Codec

src/test/java/org/opensearch/knn/index/codec/KNN910Codec/KNN910CodecTests.java

-22
This file was deleted.

src/test/java/org/opensearch/knn/index/codec/KNN920Codec/KNN920CodecTests.java

-23
This file was deleted.

src/test/java/org/opensearch/knn/index/codec/KNN940Codec/KNN940CodecTests.java

-30
This file was deleted.

src/test/java/org/opensearch/knn/index/codec/KNN950Codec/KNN950CodecTests.java src/test/java/org/opensearch/knn/index/codec/KNN990Codec/KNN990CodecTests.java

+10-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

6-
package org.opensearch.knn.index.codec.KNN950Codec;
6+
package org.opensearch.knn.index.codec.KNN990Codec;
77

88
import lombok.SneakyThrows;
99
import org.apache.lucene.codecs.Codec;
@@ -14,35 +14,35 @@
1414
import java.util.Optional;
1515
import java.util.function.Function;
1616

17-
import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_5_0;
17+
import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_9_0;
1818

19-
public class KNN950CodecTests extends KNNCodecTestCase {
19+
public class KNN990CodecTests extends KNNCodecTestCase {
2020

2121
@SneakyThrows
2222
public void testMultiFieldsKnnIndex() {
23-
testMultiFieldsKnnIndex(KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build());
23+
testMultiFieldsKnnIndex(KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build());
2424
}
2525

2626
@SneakyThrows
2727
public void testBuildFromModelTemplate() {
28-
testBuildFromModelTemplate((KNN950Codec.builder().delegate(V_9_5_0.getDefaultCodecDelegate()).build()));
28+
testBuildFromModelTemplate((KNN990Codec.builder().delegate(V_9_9_0.getDefaultCodecDelegate()).build()));
2929
}
3030

3131
// Ensure that the codec is able to return the correct per field knn vectors format for codec
3232
public void testCodecSetsCustomPerFieldKnnVectorsFormat() {
33-
final Codec codec = new KNN950Codec();
34-
assertTrue(codec.knnVectorsFormat() instanceof KNN950PerFieldKnnVectorsFormat);
33+
final Codec codec = new KNN990Codec();
34+
assertTrue(codec.knnVectorsFormat() instanceof KNN990PerFieldKnnVectorsFormat);
3535
}
3636

3737
// IMPORTANT: When this Codec is moved to a backwards Codec, this test needs to be removed, because it attempts to
3838
// write with a read only codec, which will fail
3939
@SneakyThrows
4040
public void testKnnVectorIndex() {
4141
Function<MapperService, PerFieldKnnVectorsFormat> perFieldKnnVectorsFormatProvider = (
42-
mapperService) -> new KNN950PerFieldKnnVectorsFormat(Optional.of(mapperService));
42+
mapperService) -> new KNN990PerFieldKnnVectorsFormat(Optional.of(mapperService));
4343

44-
Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN950Codec.builder()
45-
.delegate(V_9_5_0.getDefaultCodecDelegate())
44+
Function<PerFieldKnnVectorsFormat, Codec> knnCodecProvider = (knnVectorFormat) -> KNN990Codec.builder()
45+
.delegate(V_9_9_0.getDefaultCodecDelegate())
4646
.knnVectorsFormat(knnVectorFormat)
4747
.build();
4848

src/test/java/org/opensearch/knn/index/codec/KNNCodecFactoryTests.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import org.apache.lucene.codecs.Codec;
1010
import org.apache.lucene.backward_codecs.lucene91.Lucene91Codec;
1111
import org.apache.lucene.backward_codecs.lucene94.Lucene94Codec;
12-
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
12+
import org.apache.lucene.backward_codecs.lucene95.Lucene95Codec;
1313
import org.opensearch.knn.KNNTestCase;
1414

1515
import static org.opensearch.knn.index.codec.KNNCodecVersion.V_9_1_0;

src/test/java/org/opensearch/knn/index/codec/KNNCodecTestUtil.java

+1
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ public static SegmentInfo newSegmentInfo(final Directory directory, final String
363363
segmentName,
364364
docsInSegment,
365365
false,
366+
false,
366367
codec,
367368
Collections.emptyMap(),
368369
randomByteArrayOfLength(StringHelper.ID_LENGTH),

src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java

+5
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ public void testQueryScoreForFaissWithModel() throws IOException {
181181
SEGMENT_NAME,
182182
100,
183183
true,
184+
false,
184185
KNNCodecVersion.current().getDefaultCodecDelegate(),
185186
Map.of(),
186187
new byte[StringHelper.ID_LENGTH],
@@ -270,6 +271,7 @@ public void testShardWithoutFiles() {
270271
SEGMENT_NAME,
271272
100,
272273
false,
274+
false,
273275
KNNCodecVersion.current().getDefaultCodecDelegate(),
274276
Map.of(),
275277
new byte[StringHelper.ID_LENGTH],
@@ -313,6 +315,7 @@ public void testEmptyQueryResults() {
313315
SEGMENT_NAME,
314316
100,
315317
true,
318+
false,
316319
KNNCodecVersion.current().getDefaultCodecDelegate(),
317320
Map.of(),
318321
new byte[StringHelper.ID_LENGTH],
@@ -369,6 +372,7 @@ public void testANNWithFilterQuery_whenDoingANN_thenSuccess() {
369372
SEGMENT_NAME,
370373
100,
371374
true,
375+
false,
372376
KNNCodecVersion.current().getDefaultCodecDelegate(),
373377
Map.of(),
374378
new byte[StringHelper.ID_LENGTH],
@@ -617,6 +621,7 @@ private void testQueryScore(
617621
SEGMENT_NAME,
618622
100,
619623
true,
624+
false,
620625
KNNCodecVersion.current().getDefaultCodecDelegate(),
621626
Map.of(),
622627
new byte[StringHelper.ID_LENGTH],

0 commit comments

Comments
 (0)