Skip to content

Commit 8e867a9

Browse files
committed
Update codecs to Apache Lucene 9.12.0
Signed-off-by: Andriy Redko <andriy.redko@aiven.io>
1 parent 5b5d693 commit 8e867a9

32 files changed

+1299
-295
lines changed

src/integrationTest/java/org/opensearch/index/codec/rest/CreateIndexWithCodecIT.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import org.opensearch.cluster.metadata.IndexMetadata;
2727
import org.opensearch.common.settings.Settings;
2828
import org.opensearch.core.common.Strings;
29-
import org.opensearch.index.codec.customcodecs.Lucene99QatCodec;
29+
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
3030
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
3131
import org.opensearch.test.rest.OpenSearchRestTestCase;
3232

@@ -103,7 +103,10 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
103103
Settings.builder()
104104
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
105105
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
106-
.put("index.codec", randomFrom(Lucene99QatCodec.Mode.QAT_LZ4.getCodec(), Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))
106+
.put(
107+
"index.codec",
108+
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
109+
)
107110
.put("index.codec.compression_level", randomIntBetween(1, 6))
108111
.build()
109112
)

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java

+12-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.common.settings.Setting;
1212
import org.opensearch.index.IndexSettings;
1313
import org.opensearch.index.codec.CodecServiceFactory;
14+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
1415
import org.opensearch.index.engine.EngineConfig;
1516
import org.opensearch.plugins.EnginePlugin;
1617
import org.opensearch.plugins.Plugin;
@@ -49,12 +50,19 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
4950
|| codecName.equals(CustomCodecService.QAT_DEFLATE_CODEC)) {
5051
return Optional.of(new CustomCodecServiceFactory());
5152
} else {
52-
if (!QatZipperFactory.isQatAvailable()
53-
&& (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
54-
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))) {
55-
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
53+
if (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
54+
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())) {
55+
if (!QatZipperFactory.isQatAvailable()) {
56+
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
57+
}
5658
}
5759

60+
if (codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
61+
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())) {
62+
if (!QatZipperFactory.isQatAvailable()) {
63+
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
64+
}
65+
}
5866
}
5967
return Optional.empty();
6068
}

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java

+15-13
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.Map;
2020
import java.util.stream.Stream;
2121

22+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING;
2223
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
2324

2425
/** CustomCodecService provides ZSTD, ZSTD_NO_DICT, QAT_LZ4, and QAT_DEFLATE compression codecs. */
@@ -49,25 +50,26 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
4950
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
5051
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
5152
if (mapperService == null) {
52-
codecs.put(ZSTD_CODEC, new Zstd99Codec(compressionLevel));
53-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(compressionLevel));
53+
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
54+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
5455
if (QatZipperFactory.isQatAvailable()) {
55-
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(compressionLevel, () -> {
56-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
57-
}));
58-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(compressionLevel, () -> {
59-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
56+
codecs.put(
57+
QAT_LZ4_CODEC,
58+
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
59+
);
60+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
61+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
6062
}));
6163
}
6264
} else {
63-
codecs.put(ZSTD_CODEC, new Zstd99Codec(mapperService, logger, compressionLevel));
64-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(mapperService, logger, compressionLevel));
65+
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
66+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
6567
if (QatZipperFactory.isQatAvailable()) {
66-
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(mapperService, logger, compressionLevel, () -> {
67-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
68+
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
69+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
6870
}));
69-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(mapperService, logger, compressionLevel, () -> {
70-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
71+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
72+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
7173
}));
7274
}
7375
}

src/main/java/org/opensearch/index/codec/customcodecs/Lucene99CustomCodec.java src/main/java/org/opensearch/index/codec/customcodecs/Lucene912CustomCodec.java

+12-16
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,13 @@
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.codecs.FilterCodec;
1313
import org.apache.lucene.codecs.StoredFieldsFormat;
14-
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
15-
import org.opensearch.common.settings.Settings;
14+
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
1615
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
1716
import org.opensearch.index.mapper.MapperService;
1817

1918
import java.util.Set;
2019

21-
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
20+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
2221

2322
/**
2423
*
@@ -28,21 +27,18 @@
2827
*
2928
* @opensearch.internal
3029
*/
31-
public abstract class Lucene99CustomCodec extends FilterCodec {
32-
33-
/** Default compression level used for compression */
34-
public static final int DEFAULT_COMPRESSION_LEVEL = INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getDefault(Settings.EMPTY);
30+
public abstract class Lucene912CustomCodec extends FilterCodec {
3531

3632
/** Each mode represents a compression algorithm. */
3733
public enum Mode {
3834
/**
3935
* ZStandard mode with dictionary
4036
*/
41-
ZSTD("ZSTD99", Set.of("zstd")),
37+
ZSTD("ZSTD912", Set.of("zstd")),
4238
/**
4339
* ZStandard mode without dictionary
4440
*/
45-
ZSTD_NO_DICT("ZSTDNODICT99", Set.of("zstd_no_dict"));
41+
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));
4642

4743
private final String codec;
4844
private final Set<String> aliases;
@@ -74,7 +70,7 @@ public Set<String> getAliases() {
7470
*
7571
* @param mode The compression codec (ZSTD or ZSTDNODICT).
7672
*/
77-
public Lucene99CustomCodec(Mode mode) {
73+
public Lucene912CustomCodec(Mode mode) {
7874
this(mode, DEFAULT_COMPRESSION_LEVEL);
7975
}
8076

@@ -86,9 +82,9 @@ public Lucene99CustomCodec(Mode mode) {
8682
* @param mode The compression codec (ZSTD or ZSTDNODICT).
8783
* @param compressionLevel The compression level.
8884
*/
89-
public Lucene99CustomCodec(Mode mode, int compressionLevel) {
90-
super(mode.getCodec(), new Lucene99Codec());
91-
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
85+
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
86+
super(mode.getCodec(), new Lucene912Codec());
87+
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
9288
}
9389

9490
/**
@@ -101,9 +97,9 @@ public Lucene99CustomCodec(Mode mode, int compressionLevel) {
10197
* @param mapperService The mapper service.
10298
* @param logger The logger.
10399
*/
104-
public Lucene99CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
105-
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger));
106-
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
100+
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
101+
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
102+
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
107103
}
108104

109105
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.lucene.codecs.StoredFieldsFormat;
12+
import org.apache.lucene.codecs.StoredFieldsReader;
13+
import org.apache.lucene.codecs.StoredFieldsWriter;
14+
import org.apache.lucene.codecs.compressing.CompressionMode;
15+
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
16+
import org.apache.lucene.index.FieldInfos;
17+
import org.apache.lucene.index.SegmentInfo;
18+
import org.apache.lucene.store.Directory;
19+
import org.apache.lucene.store.IOContext;
20+
21+
import java.io.IOException;
22+
import java.util.Objects;
23+
24+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
25+
26+
/** Stored field format used by pluggable codec */
27+
public class Lucene912CustomStoredFieldsFormat extends StoredFieldsFormat {
28+
29+
/** A key that we use to map to a mode */
30+
public static final String MODE_KEY = Lucene912CustomStoredFieldsFormat.class.getSimpleName() + ".mode";
31+
32+
protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
33+
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
34+
protected static final int ZSTD_BLOCK_SHIFT = 10;
35+
36+
private final CompressionMode zstdCompressionMode;
37+
private final CompressionMode zstdNoDictCompressionMode;
38+
39+
private final Lucene912CustomCodec.Mode mode;
40+
private final int compressionLevel;
41+
42+
/** default constructor */
43+
public Lucene912CustomStoredFieldsFormat() {
44+
this(Lucene912CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
45+
}
46+
47+
/**
48+
* Creates a new instance.
49+
*
50+
* @param mode The mode represents ZSTD or ZSTDNODICT
51+
*/
52+
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode) {
53+
this(mode, DEFAULT_COMPRESSION_LEVEL);
54+
}
55+
56+
/**
57+
* Creates a new instance with the specified mode and compression level.
58+
*
59+
* @param mode The mode represents ZSTD or ZSTDNODICT
60+
* @param compressionLevel The compression level for the mode.
61+
*/
62+
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode, int compressionLevel) {
63+
this.mode = Objects.requireNonNull(mode);
64+
this.compressionLevel = compressionLevel;
65+
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
66+
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
67+
}
68+
69+
/**
70+
* Returns a {@link StoredFieldsReader} to load stored fields.
71+
* @param directory The index directory.
72+
* @param si The SegmentInfo that stores segment information.
73+
* @param fn The fieldInfos.
74+
* @param context The IOContext that holds additional details on the merge/search context.
75+
*/
76+
@Override
77+
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
78+
if (si.getAttribute(MODE_KEY) != null) {
79+
String value = si.getAttribute(MODE_KEY);
80+
Lucene912CustomCodec.Mode mode = Lucene912CustomCodec.Mode.valueOf(value);
81+
return impl(mode).fieldsReader(directory, si, fn, context);
82+
} else {
83+
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
84+
}
85+
}
86+
87+
/**
88+
* Returns a {@link StoredFieldsReader} to write stored fields.
89+
* @param directory The index directory.
90+
* @param si The SegmentInfo that stores segment information.
91+
* @param context The IOContext that holds additional details on the merge/search context.
92+
*/
93+
@Override
94+
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
95+
String previous = si.putAttribute(MODE_KEY, mode.name());
96+
if (previous != null && previous.equals(mode.name()) == false) {
97+
throw new IllegalStateException(
98+
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
99+
);
100+
}
101+
return impl(mode).fieldsWriter(directory, si, context);
102+
}
103+
104+
StoredFieldsFormat impl(Lucene912CustomCodec.Mode mode) {
105+
switch (mode) {
106+
case ZSTD:
107+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
108+
case ZSTD_NO_DICT:
109+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
110+
default:
111+
throw new IllegalStateException("Unsupported compression mode: " + mode);
112+
}
113+
}
114+
115+
private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
116+
return new Lucene90CompressingStoredFieldsFormat(
117+
formatName,
118+
compressionMode,
119+
ZSTD_BLOCK_LENGTH,
120+
ZSTD_MAX_DOCS_PER_BLOCK,
121+
ZSTD_BLOCK_SHIFT
122+
);
123+
}
124+
125+
public Lucene912CustomCodec.Mode getMode() {
126+
return mode;
127+
}
128+
129+
/**
130+
* Returns the compression level.
131+
*/
132+
public int getCompressionLevel() {
133+
return compressionLevel;
134+
}
135+
136+
public CompressionMode getCompressionMode() {
137+
return mode == Lucene912CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
138+
}
139+
140+
}

0 commit comments

Comments
 (0)