Skip to content

Commit 2afbadd

Browse files
authored Oct 20, 2024
[Backport] [2.x] Update codecs to Apache Lucene 9.12.0 (#199)
* Update codecs to Apache Lucene 9.12.0 Signed-off-by: Andriy Redko <andriy.redko@aiven.io> * Address code review comments Signed-off-by: Andriy Redko <andriy.redko@aiven.io> * Address code review comments Signed-off-by: Andriy Redko <andriy.redko@aiven.io> * Fix javadoc comments Signed-off-by: Andriy Redko <andriy.redko@aiven.io> --------- Signed-off-by: Andriy Redko <andriy.redko@aiven.io>

33 files changed

+1150
-202
lines changed
 

‎.github/workflows/check.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929
run: |
3030
# https://github.com/opensearch-project/opensearch-build/issues/4191
3131
chown -R ci-runner:ci-runner `pwd`
32-
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
32+
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew test check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
3333
- name: Run Gradle (assemble)
3434
run: |
3535
# https://github.com/opensearch-project/opensearch-build/issues/4191
@@ -53,7 +53,7 @@ jobs:
5353
cache: gradle
5454
- name: Run Gradle (check)
5555
run: |
56-
./gradlew check
56+
./gradlew test check
5757
- name: Run Gradle (assemble)
5858
run: |
5959
./gradlew assemble

‎src/integrationTest/java/org/opensearch/index/codec/rest/CreateIndexWithCodecIT.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import org.opensearch.cluster.metadata.IndexMetadata;
2424
import org.opensearch.common.settings.Settings;
2525
import org.opensearch.core.common.Strings;
26-
import org.opensearch.index.codec.customcodecs.Lucene99QatCodec;
26+
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
2727
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
2828
import org.opensearch.test.rest.OpenSearchRestTestCase;
2929

@@ -100,7 +100,10 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
100100
Settings.builder()
101101
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
102102
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
103-
.put("index.codec", randomFrom(Lucene99QatCodec.Mode.QAT_LZ4.getCodec(), Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))
103+
.put(
104+
"index.codec",
105+
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
106+
)
104107
.put("index.codec.compression_level", randomIntBetween(1, 6))
105108
.build()
106109
)

‎src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java

+12-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.common.settings.Setting;
1212
import org.opensearch.index.IndexSettings;
1313
import org.opensearch.index.codec.CodecServiceFactory;
14+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
1415
import org.opensearch.index.engine.EngineConfig;
1516
import org.opensearch.plugins.EnginePlugin;
1617
import org.opensearch.plugins.Plugin;
@@ -49,12 +50,11 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
4950
|| codecName.equals(CustomCodecService.QAT_DEFLATE_CODEC)) {
5051
return Optional.of(new CustomCodecServiceFactory());
5152
} else {
52-
if (!QatZipperFactory.isQatAvailable()
53-
&& (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
54-
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))) {
55-
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
53+
if (!QatZipperFactory.isQatAvailable() && isQatCodec(codecName)) {
54+
throw new IllegalArgumentException(
55+
"QAT codecs are not supported (QAT is not available). Please create indices with a different codec."
56+
);
5657
}
57-
5858
}
5959
return Optional.empty();
6060
}
@@ -63,4 +63,11 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
6363
public List<Setting<?>> getSettings() {
6464
return Arrays.asList(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
6565
}
66+
67+
private static boolean isQatCodec(String codecName) {
68+
return codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
69+
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())
70+
|| codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
71+
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec());
72+
}
6673
}

‎src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java

+15-13
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.util.Map;
2020
import java.util.stream.Stream;
2121

22+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING;
2223
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
2324

2425
/** CustomCodecService provides ZSTD, ZSTD_NO_DICT, QAT_LZ4, and QAT_DEFLATE compression codecs. */
@@ -49,25 +50,26 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
4950
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
5051
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
5152
if (mapperService == null) {
52-
codecs.put(ZSTD_CODEC, new Zstd99Codec(compressionLevel));
53-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(compressionLevel));
53+
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
54+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
5455
if (QatZipperFactory.isQatAvailable()) {
55-
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(compressionLevel, () -> {
56-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
57-
}));
58-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(compressionLevel, () -> {
59-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
56+
codecs.put(
57+
QAT_LZ4_CODEC,
58+
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
59+
);
60+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
61+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
6062
}));
6163
}
6264
} else {
63-
codecs.put(ZSTD_CODEC, new Zstd99Codec(mapperService, logger, compressionLevel));
64-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(mapperService, logger, compressionLevel));
65+
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
66+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
6567
if (QatZipperFactory.isQatAvailable()) {
66-
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(mapperService, logger, compressionLevel, () -> {
67-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
68+
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
69+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
6870
}));
69-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(mapperService, logger, compressionLevel, () -> {
70-
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
71+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
72+
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
7173
}));
7274
}
7375
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.logging.log4j.Logger;
12+
import org.apache.lucene.codecs.FilterCodec;
13+
import org.apache.lucene.codecs.StoredFieldsFormat;
14+
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
15+
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
16+
import org.opensearch.index.mapper.MapperService;
17+
18+
import java.util.Set;
19+
20+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
21+
22+
/**
23+
*
24+
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
25+
* Supports two modes zstd and zstd_no_dict.
26+
* Uses Lucene912 as the delegate codec
27+
*
28+
* @opensearch.internal
29+
*/
30+
public abstract class Lucene912CustomCodec extends FilterCodec {
31+
32+
/** Each mode represents a compression algorithm. */
33+
public enum Mode {
34+
/**
35+
* ZStandard mode with dictionary
36+
*/
37+
ZSTD("ZSTD912", Set.of("zstd")),
38+
/**
39+
* ZStandard mode without dictionary
40+
*/
41+
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));
42+
43+
private final String codec;
44+
private final Set<String> aliases;
45+
46+
Mode(String codec, Set<String> aliases) {
47+
this.codec = codec;
48+
this.aliases = aliases;
49+
}
50+
51+
/**
52+
* Returns the Codec that is registered with Lucene
53+
*/
54+
public String getCodec() {
55+
return codec;
56+
}
57+
58+
/**
59+
* Returns the aliases of the Codec
60+
*/
61+
public Set<String> getAliases() {
62+
return aliases;
63+
}
64+
}
65+
66+
private final StoredFieldsFormat storedFieldsFormat;
67+
68+
/**
69+
* Creates a new compression codec with the default compression level.
70+
*
71+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
72+
*/
73+
public Lucene912CustomCodec(Mode mode) {
74+
this(mode, DEFAULT_COMPRESSION_LEVEL);
75+
}
76+
77+
/**
78+
* Creates a new compression codec with the given compression level. We use
79+
* lowercase letters when registering the codec so that we remain consistent with
80+
* the other compression codecs: default, lucene_default, and best_compression.
81+
*
82+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
83+
* @param compressionLevel The compression level.
84+
*/
85+
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
86+
super(mode.getCodec(), new Lucene912Codec());
87+
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
88+
}
89+
90+
/**
91+
* Creates a new compression codec with the given compression level. We use
92+
* lowercase letters when registering the codec so that we remain consistent with
93+
* the other compression codecs: default, lucene_default, and best_compression.
94+
*
95+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
96+
* @param compressionLevel The compression level.
97+
* @param mapperService The mapper service.
98+
* @param logger The logger.
99+
*/
100+
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
101+
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
102+
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
103+
}
104+
105+
@Override
106+
public StoredFieldsFormat storedFieldsFormat() {
107+
return storedFieldsFormat;
108+
}
109+
110+
@Override
111+
public String toString() {
112+
return getClass().getSimpleName();
113+
}
114+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.lucene.codecs.StoredFieldsFormat;
12+
import org.apache.lucene.codecs.StoredFieldsReader;
13+
import org.apache.lucene.codecs.StoredFieldsWriter;
14+
import org.apache.lucene.codecs.compressing.CompressionMode;
15+
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
16+
import org.apache.lucene.index.FieldInfos;
17+
import org.apache.lucene.index.SegmentInfo;
18+
import org.apache.lucene.store.Directory;
19+
import org.apache.lucene.store.IOContext;
20+
21+
import java.io.IOException;
22+
import java.util.Objects;
23+
24+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
25+
26+
/** Stored field format used by pluggable codec */
27+
public class Lucene912CustomStoredFieldsFormat extends StoredFieldsFormat {
28+
29+
/** A key that we use to map to a mode */
30+
public static final String MODE_KEY = Lucene912CustomStoredFieldsFormat.class.getSimpleName() + ".mode";
31+
32+
protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
33+
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
34+
protected static final int ZSTD_BLOCK_SHIFT = 10;
35+
36+
private final CompressionMode zstdCompressionMode;
37+
private final CompressionMode zstdNoDictCompressionMode;
38+
39+
private final Lucene912CustomCodec.Mode mode;
40+
private final int compressionLevel;
41+
42+
/** default constructor */
43+
public Lucene912CustomStoredFieldsFormat() {
44+
this(Lucene912CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
45+
}
46+
47+
/**
48+
* Creates a new instance.
49+
*
50+
* @param mode The mode represents ZSTD or ZSTDNODICT
51+
*/
52+
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode) {
53+
this(mode, DEFAULT_COMPRESSION_LEVEL);
54+
}
55+
56+
/**
57+
* Creates a new instance with the specified mode and compression level.
58+
*
59+
* @param mode The mode represents ZSTD or ZSTDNODICT
60+
* @param compressionLevel The compression level for the mode.
61+
*/
62+
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode, int compressionLevel) {
63+
this.mode = Objects.requireNonNull(mode);
64+
this.compressionLevel = compressionLevel;
65+
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
66+
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
67+
}
68+
69+
/**
70+
* Returns a {@link StoredFieldsReader} to load stored fields.
71+
* @param directory The index directory.
72+
* @param si The SegmentInfo that stores segment information.
73+
* @param fn The fieldInfos.
74+
* @param context The IOContext that holds additional details on the merge/search context.
75+
*/
76+
@Override
77+
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
78+
if (si.getAttribute(MODE_KEY) != null) {
79+
String value = si.getAttribute(MODE_KEY);
80+
Lucene912CustomCodec.Mode mode = Lucene912CustomCodec.Mode.valueOf(value);
81+
return impl(mode).fieldsReader(directory, si, fn, context);
82+
} else {
83+
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
84+
}
85+
}
86+
87+
/**
88+
* Returns a {@link StoredFieldsReader} to write stored fields.
89+
* @param directory The index directory.
90+
* @param si The SegmentInfo that stores segment information.
91+
* @param context The IOContext that holds additional details on the merge/search context.
92+
*/
93+
@Override
94+
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
95+
String previous = si.putAttribute(MODE_KEY, mode.name());
96+
if (previous != null && previous.equals(mode.name()) == false) {
97+
throw new IllegalStateException(
98+
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
99+
);
100+
}
101+
return impl(mode).fieldsWriter(directory, si, context);
102+
}
103+
104+
StoredFieldsFormat impl(Lucene912CustomCodec.Mode mode) {
105+
switch (mode) {
106+
case ZSTD:
107+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
108+
case ZSTD_NO_DICT:
109+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
110+
default:
111+
throw new IllegalStateException("Unsupported compression mode: " + mode);
112+
}
113+
}
114+
115+
private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
116+
return new Lucene90CompressingStoredFieldsFormat(
117+
formatName,
118+
compressionMode,
119+
ZSTD_BLOCK_LENGTH,
120+
ZSTD_MAX_DOCS_PER_BLOCK,
121+
ZSTD_BLOCK_SHIFT
122+
);
123+
}
124+
125+
public Lucene912CustomCodec.Mode getMode() {
126+
return mode;
127+
}
128+
129+
/**
130+
* Returns the compression level.
131+
*/
132+
public int getCompressionLevel() {
133+
return compressionLevel;
134+
}
135+
136+
public CompressionMode getCompressionMode() {
137+
return mode == Lucene912CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
138+
}
139+
140+
}

0 commit comments

Comments
 (0)
Please sign in to comment.