Skip to content

Commit e8c68b3

Browse files
mohit10011999mohitamgpeterzhuamazon
authored
Bumping custom-codecs plugin version to 3.0.0-alpha1 and lucene upgrade (#228)
* Bumping custom-codecs plugin version to 3.0.0.0-alpha1 Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Adding v4 actions as v2 and v3 are deprecated Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Adding lucene 101 classes and test cases Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Adding specific ubuntu platforms Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * BWC test command change Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * fix github actions Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Fixing syntax failures Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Update spotless Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Update version extraction Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Update results to include qualifier as well Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Add def previousToken Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Update new regex Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> * Addressing comments and resolving conflicts Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Addressing comments and resolving conflicts Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Addressing comments and resolving conflicts Signed-off-by: Mohit Kumar <mohitamg@amazon.com> * Addressed comments and resolving conflicts Signed-off-by: Mohit Kumar <mohitamg@amazon.com> --------- Signed-off-by: Mohit Kumar <mohitamg@amazon.com> Signed-off-by: Peter Zhu <zhujiaxi@amazon.com> Co-authored-by: Mohit Kumar <mohitamg@amazon.com> Co-authored-by: Peter Zhu <zhujiaxi@amazon.com>
1 parent e7d1d9b commit e8c68b3

36 files changed

+1076
-151
lines changed

.github/actions/create-bwc-build/action.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ runs:
2525
with:
2626
path: ${{ inputs.plugin-branch }}
2727

28-
- uses: actions/checkout@v3
28+
- uses: actions/checkout@v4
2929
if: ${{ inputs.plugin-branch != 'current_branch' }}
3030
with:
3131
repository: opensearch-project/custom-codecs

.github/workflows/check.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
steps:
4545
- uses: actions/checkout@v4
4646
- name: Set up JDK ${{ matrix.java }}
47-
uses: actions/setup-java@v3
47+
uses: actions/setup-java@v4
4848
with:
4949
java-version: ${{ matrix.java }}
5050
distribution: temurin

.github/workflows/ci.yml

+1-5
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,7 @@ jobs:
2525
uses: actions/checkout@v4
2626

2727
- name: Build BWC tests
28-
uses: gradle/gradle-build-action@v3
29-
with:
30-
cache-disabled: true
31-
arguments: |
32-
-p bwc-test build -x test -x integTest
28+
run: ./gradlew -p bwc-test build -x test -x integTest
3329

3430
backward-compatibility:
3531
strategy:

.github/workflows/publish-maven-snapshots.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ jobs:
1717
contents: write
1818

1919
steps:
20-
- uses: actions/setup-java@v3
20+
- uses: actions/setup-java@v4
2121
with:
2222
distribution: temurin
2323
java-version: 21
24-
- uses: actions/checkout@v3
24+
- uses: actions/checkout@v4
2525
- uses: aws-actions/configure-aws-credentials@v1
2626
with:
2727
role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }}

build.gradle

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131
buildscript {
3232
ext {
3333
opensearch_group = "org.opensearch"
34-
opensearch_version = System.getProperty("opensearch.version", "3.0.0-SNAPSHOT")
34+
opensearch_version = System.getProperty("opensearch.version", "3.0.0-alpha1-SNAPSHOT")
3535
isSnapshot = "true" == System.getProperty("build.snapshot", "true")
36-
buildVersionQualifier = System.getProperty("build.version_qualifier", "")
36+
buildVersionQualifier = System.getProperty("build.version_qualifier", "alpha1")
3737
}
3838

3939
repositories {

bwc-test/build.gradle

+7-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ ext {
4444

4545
buildscript {
4646
ext {
47-
opensearch_version = System.getProperty("opensearch.version", "3.0.0-SNAPSHOT")
47+
opensearch_version = System.getProperty("opensearch.version", "3.0.0-alpha1-SNAPSHOT")
4848
opensearch_group = "org.opensearch"
4949
}
5050
repositories {
@@ -89,9 +89,13 @@ String nextOpenSearch = extractVersion(nextVersion) + "-SNAPSHOT";
8989

9090
// Extracts the OpenSearch version from a plugin version string, keeping any alpha/beta/rc qualifier, e.g. 2.11.0.0 -> 2.11.0 and 3.0.0.0-alpha1 -> 3.0.0-alpha1.
9191
def String extractVersion(versionStr) {
92-
def versionMatcher = versionStr =~ /(.+?)(\.\d+)$/
92+
def versionMatcher = versionStr =~ /(.+?)(\.\d+)(-(alpha|beta|rc)\d+)?$/
9393
versionMatcher.find()
94-
return versionMatcher.group(1)
94+
if (versionMatcher.group(3) != null) { /* there is a qualifier */
95+
return versionMatcher.group(1) + versionMatcher.group(3)
96+
} else {
97+
return versionMatcher.group(1)
98+
}
9599
}
96100

97101
2.times {i ->

gradle/wrapper/gradle-wrapper.properties

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
distributionBase=GRADLE_USER_HOME
22
distributionPath=wrapper/dists
3-
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
3+
distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
4+
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
5+
networkTimeout=10000
6+
validateDistributionUrl=true
47
zipStoreBase=GRADLE_USER_HOME
58
zipStorePath=wrapper/dists
6-
distributionSha256Sum=5b9c5eb3f9fc2c94abaea57d90bd78747ca117ddbbf96c859d3741181a12bf2a

src/integrationTest/java/org/opensearch/index/codec/rest/CreateIndexWithCodecIT.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import org.opensearch.cluster.metadata.IndexMetadata;
2727
import org.opensearch.common.settings.Settings;
2828
import org.opensearch.core.common.Strings;
29-
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
29+
import org.opensearch.index.codec.customcodecs.Lucene101QatCodec;
3030
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
3131
import org.opensearch.test.rest.OpenSearchRestTestCase;
3232

@@ -105,7 +105,7 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
105105
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
106106
.put(
107107
"index.codec",
108-
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
108+
randomFrom(Lucene101QatCodec.Mode.QAT_LZ4.getCodec(), Lucene101QatCodec.Mode.QAT_DEFLATE.getCodec())
109109
)
110110
.put("index.codec.compression_level", randomIntBetween(1, 6))
111111
.build()

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.common.settings.Setting;
1212
import org.opensearch.index.IndexSettings;
1313
import org.opensearch.index.codec.CodecServiceFactory;
14+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.Lucene912QatCodec;
1415
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
1516
import org.opensearch.index.engine.EngineConfig;
1617
import org.opensearch.plugins.EnginePlugin;
@@ -68,6 +69,8 @@ private static boolean isQatCodec(String codecName) {
6869
return codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
6970
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())
7071
|| codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
71-
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec());
72+
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
73+
|| codecName.equals(Lucene101QatCodec.Mode.QAT_LZ4.getCodec())
74+
|| codecName.equals(Lucene101QatCodec.Mode.QAT_DEFLATE.getCodec());
7275
}
7376
}

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java

+8-8
Original file line numberDiff line numberDiff line change
@@ -50,25 +50,25 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
5050
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
5151
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
5252
if (mapperService == null) {
53-
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
54-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
53+
codecs.put(ZSTD_CODEC, new Zstd101Codec(compressionLevel));
54+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict101Codec(compressionLevel));
5555
if (QatZipperFactory.isQatAvailable()) {
5656
codecs.put(
5757
QAT_LZ4_CODEC,
58-
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
58+
new QatLz4101Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
5959
);
60-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
60+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate101Codec(compressionLevel, () -> {
6161
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
6262
}));
6363
}
6464
} else {
65-
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
66-
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
65+
codecs.put(ZSTD_CODEC, new Zstd101Codec(mapperService, logger, compressionLevel));
66+
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict101Codec(mapperService, logger, compressionLevel));
6767
if (QatZipperFactory.isQatAvailable()) {
68-
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
68+
codecs.put(QAT_LZ4_CODEC, new QatLz4101Codec(mapperService, logger, compressionLevel, () -> {
6969
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
7070
}));
71-
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
71+
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate101Codec(mapperService, logger, compressionLevel, () -> {
7272
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
7373
}));
7474
}

src/main/java/org/opensearch/index/codec/customcodecs/Lucene912CustomCodec.java src/main/java/org/opensearch/index/codec/customcodecs/Lucene101CustomCodec.java

+12-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import org.apache.logging.log4j.Logger;
1212
import org.apache.lucene.codecs.FilterCodec;
1313
import org.apache.lucene.codecs.StoredFieldsFormat;
14-
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
14+
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
1515
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
1616
import org.opensearch.index.mapper.MapperService;
1717

@@ -23,22 +23,22 @@
2323
*
2424
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
2525
* Supports two modes zstd and zstd_no_dict.
26-
* Uses Lucene912 as the delegate codec
26+
* Uses Lucene101 as the delegate codec
2727
*
2828
* @opensearch.internal
2929
*/
30-
public abstract class Lucene912CustomCodec extends FilterCodec {
30+
public abstract class Lucene101CustomCodec extends FilterCodec {
3131

3232
/** Each mode represents a compression algorithm. */
3333
public enum Mode {
3434
/**
3535
* ZStandard mode with dictionary
3636
*/
37-
ZSTD("ZSTD912", Set.of("zstd")),
37+
ZSTD("ZSTD101", Set.of("zstd")),
3838
/**
3939
* ZStandard mode without dictionary
4040
*/
41-
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));
41+
ZSTD_NO_DICT("ZSTDNODICT101", Set.of("zstd_no_dict"));
4242

4343
private final String codec;
4444
private final Set<String> aliases;
@@ -70,7 +70,7 @@ public Set<String> getAliases() {
7070
*
7171
* @param mode The compression codec (ZSTD or ZSTDNODICT).
7272
*/
73-
public Lucene912CustomCodec(Mode mode) {
73+
public Lucene101CustomCodec(Mode mode) {
7474
this(mode, DEFAULT_COMPRESSION_LEVEL);
7575
}
7676

@@ -82,9 +82,9 @@ public Lucene912CustomCodec(Mode mode) {
8282
* @param mode The compression codec (ZSTD or ZSTDNODICT).
8383
* @param compressionLevel The compression level.
8484
*/
85-
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
86-
super(mode.getCodec(), new Lucene912Codec());
87-
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
85+
public Lucene101CustomCodec(Mode mode, int compressionLevel) {
86+
super(mode.getCodec(), new Lucene101Codec());
87+
this.storedFieldsFormat = new Lucene101CustomStoredFieldsFormat(mode, compressionLevel);
8888
}
8989

9090
/**
@@ -97,9 +97,9 @@ public Lucene912CustomCodec(Mode mode, int compressionLevel) {
9797
* @param mapperService The mapper service.
9898
* @param logger The logger.
9999
*/
100-
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
101-
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
102-
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
100+
public Lucene101CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
101+
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, logger));
102+
this.storedFieldsFormat = new Lucene101CustomStoredFieldsFormat(mode, compressionLevel);
103103
}
104104

105105
@Override
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.lucene.codecs.StoredFieldsFormat;
12+
import org.apache.lucene.codecs.StoredFieldsReader;
13+
import org.apache.lucene.codecs.StoredFieldsWriter;
14+
import org.apache.lucene.codecs.compressing.CompressionMode;
15+
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
16+
import org.apache.lucene.index.FieldInfos;
17+
import org.apache.lucene.index.SegmentInfo;
18+
import org.apache.lucene.store.Directory;
19+
import org.apache.lucene.store.IOContext;
20+
21+
import java.io.IOException;
22+
import java.util.Objects;
23+
24+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
25+
26+
/** Stored field format used by pluggable codec */
27+
public class Lucene101CustomStoredFieldsFormat extends StoredFieldsFormat {
28+
29+
/** A key that we use to map to a mode */
30+
public static final String MODE_KEY = Lucene101CustomStoredFieldsFormat.class.getSimpleName() + ".mode";
31+
32+
protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
33+
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
34+
protected static final int ZSTD_BLOCK_SHIFT = 10;
35+
36+
private final CompressionMode zstdCompressionMode;
37+
private final CompressionMode zstdNoDictCompressionMode;
38+
39+
private final Lucene101CustomCodec.Mode mode;
40+
private final int compressionLevel;
41+
42+
/** Creates a new instance using ZSTD mode and the default compression level. */
43+
public Lucene101CustomStoredFieldsFormat() {
44+
this(Lucene101CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
45+
}
46+
47+
/**
48+
* Creates a new instance.
49+
*
50+
* @param mode The compression mode, either ZSTD or ZSTD_NO_DICT.
51+
*/
52+
public Lucene101CustomStoredFieldsFormat(Lucene101CustomCodec.Mode mode) {
53+
this(mode, DEFAULT_COMPRESSION_LEVEL);
54+
}
55+
56+
/**
57+
* Creates a new instance with the specified mode and compression level.
58+
*
59+
* @param mode The compression mode, either ZSTD or ZSTD_NO_DICT.
60+
* @param compressionLevel The compression level for the mode.
61+
*/
62+
public Lucene101CustomStoredFieldsFormat(Lucene101CustomCodec.Mode mode, int compressionLevel) {
63+
this.mode = Objects.requireNonNull(mode);
64+
this.compressionLevel = compressionLevel;
65+
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
66+
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
67+
}
68+
69+
/**
70+
* Returns a {@link StoredFieldsReader} to load stored fields.
71+
* @param directory The index directory.
72+
* @param si The SegmentInfo that stores segment information.
73+
* @param fn The fieldInfos.
74+
* @param context The IOContext that holds additional details on the merge/search context.
75+
*/
76+
@Override
77+
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
78+
if (si.getAttribute(MODE_KEY) != null) {
79+
String value = si.getAttribute(MODE_KEY);
80+
Lucene101CustomCodec.Mode mode = Lucene101CustomCodec.Mode.valueOf(value);
81+
return impl(mode).fieldsReader(directory, si, fn, context);
82+
} else {
83+
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
84+
}
85+
}
86+
87+
/**
88+
* Returns a {@link StoredFieldsWriter} to write stored fields.
89+
* @param directory The index directory.
90+
* @param si The SegmentInfo that stores segment information.
91+
* @param context The IOContext that holds additional details on the merge/search context.
92+
*/
93+
@Override
94+
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
95+
String previous = si.putAttribute(MODE_KEY, mode.name());
96+
if (previous != null && previous.equals(mode.name()) == false) {
97+
throw new IllegalStateException(
98+
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
99+
);
100+
}
101+
return impl(mode).fieldsWriter(directory, si, context);
102+
}
103+
104+
StoredFieldsFormat impl(Lucene101CustomCodec.Mode mode) {
105+
switch (mode) {
106+
case ZSTD:
107+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
108+
case ZSTD_NO_DICT:
109+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
110+
default:
111+
throw new IllegalStateException("Unsupported compression mode: " + mode);
112+
}
113+
}
114+
115+
private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
116+
return new Lucene90CompressingStoredFieldsFormat(
117+
formatName,
118+
compressionMode,
119+
ZSTD_BLOCK_LENGTH,
120+
ZSTD_MAX_DOCS_PER_BLOCK,
121+
ZSTD_BLOCK_SHIFT
122+
);
123+
}
124+
125+
public Lucene101CustomCodec.Mode getMode() {
126+
return mode;
127+
}
128+
129+
/**
130+
* Returns the compression level.
131+
*/
132+
public int getCompressionLevel() {
133+
return compressionLevel;
134+
}
135+
136+
public CompressionMode getCompressionMode() {
137+
return mode == Lucene101CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
138+
}
139+
140+
}

0 commit comments

Comments
 (0)