Skip to content

Commit bd9622f

Browse files
committed
Adding lucene 101 classes and test cases
Signed-off-by: Mohit Kumar <mohitamg@amazon.com>
1 parent eb09e5d commit bd9622f

32 files changed

+1100
-24
lines changed

.github/actions/create-bwc-build/action.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ runs:
3333
path: ${{ inputs.plugin-branch }}
3434

3535
- name: Build
36-
uses: gradle/gradle-build-action@v4
36+
uses: gradle/gradle-build-action@v3
3737
with:
3838
cache-disabled: true
3939
arguments: assemble

.github/actions/run-bwc-suite/action.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ runs:
3737
plugin-branch: ${{ inputs.plugin-next-branch }}
3838

3939
- name: Run BWC tests
40-
uses: gradle/gradle-build-action@v4
40+
uses: gradle/gradle-build-action@v3
4141
with:
4242
cache-disabled: true
4343
arguments: |

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
uses: actions/checkout@v4
2626

2727
- name: Build BWC tests
28-
uses: gradle/gradle-build-action@v4
28+
uses: gradle/gradle-build-action@v3
2929
with:
3030
cache-disabled: true
3131
arguments: |

src/integrationTest/java/org/opensearch/index/codec/rest/CreateIndexWithCodecIT.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import org.opensearch.cluster.metadata.IndexMetadata;
2727
import org.opensearch.common.settings.Settings;
2828
import org.opensearch.core.common.Strings;
29-
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
29+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.Lucene912QatCodec;
3030
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
3131
import org.opensearch.test.rest.OpenSearchRestTestCase;
3232

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecPlugin.java

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.opensearch.common.settings.Setting;
1212
import org.opensearch.index.IndexSettings;
1313
import org.opensearch.index.codec.CodecServiceFactory;
14+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.Lucene912QatCodec;
1415
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
1516
import org.opensearch.index.engine.EngineConfig;
1617
import org.opensearch.plugins.EnginePlugin;

src/main/java/org/opensearch/index/codec/customcodecs/CustomCodecService.java

+4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
import org.opensearch.common.collect.MapBuilder;
1414
import org.opensearch.index.IndexSettings;
1515
import org.opensearch.index.codec.CodecService;
16+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.QatDeflate912Codec;
17+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.QatLz4912Codec;
18+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.Zstd912Codec;
19+
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene912.ZstdNoDict912Codec;
1620
import org.opensearch.index.mapper.MapperService;
1721

1822
import java.util.Arrays;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.logging.log4j.Logger;
12+
import org.apache.lucene.codecs.FilterCodec;
13+
import org.apache.lucene.codecs.StoredFieldsFormat;
14+
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
15+
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
16+
import org.opensearch.index.mapper.MapperService;
17+
18+
import java.util.Set;
19+
20+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
21+
22+
/**
23+
*
24+
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
25+
* Supports two modes zstd and zstd_no_dict.
26+
* Uses Lucene101 as the delegate codec
27+
*
28+
* @opensearch.internal
29+
*/
30+
public abstract class Lucene101CustomCodec extends FilterCodec {
31+
32+
/** Each mode represents a compression algorithm. */
33+
public enum Mode {
34+
/**
35+
* ZStandard mode with dictionary
36+
*/
37+
ZSTD("ZSTD101", Set.of("zstd")),
38+
/**
39+
* ZStandard mode without dictionary
40+
*/
41+
ZSTD_NO_DICT("ZSTDNODICT101", Set.of("zstd_no_dict"));
42+
43+
private final String codec;
44+
private final Set<String> aliases;
45+
46+
Mode(String codec, Set<String> aliases) {
47+
this.codec = codec;
48+
this.aliases = aliases;
49+
}
50+
51+
/**
52+
* Returns the Codec that is registered with Lucene
53+
*/
54+
public String getCodec() {
55+
return codec;
56+
}
57+
58+
/**
59+
* Returns the aliases of the Codec
60+
*/
61+
public Set<String> getAliases() {
62+
return aliases;
63+
}
64+
}
65+
66+
private final StoredFieldsFormat storedFieldsFormat;
67+
68+
/**
69+
* Creates a new compression codec with the default compression level.
70+
*
71+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
72+
*/
73+
public Lucene101CustomCodec(Mode mode) {
74+
this(mode, DEFAULT_COMPRESSION_LEVEL);
75+
}
76+
77+
/**
78+
* Creates a new compression codec with the given compression level. We use
79+
* lowercase letters when registering the codec so that we remain consistent with
80+
* the other compression codecs: default, lucene_default, and best_compression.
81+
*
82+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
83+
* @param compressionLevel The compression level.
84+
*/
85+
public Lucene101CustomCodec(Mode mode, int compressionLevel) {
86+
super(mode.getCodec(), new Lucene101Codec());
87+
this.storedFieldsFormat = new Lucene101CustomStoredFieldsFormat(mode, compressionLevel);
88+
}
89+
90+
/**
91+
* Creates a new compression codec with the given compression level. We use
92+
* lowercase letters when registering the codec so that we remain consistent with
93+
* the other compression codecs: default, lucene_default, and best_compression.
94+
*
95+
* @param mode The compression codec (ZSTD or ZSTDNODICT).
96+
* @param compressionLevel The compression level.
97+
* @param mapperService The mapper service.
98+
* @param logger The logger.
99+
*/
100+
public Lucene101CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
101+
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, logger));
102+
this.storedFieldsFormat = new Lucene101CustomStoredFieldsFormat(mode, compressionLevel);
103+
}
104+
105+
@Override
106+
public StoredFieldsFormat storedFieldsFormat() {
107+
return storedFieldsFormat;
108+
}
109+
110+
@Override
111+
public String toString() {
112+
return getClass().getSimpleName();
113+
}
114+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec.customcodecs;
10+
11+
import org.apache.lucene.codecs.StoredFieldsFormat;
12+
import org.apache.lucene.codecs.StoredFieldsReader;
13+
import org.apache.lucene.codecs.StoredFieldsWriter;
14+
import org.apache.lucene.codecs.compressing.CompressionMode;
15+
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
16+
import org.apache.lucene.index.FieldInfos;
17+
import org.apache.lucene.index.SegmentInfo;
18+
import org.apache.lucene.store.Directory;
19+
import org.apache.lucene.store.IOContext;
20+
import org.opensearch.index.codec.customcodecs.ZstdCompressionMode;
21+
import org.opensearch.index.codec.customcodecs.ZstdNoDictCompressionMode;
22+
23+
import java.io.IOException;
24+
import java.util.Objects;
25+
26+
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;
27+
28+
/** Stored field format used by pluggable codec */
29+
public class Lucene101CustomStoredFieldsFormat extends StoredFieldsFormat {
30+
31+
/** A key that we use to map to a mode */
32+
public static final String MODE_KEY = Lucene101CustomStoredFieldsFormat.class.getSimpleName() + ".mode";
33+
34+
protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
35+
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
36+
protected static final int ZSTD_BLOCK_SHIFT = 10;
37+
38+
private final CompressionMode zstdCompressionMode;
39+
private final CompressionMode zstdNoDictCompressionMode;
40+
41+
private final Lucene101CustomCodec.Mode mode;
42+
private final int compressionLevel;
43+
44+
/** default constructor */
45+
public Lucene101CustomStoredFieldsFormat() {
46+
this(Lucene101CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
47+
}
48+
49+
/**
50+
* Creates a new instance.
51+
*
52+
* @param mode The mode represents ZSTD or ZSTDNODICT
53+
*/
54+
public Lucene101CustomStoredFieldsFormat(Lucene101CustomCodec.Mode mode) {
55+
this(mode, DEFAULT_COMPRESSION_LEVEL);
56+
}
57+
58+
/**
59+
* Creates a new instance with the specified mode and compression level.
60+
*
61+
* @param mode The mode represents ZSTD or ZSTDNODICT
62+
* @param compressionLevel The compression level for the mode.
63+
*/
64+
public Lucene101CustomStoredFieldsFormat(Lucene101CustomCodec.Mode mode, int compressionLevel) {
65+
this.mode = Objects.requireNonNull(mode);
66+
this.compressionLevel = compressionLevel;
67+
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
68+
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
69+
}
70+
71+
/**
72+
* Returns a {@link StoredFieldsReader} to load stored fields.
73+
* @param directory The index directory.
74+
* @param si The SegmentInfo that stores segment information.
75+
* @param fn The fieldInfos.
76+
* @param context The IOContext that holds additional details on the merge/search context.
77+
*/
78+
@Override
79+
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
80+
if (si.getAttribute(MODE_KEY) != null) {
81+
String value = si.getAttribute(MODE_KEY);
82+
Lucene101CustomCodec.Mode mode = Lucene101CustomCodec.Mode.valueOf(value);
83+
return impl(mode).fieldsReader(directory, si, fn, context);
84+
} else {
85+
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
86+
}
87+
}
88+
89+
/**
90+
* Returns a {@link StoredFieldsReader} to write stored fields.
91+
* @param directory The index directory.
92+
* @param si The SegmentInfo that stores segment information.
93+
* @param context The IOContext that holds additional details on the merge/search context.
94+
*/
95+
@Override
96+
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
97+
String previous = si.putAttribute(MODE_KEY, mode.name());
98+
if (previous != null && previous.equals(mode.name()) == false) {
99+
throw new IllegalStateException(
100+
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
101+
);
102+
}
103+
return impl(mode).fieldsWriter(directory, si, context);
104+
}
105+
106+
StoredFieldsFormat impl(Lucene101CustomCodec.Mode mode) {
107+
switch (mode) {
108+
case ZSTD:
109+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
110+
case ZSTD_NO_DICT:
111+
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
112+
default:
113+
throw new IllegalStateException("Unsupported compression mode: " + mode);
114+
}
115+
}
116+
117+
private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
118+
return new Lucene90CompressingStoredFieldsFormat(
119+
formatName,
120+
compressionMode,
121+
ZSTD_BLOCK_LENGTH,
122+
ZSTD_MAX_DOCS_PER_BLOCK,
123+
ZSTD_BLOCK_SHIFT
124+
);
125+
}
126+
127+
public Lucene101CustomCodec.Mode getMode() {
128+
return mode;
129+
}
130+
131+
/**
132+
* Returns the compression level.
133+
*/
134+
public int getCompressionLevel() {
135+
return compressionLevel;
136+
}
137+
138+
public CompressionMode getCompressionMode() {
139+
return mode == Lucene101CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
140+
}
141+
142+
}

0 commit comments

Comments
 (0)