Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6d9b93a

Browse files
committed Mar 15, 2024 ·
noop-compression-size
Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
1 parent 6b1986e commit 6d9b93a

File tree

7 files changed

+304
-27
lines changed

7 files changed

+304
-27
lines changed
 

‎.idea/runConfigurations/Debug_OpenSearch.xml

+5-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java

+3
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,9 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
115115
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_INFO_SETTING,
116116
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_TRACE_SETTING,
117117
SearchSlowLog.INDEX_SEARCH_SLOWLOG_LEVEL,
118+
EngineConfig.INDEX_CODEC_LZ4_BLOCK_SIZE_SETTING,
119+
EngineConfig.INDEX_CODEC_ZLIB_BLOCK_SIZE_SETTING,
120+
EngineConfig.INDEX_CODEC_NO_OP_COMPRESSION_SIZE,
118121
IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_WARN_SETTING,
119122
IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_DEBUG_SETTING,
120123
IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_INFO_SETTING,

‎server/src/main/java/org/opensearch/index/codec/CodecService.java

+13-26
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@
3434

3535
import org.apache.logging.log4j.Logger;
3636
import org.apache.lucene.codecs.Codec;
37-
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
3837
import org.apache.lucene.codecs.lucene95.Lucene95Codec.Mode;
3938
import org.opensearch.common.Nullable;
4039
import org.opensearch.common.collect.MapBuilder;
4140
import org.opensearch.index.IndexSettings;
41+
import org.opensearch.index.engine.EngineConfig;
4242
import org.opensearch.index.mapper.MapperService;
4343

4444
import java.util.Map;
@@ -67,34 +67,21 @@ public class CodecService {
6767
public CodecService(@Nullable MapperService mapperService, IndexSettings indexSettings, Logger logger) {
6868
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
6969
assert null != indexSettings;
70-
if (mapperService == null) {
71-
codecs.put(DEFAULT_CODEC, new Lucene95Codec());
72-
codecs.put(LZ4, new Lucene95Codec());
73-
codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION));
74-
codecs.put(ZLIB, new Lucene95Codec(Mode.BEST_COMPRESSION));
75-
} else {
76-
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
77-
codecs.put(LZ4, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
78-
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
79-
codecs.put(ZLIB, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
80-
}
81-
codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
82-
for (String codec : Codec.availableCodecs()) {
83-
codecs.put(codec, Codec.forName(codec));
84-
}
85-
this.codecs = codecs.immutableMap();
86-
}
8770

88-
@Deprecated(since = "2.9.0", forRemoval = true)
89-
public CodecService(@Nullable MapperService mapperService, Logger logger) {
90-
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
71+
int lz4BlockSize = indexSettings.getValue(EngineConfig.INDEX_CODEC_LZ4_BLOCK_SIZE_SETTING);
72+
int zlibBlockSize = indexSettings.getValue(EngineConfig.INDEX_CODEC_ZLIB_BLOCK_SIZE_SETTING);
73+
int noopCompressionSize = indexSettings.getValue(EngineConfig.INDEX_CODEC_NO_OP_COMPRESSION_SIZE);
74+
9175
if (mapperService == null) {
92-
codecs.put(DEFAULT_CODEC, new Lucene95Codec());
93-
codecs.put(BEST_COMPRESSION_CODEC, new Lucene95Codec(Mode.BEST_COMPRESSION));
76+
codecs.put(DEFAULT_CODEC, new Lucene99CoreCodec(Mode.BEST_SPEED, lz4BlockSize, zlibBlockSize, noopCompressionSize));
77+
codecs.put(LZ4, new Lucene99CoreCodec(Mode.BEST_SPEED, lz4BlockSize, zlibBlockSize, noopCompressionSize));
78+
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, lz4BlockSize, zlibBlockSize, noopCompressionSize));
79+
codecs.put(ZLIB, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, lz4BlockSize, zlibBlockSize, noopCompressionSize));
9480
} else {
95-
IndexSettings indexSettings = mapperService.getIndexSettings();
96-
codecs.put(DEFAULT_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_SPEED, mapperService, logger));
97-
codecs.put(BEST_COMPRESSION_CODEC, new PerFieldMappingPostingFormatCodec(Mode.BEST_COMPRESSION, mapperService, logger));
81+
codecs.put(DEFAULT_CODEC, new Lucene99CoreCodec(Mode.BEST_SPEED, lz4BlockSize, zlibBlockSize, noopCompressionSize, mapperService, logger));
82+
codecs.put(LZ4, new Lucene99CoreCodec(Mode.BEST_SPEED, lz4BlockSize, zlibBlockSize, noopCompressionSize, mapperService, logger));
83+
codecs.put(BEST_COMPRESSION_CODEC, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, lz4BlockSize, zlibBlockSize, noopCompressionSize, mapperService, logger));
84+
codecs.put(ZLIB, new Lucene99CoreCodec(Mode.BEST_COMPRESSION, lz4BlockSize, zlibBlockSize, noopCompressionSize, mapperService, logger));
9885
}
9986
codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
10087
for (String codec : Codec.availableCodecs()) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec;
10+
11+
import org.apache.logging.log4j.Logger;
12+
import org.apache.lucene.codecs.FilterCodec;
13+
import org.apache.lucene.codecs.StoredFieldsFormat;
14+
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
15+
import org.opensearch.index.mapper.MapperService;
16+
17+
/**
18+
*
19+
* Extends {@link FilterCodec} to reuse the functionality of Lucene Codec.
20+
* Supports two lucene modes BEST_SPEED and BEST_COMPRESSION.
21+
* Uses Lucene99 as the delegate codec
22+
*
23+
* @opensearch.internal
24+
*/
25+
public class Lucene99CoreCodec extends FilterCodec {
26+
27+
private final StoredFieldsFormat storedFieldsFormat;
28+
29+
public Lucene99CoreCodec() {
30+
super("Lucene99Core", new Lucene95Codec());
31+
storedFieldsFormat = new Lucene99CoreStoredFieldsFormat();
32+
}
33+
34+
public Lucene99CoreCodec(Lucene95Codec.Mode mode, Integer lz4BlockSize, Integer zlibBlockSize, Integer noopCompressionSize) {
35+
super("Lucene99Core", new Lucene95Codec(mode));
36+
storedFieldsFormat = new Lucene99CoreStoredFieldsFormat(mode, lz4BlockSize, zlibBlockSize, noopCompressionSize);
37+
}
38+
39+
@Override
40+
public StoredFieldsFormat storedFieldsFormat() {
41+
return storedFieldsFormat;
42+
}
43+
44+
public Lucene99CoreCodec(Lucene95Codec.Mode mode, Integer lz4BlockSize, Integer zlibBlockSize, Integer noopCompressionSize, MapperService mapperService, Logger logger) {
45+
super("Lucene99Core", new PerFieldMappingPostingFormatCodec(mode, mapperService, logger));
46+
this.storedFieldsFormat = new Lucene99CoreStoredFieldsFormat(mode, lz4BlockSize, zlibBlockSize, noopCompressionSize);
47+
}
48+
49+
@Override
50+
public String toString() {
51+
return getClass().getSimpleName();
52+
}
53+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.index.codec;
10+
11+
import org.apache.lucene.codecs.StoredFieldsFormat;
12+
import org.apache.lucene.codecs.StoredFieldsReader;
13+
import org.apache.lucene.codecs.StoredFieldsWriter;
14+
import org.apache.lucene.codecs.compressing.CompressionMode;
15+
import org.apache.lucene.codecs.compressing.Compressor;
16+
import org.apache.lucene.codecs.compressing.Decompressor;
17+
import org.apache.lucene.codecs.lucene90.DeflateWithPresetDictCompressionMode;
18+
import org.apache.lucene.codecs.lucene90.LZ4WithPresetDictCompressionMode;
19+
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
20+
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
21+
import org.apache.lucene.codecs.lucene95.Lucene95Codec;
22+
import org.apache.lucene.index.FieldInfos;
23+
import org.apache.lucene.index.SegmentInfo;
24+
import org.apache.lucene.store.ByteBuffersDataInput;
25+
import org.apache.lucene.store.DataInput;
26+
import org.apache.lucene.store.DataOutput;
27+
import org.apache.lucene.store.Directory;
28+
import org.apache.lucene.store.IOContext;
29+
import org.apache.lucene.util.ArrayUtil;
30+
import org.apache.lucene.util.BytesRef;
31+
32+
import java.io.IOException;
33+
import java.util.Objects;
34+
35+
/**
36+
* Stored field format used by pluggable codec
37+
*/
38+
public class Lucene99CoreStoredFieldsFormat extends StoredFieldsFormat {
39+
40+
/**
41+
* A key that we use to map to a mode
42+
*/
43+
public static final String MODE_KEY = Lucene99CoreStoredFieldsFormat.class.getSimpleName() + ".mode";
44+
45+
private final Lucene95Codec.Mode mode;
46+
private final Integer lz4BlockSize;
47+
private final Integer zlibBlockSize;
48+
private final Integer noopCompressionSize;
49+
50+
/**
51+
* default constructor
52+
*/
53+
public Lucene99CoreStoredFieldsFormat() {
54+
this(Lucene95Codec.Mode.BEST_SPEED, 16, 60, 100);
55+
}
56+
57+
/**
58+
* Creates a new instance.
59+
*
60+
* @param mode The mode represents ZSTD or ZSTDNODICT
61+
*/
62+
63+
public Lucene99CoreStoredFieldsFormat(Lucene95Codec.Mode mode, Integer lz4Block, Integer zlibBlockSize, Integer noopCompressionSize) {
64+
this.mode = Objects.requireNonNull(mode);
65+
this.lz4BlockSize = lz4Block;
66+
this.zlibBlockSize = zlibBlockSize;
67+
this.noopCompressionSize = noopCompressionSize;
68+
}
69+
70+
/**
71+
* Returns a {@link StoredFieldsReader} to load stored fields.
72+
*
73+
* @param directory The index directory.
74+
* @param si The SegmentInfo that stores segment information.
75+
* @param fn The fieldInfos.
76+
* @param context The IOContext that holds additional details on the merge/search context.
77+
*/
78+
@Override
79+
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
80+
if (si.getAttribute(MODE_KEY) != null) {
81+
String value = si.getAttribute(MODE_KEY);
82+
Lucene95Codec.Mode mode = Lucene95Codec.Mode.valueOf(value);
83+
return impl(mode, si).fieldsReader(directory, si, fn, context);
84+
} else {
85+
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
86+
}
87+
88+
}
89+
90+
/**
91+
* Returns a {@link StoredFieldsReader} to write stored fields.
92+
*
93+
* @param directory The index directory.
94+
* @param si The SegmentInfo that stores segment information.
95+
* @param context The IOContext that holds additional details on the merge/search context.
96+
*/
97+
98+
@Override
99+
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
100+
String previous = si.putAttribute(MODE_KEY, mode.name());
101+
if (previous != null && previous.equals(mode.name()) == false) {
102+
throw new IllegalStateException(
103+
"found existing value for " + MODE_KEY + " for segment: " + si.name + "old=" + previous + ", new=" + mode.name()
104+
);
105+
}
106+
return impl(mode, si).fieldsWriter(directory, si, context);
107+
}
108+
109+
StoredFieldsFormat impl(Lucene95Codec.Mode mode, SegmentInfo si) {
110+
switch (mode) {
111+
case BEST_SPEED:
112+
return getLZ4CompressingStoredFieldsFormat(si);
113+
case BEST_COMPRESSION:
114+
return getZlibCompressingStoredFieldsFormat(si);
115+
default:
116+
throw new AssertionError();
117+
}
118+
}
119+
120+
public Lucene95Codec.Mode getMode() {
121+
return mode;
122+
}
123+
124+
// Shoot for 10 sub blocks of 48kB each.
125+
private static final int BEST_COMPRESSION_BLOCK_LENGTH = 10 * 1024;
126+
127+
/**
128+
* Compression mode for {@link Lucene90StoredFieldsFormat.Mode#BEST_COMPRESSION}
129+
*/
130+
public static final CompressionMode BEST_COMPRESSION_MODE = new DeflateWithPresetDictCompressionMode();
131+
132+
// Shoot for 10 sub blocks of 8kB each.
133+
private static final int BEST_SPEED_BLOCK_LENGTH = 10 * 1024;
134+
135+
/**
136+
* Compression mode for {@link Lucene90StoredFieldsFormat.Mode#BEST_SPEED}
137+
*/
138+
public static final CompressionMode BEST_SPEED_MODE = new LZ4WithPresetDictCompressionMode();
139+
140+
private StoredFieldsFormat getLZ4CompressingStoredFieldsFormat(SegmentInfo si) {
141+
142+
if (si.getAttribute("estimatedMergeSize") != null) {
143+
long estimatedMergeSize = Long.parseLong(si.getAttribute("estimatedMergeSize"));
144+
145+
if (estimatedMergeSize < this.noopCompressionSize * 1024 * 1024) {
146+
return new Lucene90CompressingStoredFieldsFormat(
147+
"Lucene90StoredFieldsFastData",
148+
NO_COMPRESSION_MODE,
149+
BEST_SPEED_BLOCK_LENGTH * this.lz4BlockSize,
150+
1024,
151+
10
152+
);
153+
}
154+
}
155+
156+
return new Lucene90CompressingStoredFieldsFormat(
157+
"Lucene90StoredFieldsFastData",
158+
BEST_SPEED_MODE,
159+
BEST_SPEED_BLOCK_LENGTH * this.lz4BlockSize,
160+
1024,
161+
10
162+
);
163+
}
164+
165+
private StoredFieldsFormat getZlibCompressingStoredFieldsFormat(SegmentInfo si) {
166+
167+
168+
if (si.getAttribute("estimatedMergeSize") != null) {
169+
long estimatedMergeSize = Long.parseLong(si.getAttribute("estimatedMergeSize"));
170+
if (estimatedMergeSize < 100 * 1024) {
171+
return new Lucene90CompressingStoredFieldsFormat(
172+
"Lucene90StoredFieldsHighData",
173+
NO_COMPRESSION_MODE,
174+
BEST_COMPRESSION_BLOCK_LENGTH * this.zlibBlockSize,
175+
4096,
176+
10
177+
);
178+
}
179+
}
180+
return new Lucene90CompressingStoredFieldsFormat(
181+
"Lucene90StoredFieldsHighData",
182+
BEST_COMPRESSION_MODE,
183+
BEST_COMPRESSION_BLOCK_LENGTH * this.zlibBlockSize,
184+
4096,
185+
10
186+
);
187+
}
188+
189+
static final CompressionMode NO_COMPRESSION_MODE = new CompressionMode() {
190+
public Compressor newCompressor() {
191+
return new Compressor() {
192+
public void close() throws IOException {
193+
}
194+
195+
public void compress(ByteBuffersDataInput buffersInput, DataOutput out) throws IOException {
196+
out.copyBytes(buffersInput, buffersInput.size());
197+
}
198+
};
199+
}
200+
201+
public Decompressor newDecompressor() {
202+
return new Decompressor() {
203+
public void decompress(DataInput in, int originalLength, int offset, int length, BytesRef bytes) throws IOException {
204+
bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, length);
205+
in.skipBytes(offset);
206+
in.readBytes(bytes.bytes, 0, length);
207+
bytes.offset = 0;
208+
bytes.length = length;
209+
}
210+
211+
public Decompressor clone() {
212+
return this;
213+
}
214+
};
215+
}
216+
};
217+
218+
}

‎server/src/main/java/org/opensearch/index/engine/EngineConfig.java

+11
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,17 @@ public Supplier<RetentionLeases> retentionLeasesSupplier() {
157157
}
158158
}, Property.IndexScope, Property.NodeScope);
159159

160+
public static final Setting<Integer> INDEX_CODEC_LZ4_BLOCK_SIZE_SETTING =
161+
Setting.intSetting("index.codec.lz4.block_size", 16, Property.IndexScope, Property.Dynamic);
162+
163+
public static final Setting<Integer> INDEX_CODEC_ZLIB_BLOCK_SIZE_SETTING =
164+
Setting.intSetting("index.codec.zlib.block_size", 60, Property.IndexScope, Property.Dynamic);
165+
166+
167+
public static final Setting<Integer> INDEX_CODEC_NO_OP_COMPRESSION_SIZE =
168+
Setting.intSetting("index.codec.no_compression_size", 100, Property.IndexScope, Property.Dynamic);
169+
170+
160171
/**
161172
* Index setting to change the compression level of zstd and zstd_no_dict lucene codecs.
162173
* Compression Level gives a trade-off between compression ratio and speed. The higher compression level results in higher compression ratio but slower compression and decompression speeds.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
org.opensearch.index.codec.Lucene99CoreCodec

0 commit comments

Comments
 (0)
Please sign in to comment.