Skip to content

Commit 53d41d3

Browse files
authored
feat: add vertical scaling and SoftReference for snapshot repository data cache (opensearch-project#16489)
- Applies `SoftReference` to cached repository data for efficient memory management under heap pressure. - Enables cache size configuration in `opensearch.yml`, adjustable within a range of 500KB to 1% of heap memory. - Sets the default cache size to `Math.max(ByteSizeUnit.KB.toBytes(500), CACHE_MAX_THRESHOLD / 2)` so it’s generally proportional to heap size. In cases where 1% of the heap is less than 1000KB, indicating a low-memory environment, the default reverts to 500KB as before. - Since `BytesReference` internally uses `byte[]`, the compressed array size is capped at `Integer.MAX_VALUE - 8` to ensure compatibility with JDK limitations on array sizes. Therefore, the maximum cache size cannot exceed this limit. Signed-off-by: inpink <inpink@kakao.com>
1 parent e9f77e3 commit 53d41d3

File tree

5 files changed

+209
-17
lines changed

5 files changed

+209
-17
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1616
- Make IndexStoreListener a pluggable interface ([#16583](https://github.com/opensearch-project/OpenSearch/pull/16583))
1717
- Support for keyword fields in star-tree index ([#16233](https://github.com/opensearch-project/OpenSearch/pull/16233))
1818
- Add a flag in QueryShardContext to differentiate inner hit query ([#16600](https://github.com/opensearch-project/OpenSearch/pull/16600))
19+
- Add vertical scaling and SoftReference for snapshot repository data cache ([#16489](https://github.com/opensearch-project/OpenSearch/pull/16489))
1920

2021
### Dependencies
2122
- Bump `com.azure:azure-storage-common` from 12.25.1 to 12.27.1 ([#16521](https://github.com/opensearch-project/OpenSearch/pull/16521))

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

+1
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,7 @@ public void apply(Settings value, Settings current, Settings previous) {
786786
// Snapshot related Settings
787787
BlobStoreRepository.SNAPSHOT_SHARD_PATH_PREFIX_SETTING,
788788
BlobStoreRepository.SNAPSHOT_ASYNC_DELETION_ENABLE_SETTING,
789+
BlobStoreRepository.SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD,
789790

790791
SearchService.CLUSTER_ALLOW_DERIVED_FIELD_SETTING,
791792

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

+98-13
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142
import org.opensearch.indices.RemoteStoreSettings;
143143
import org.opensearch.indices.recovery.RecoverySettings;
144144
import org.opensearch.indices.recovery.RecoveryState;
145+
import org.opensearch.monitor.jvm.JvmInfo;
145146
import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService;
146147
import org.opensearch.repositories.IndexId;
147148
import org.opensearch.repositories.IndexMetaDataGenerations;
@@ -167,6 +168,7 @@
167168
import java.io.FilterInputStream;
168169
import java.io.IOException;
169170
import java.io.InputStream;
171+
import java.lang.ref.SoftReference;
170172
import java.nio.file.NoSuchFileException;
171173
import java.util.ArrayList;
172174
import java.util.Arrays;
@@ -196,6 +198,7 @@
196198
import java.util.stream.LongStream;
197199
import java.util.stream.Stream;
198200

201+
import static org.opensearch.common.unit.MemorySizeValue.parseBytesSizeValueOrHeapRatio;
199202
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1;
200203
import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
201204
import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS;
@@ -253,6 +256,23 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
253256
*/
254257
public static final String VIRTUAL_DATA_BLOB_PREFIX = "v__";
255258

259+
public static final String SNAPSHOT_REPOSITORY_DATA_CACHET_THRESHOLD_SETTING_NAME = "snapshot.repository_data.cache.threshold";
260+
261+
public static final double SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD_DEFAULT_PERCENTAGE = 0.01;
262+
263+
public static final long CACHE_MIN_THRESHOLD = ByteSizeUnit.KB.toBytes(500);
264+
265+
public static final long CACHE_MAX_THRESHOLD = calculateMaxSnapshotRepositoryDataCacheThreshold();
266+
267+
public static final long CACHE_DEFAULT_THRESHOLD = calculateDefaultSnapshotRepositoryDataCacheThreshold();
268+
269+
/**
270+
* Set to Integer.MAX_VALUE - 8 to prevent OutOfMemoryError due to array header requirements, following the limit used in certain JDK versions.
271+
* This ensures compatibility across various JDK versions. For a practical usage example,
272+
* see this link: https://github.com/openjdk/jdk11u/blob/cee8535a9d3de8558b4b5028d68e397e508bef71/src/jdk.zipfs/share/classes/jdk/nio/zipfs/ByteArrayChannel.java#L226
273+
*/
274+
private static final int MAX_SAFE_ARRAY_SIZE = Integer.MAX_VALUE - 8;
275+
256276
/**
257277
* When set to {@code true}, {@link #bestEffortConsistency} will be set to {@code true} and concurrent modifications of the repository
258278
* contents will not result in the repository being marked as corrupted.
@@ -275,6 +295,58 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
275295
Setting.Property.Deprecated
276296
);
277297

298+
/**
299+
* Sets the cache size for snapshot repository data: the valid range is within 500Kb ... 1% of the node heap memory.
300+
*/
301+
public static final Setting<ByteSizeValue> SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD = new Setting<>(
302+
SNAPSHOT_REPOSITORY_DATA_CACHET_THRESHOLD_SETTING_NAME,
303+
CACHE_DEFAULT_THRESHOLD + "b",
304+
(s) -> {
305+
ByteSizeValue userDefinedLimit = parseBytesSizeValueOrHeapRatio(s, SNAPSHOT_REPOSITORY_DATA_CACHET_THRESHOLD_SETTING_NAME);
306+
long userDefinedLimitBytes = userDefinedLimit.getBytes();
307+
308+
if (userDefinedLimitBytes > CACHE_MAX_THRESHOLD) {
309+
throw new IllegalArgumentException(
310+
"["
311+
+ SNAPSHOT_REPOSITORY_DATA_CACHET_THRESHOLD_SETTING_NAME
312+
+ "] cannot be larger than ["
313+
+ CACHE_MAX_THRESHOLD
314+
+ "] bytes."
315+
);
316+
}
317+
318+
if (userDefinedLimitBytes < CACHE_MIN_THRESHOLD) {
319+
throw new IllegalArgumentException(
320+
"["
321+
+ SNAPSHOT_REPOSITORY_DATA_CACHET_THRESHOLD_SETTING_NAME
322+
+ "] cannot be smaller than ["
323+
+ CACHE_MIN_THRESHOLD
324+
+ "] bytes."
325+
);
326+
}
327+
328+
return userDefinedLimit;
329+
},
330+
Setting.Property.NodeScope
331+
);
332+
333+
public static long calculateDefaultSnapshotRepositoryDataCacheThreshold() {
334+
return Math.max(ByteSizeUnit.KB.toBytes(500), CACHE_MAX_THRESHOLD / 2);
335+
}
336+
337+
public static long calculateMaxSnapshotRepositoryDataCacheThreshold() {
338+
long jvmHeapSize = JvmInfo.jvmInfo().getMem().getHeapMax().getBytes();
339+
long defaultThresholdOfHeap = (long) (jvmHeapSize * SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD_DEFAULT_PERCENTAGE);
340+
long defaultAbsoluteThreshold = ByteSizeUnit.KB.toBytes(500);
341+
long maxThreshold = calculateMaxWithinIntLimit(defaultThresholdOfHeap, defaultAbsoluteThreshold);
342+
343+
return maxThreshold;
344+
}
345+
346+
protected static long calculateMaxWithinIntLimit(long defaultThresholdOfHeap, long defaultAbsoluteThreshold) {
347+
return Math.min(Math.max(defaultThresholdOfHeap, defaultAbsoluteThreshold), MAX_SAFE_ARRAY_SIZE);
348+
}
349+
278350
/**
279351
* Size hint for the IO buffer size to use when reading from and writing to the repository.
280352
*/
@@ -461,6 +533,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
461533

462534
private volatile boolean enableAsyncDeletion;
463535

536+
protected final long repositoryDataCacheThreshold;
537+
464538
/**
465539
* Flag that is set to {@code true} if this instance is started with {@link #metadata} that has a higher value for
466540
* {@link RepositoryMetadata#pendingGeneration()} than for {@link RepositoryMetadata#generation()} indicating a full cluster restart
@@ -515,6 +589,7 @@ protected BlobStoreRepository(
515589
this.snapshotShardPathPrefix = SNAPSHOT_SHARD_PATH_PREFIX_SETTING.get(clusterService.getSettings());
516590
this.enableAsyncDeletion = SNAPSHOT_ASYNC_DELETION_ENABLE_SETTING.get(clusterService.getSettings());
517591
clusterService.getClusterSettings().addSettingsUpdateConsumer(SNAPSHOT_ASYNC_DELETION_ENABLE_SETTING, this::setEnableAsyncDeletion);
592+
this.repositoryDataCacheThreshold = SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD.get(clusterService.getSettings()).getBytes();
518593
}
519594

520595
@Override
@@ -1132,7 +1207,8 @@ private RepositoryData safeRepositoryData(long repositoryStateId, Map<String, Bl
11321207
cached = null;
11331208
} else {
11341209
genToLoad = latestKnownRepoGen.get();
1135-
cached = latestKnownRepositoryData.get();
1210+
SoftReference<Tuple<Long, BytesReference>> softRef = latestKnownRepositoryData.get();
1211+
cached = (softRef != null) ? softRef.get() : null;
11361212
}
11371213
if (genToLoad > generation) {
11381214
// It's always a possibility to not see the latest index-N in the listing here on an eventually consistent blob store, just
@@ -2926,15 +3002,19 @@ public void endVerification(String seed) {
29263002
private final AtomicLong latestKnownRepoGen = new AtomicLong(RepositoryData.UNKNOWN_REPO_GEN);
29273003

29283004
// Best effort cache of the latest known repository data and its generation, cached serialized as compressed json
2929-
private final AtomicReference<Tuple<Long, BytesReference>> latestKnownRepositoryData = new AtomicReference<>();
3005+
private final AtomicReference<SoftReference<Tuple<Long, BytesReference>>> latestKnownRepositoryData = new AtomicReference<>(
3006+
new SoftReference<>(null)
3007+
);
29303008

29313009
@Override
29323010
public void getRepositoryData(ActionListener<RepositoryData> listener) {
29333011
if (latestKnownRepoGen.get() == RepositoryData.CORRUPTED_REPO_GEN) {
29343012
listener.onFailure(corruptedStateException(null));
29353013
return;
29363014
}
2937-
final Tuple<Long, BytesReference> cached = latestKnownRepositoryData.get();
3015+
final SoftReference<Tuple<Long, BytesReference>> softRef = latestKnownRepositoryData.get();
3016+
final Tuple<Long, BytesReference> cached = (softRef != null) ? softRef.get() : null;
3017+
29383018
// Fast path loading repository data directly from cache if we're in fully consistent mode and the cache matches up with
29393019
// the latest known repository generation
29403020
if (bestEffortConsistency == false && cached != null && cached.v1() == latestKnownRepoGen.get()) {
@@ -2983,7 +3063,8 @@ private void doGetRepositoryData(ActionListener<RepositoryData> listener) {
29833063
genToLoad = latestKnownRepoGen.get();
29843064
}
29853065
try {
2986-
final Tuple<Long, BytesReference> cached = latestKnownRepositoryData.get();
3066+
final SoftReference<Tuple<Long, BytesReference>> softRef = latestKnownRepositoryData.get();
3067+
final Tuple<Long, BytesReference> cached = (softRef != null) ? softRef.get() : null;
29873068
final RepositoryData loaded;
29883069
// Caching is not used with #bestEffortConsistency see docs on #cacheRepositoryData for details
29893070
if (bestEffortConsistency == false && cached != null && cached.v1() == genToLoad) {
@@ -3050,19 +3131,22 @@ private void cacheRepositoryData(BytesReference updated, long generation) {
30503131
try {
30513132
serialized = CompressorRegistry.defaultCompressor().compress(updated);
30523133
final int len = serialized.length();
3053-
if (len > ByteSizeUnit.KB.toBytes(500)) {
3134+
long cacheWarningThreshold = Math.min(repositoryDataCacheThreshold * 10, MAX_SAFE_ARRAY_SIZE);
3135+
if (len > repositoryDataCacheThreshold) {
30543136
logger.debug(
3055-
"Not caching repository data of size [{}] for repository [{}] because it is larger than 500KB in"
3137+
"Not caching repository data of size [{}] for repository [{}] because it is larger than [{}] bytes in"
30563138
+ " serialized size",
30573139
len,
3058-
metadata.name()
3140+
metadata.name(),
3141+
repositoryDataCacheThreshold
30593142
);
3060-
if (len > ByteSizeUnit.MB.toBytes(5)) {
3143+
if (len > cacheWarningThreshold) {
30613144
logger.warn(
3062-
"Your repository metadata blob for repository [{}] is larger than 5MB. Consider moving to a fresh"
3145+
"Your repository metadata blob for repository [{}] is larger than [{}] bytes. Consider moving to a fresh"
30633146
+ " repository for new snapshots or deleting unneeded snapshots from your repository to ensure stable"
30643147
+ " repository behavior going forward.",
3065-
metadata.name()
3148+
metadata.name(),
3149+
cacheWarningThreshold
30663150
);
30673151
}
30683152
// Set empty repository data to not waste heap for an outdated cached value
@@ -3074,11 +3158,12 @@ private void cacheRepositoryData(BytesReference updated, long generation) {
30743158
logger.warn("Failed to serialize repository data", e);
30753159
return;
30763160
}
3077-
latestKnownRepositoryData.updateAndGet(known -> {
3161+
latestKnownRepositoryData.updateAndGet(knownRef -> {
3162+
Tuple<Long, BytesReference> known = (knownRef != null) ? knownRef.get() : null;
30783163
if (known != null && known.v1() > generation) {
3079-
return known;
3164+
return knownRef;
30803165
}
3081-
return new Tuple<>(generation, serialized);
3166+
return new SoftReference<>(new Tuple<>(generation, serialized));
30823167
});
30833168
}
30843169
}

server/src/test/java/org/opensearch/common/settings/MemorySizeSettingsTests.java

+59-4
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,15 @@
3434

3535
import org.opensearch.common.settings.Setting.Property;
3636
import org.opensearch.common.util.PageCacheRecycler;
37+
import org.opensearch.core.common.unit.ByteSizeUnit;
3738
import org.opensearch.core.common.unit.ByteSizeValue;
3839
import org.opensearch.indices.IndexingMemoryController;
3940
import org.opensearch.indices.IndicesQueryCache;
4041
import org.opensearch.indices.IndicesRequestCache;
4142
import org.opensearch.indices.breaker.HierarchyCircuitBreakerService;
4243
import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache;
4344
import org.opensearch.monitor.jvm.JvmInfo;
45+
import org.opensearch.repositories.blobstore.BlobStoreRepository;
4446
import org.opensearch.test.OpenSearchTestCase;
4547

4648
import static org.hamcrest.Matchers.equalTo;
@@ -127,22 +129,75 @@ public void testIndicesFieldDataCacheSetting() {
127129
);
128130
}
129131

132+
public void testSnapshotRepositoryDataCacheSizeSetting() {
133+
assertMemorySizeSettingInRange(
134+
BlobStoreRepository.SNAPSHOT_REPOSITORY_DATA_CACHE_THRESHOLD,
135+
"snapshot.repository_data.cache.threshold",
136+
new ByteSizeValue(BlobStoreRepository.calculateDefaultSnapshotRepositoryDataCacheThreshold()),
137+
ByteSizeUnit.KB.toBytes(500),
138+
1.0
139+
);
140+
}
141+
130142
private void assertMemorySizeSetting(Setting<ByteSizeValue> setting, String settingKey, ByteSizeValue defaultValue) {
131143
assertMemorySizeSetting(setting, settingKey, defaultValue, Settings.EMPTY);
132144
}
133145

134146
private void assertMemorySizeSetting(Setting<ByteSizeValue> setting, String settingKey, ByteSizeValue defaultValue, Settings settings) {
147+
assertMemorySizeSetting(setting, settingKey, defaultValue, 25.0, 1024, settings);
148+
}
149+
150+
private void assertMemorySizeSetting(
151+
Setting<ByteSizeValue> setting,
152+
String settingKey,
153+
ByteSizeValue defaultValue,
154+
double availablePercentage,
155+
long availableBytes,
156+
Settings settings
157+
) {
135158
assertThat(setting, notNullValue());
136159
assertThat(setting.getKey(), equalTo(settingKey));
137160
assertThat(setting.getProperties(), hasItem(Property.NodeScope));
138161
assertThat(setting.getDefault(settings), equalTo(defaultValue));
139-
Settings settingWithPercentage = Settings.builder().put(settingKey, "25%").build();
162+
Settings settingWithPercentage = Settings.builder().put(settingKey, percentageAsString(availablePercentage)).build();
140163
assertThat(
141164
setting.get(settingWithPercentage),
142-
equalTo(new ByteSizeValue((long) (JvmInfo.jvmInfo().getMem().getHeapMax().getBytes() * 0.25)))
165+
equalTo(
166+
new ByteSizeValue((long) (JvmInfo.jvmInfo().getMem().getHeapMax().getBytes() * percentageAsFraction(availablePercentage)))
167+
)
143168
);
144-
Settings settingWithBytesValue = Settings.builder().put(settingKey, "1024b").build();
145-
assertThat(setting.get(settingWithBytesValue), equalTo(new ByteSizeValue(1024)));
169+
Settings settingWithBytesValue = Settings.builder().put(settingKey, availableBytes + "b").build();
170+
assertThat(setting.get(settingWithBytesValue), equalTo(new ByteSizeValue(availableBytes)));
146171
}
147172

173+
private void assertMemorySizeSettingInRange(
174+
Setting<ByteSizeValue> setting,
175+
String settingKey,
176+
ByteSizeValue defaultValue,
177+
long minBytes,
178+
double maxPercentage
179+
) {
180+
assertMemorySizeSetting(setting, settingKey, defaultValue, maxPercentage, minBytes, Settings.EMPTY);
181+
182+
assertThrows(IllegalArgumentException.class, () -> {
183+
Settings settingWithTooSmallValue = Settings.builder().put(settingKey, minBytes - 1).build();
184+
setting.get(settingWithTooSmallValue);
185+
});
186+
187+
assertThrows(IllegalArgumentException.class, () -> {
188+
double unavailablePercentage = maxPercentage + 0.1;
189+
Settings settingWithPercentageExceedingLimit = Settings.builder()
190+
.put(settingKey, percentageAsString(unavailablePercentage))
191+
.build();
192+
setting.get(settingWithPercentageExceedingLimit);
193+
});
194+
}
195+
196+
private double percentageAsFraction(double availablePercentage) {
197+
return availablePercentage / 100.0;
198+
}
199+
200+
private String percentageAsString(double availablePercentage) {
201+
return availablePercentage + "%";
202+
}
148203
}

server/src/test/java/org/opensearch/repositories/blobstore/BlobStoreRepositoryTests.java

+50
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@
9292
import java.util.stream.Collectors;
9393

9494
import static org.opensearch.repositories.RepositoryDataTests.generateRandomRepoData;
95+
import static org.opensearch.repositories.blobstore.BlobStoreRepository.calculateMaxWithinIntLimit;
9596
import static org.hamcrest.Matchers.equalTo;
9697
import static org.hamcrest.Matchers.nullValue;
9798
import static org.mockito.ArgumentMatchers.any;
@@ -653,4 +654,53 @@ public void testGetRestrictedSystemRepositorySettings() {
653654
assertTrue(settings.contains(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY));
654655
repository.close();
655656
}
657+
658+
public void testSnapshotRepositoryDataCacheDefaultSetting() {
659+
// given
660+
BlobStoreRepository repository = setupRepo();
661+
long maxThreshold = BlobStoreRepository.calculateMaxSnapshotRepositoryDataCacheThreshold();
662+
663+
// when
664+
long expectedThreshold = Math.max(ByteSizeUnit.KB.toBytes(500), maxThreshold / 2);
665+
666+
// then
667+
assertEquals(repository.repositoryDataCacheThreshold, expectedThreshold);
668+
}
669+
670+
public void testHeapThresholdUsed() {
671+
// given
672+
long defaultThresholdOfHeap = ByteSizeUnit.GB.toBytes(1);
673+
long defaultAbsoluteThreshold = ByteSizeUnit.KB.toBytes(500);
674+
675+
// when
676+
long expectedThreshold = calculateMaxWithinIntLimit(defaultThresholdOfHeap, defaultAbsoluteThreshold);
677+
678+
// then
679+
assertEquals(defaultThresholdOfHeap, expectedThreshold);
680+
}
681+
682+
public void testAbsoluteThresholdUsed() {
683+
// given
684+
long defaultThresholdOfHeap = ByteSizeUnit.KB.toBytes(499);
685+
long defaultAbsoluteThreshold = ByteSizeUnit.KB.toBytes(500);
686+
687+
// when
688+
long result = calculateMaxWithinIntLimit(defaultThresholdOfHeap, defaultAbsoluteThreshold);
689+
690+
// then
691+
assertEquals(defaultAbsoluteThreshold, result);
692+
}
693+
694+
public void testThresholdCappedAtIntMax() {
695+
// given
696+
int maxSafeArraySize = Integer.MAX_VALUE - 8;
697+
long defaultThresholdOfHeap = (long) maxSafeArraySize + 1;
698+
long defaultAbsoluteThreshold = ByteSizeUnit.KB.toBytes(500);
699+
700+
// when
701+
long expectedThreshold = calculateMaxWithinIntLimit(defaultThresholdOfHeap, defaultAbsoluteThreshold);
702+
703+
// then
704+
assertEquals(maxSafeArraySize, expectedThreshold);
705+
}
656706
}

0 commit comments

Comments
 (0)