Skip to content

Commit eee491f

Browse files
peteralfonsiPeter Alfonsijainankitk
authored
[Backport 2.x] [Bugfix] Fix TieredSpilloverCache stats not adding correctly when sha… (#16790)
* [Bugfix] Fix TieredSpilloverCache stats not adding correctly when shards are closed (#16560) * added draft tests for tsc stats holder Signed-off-by: Peter Alfonsi <petealft@amazon.com> * first draft tsc stats bugfix Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Complete tests Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Cleanup Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Integrate fix with TSC Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Add IT Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Refactor cache package names in TSC module to match with server Signed-off-by: Peter Alfonsi <petealft@amazon.com> * changelog Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Revert "Refactor cache package names in TSC module to match with server" This reverts commit 3b15a7a. Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Addressed Sagar's comments Signed-off-by: Peter Alfonsi <petealft@amazon.com> * More package fixes Signed-off-by: Peter Alfonsi <petealft@amazon.com> * Addressed andross's comments Signed-off-by: Peter Alfonsi <petealft@amazon.com> --------- Signed-off-by: Peter Alfonsi <petealft@amazon.com> Signed-off-by: Peter Alfonsi <peter.alfonsi@gmail.com> Co-authored-by: Peter Alfonsi <petealft@amazon.com> (cherry picked from commit c82cd2e) * Addressed Ankit's comment Signed-off-by: Peter Alfonsi <petealft@amazon.com> --------- Signed-off-by: Peter Alfonsi <petealft@amazon.com> Signed-off-by: Ankit Jain <akjain@amazon.com> Co-authored-by: Peter Alfonsi <petealft@amazon.com> Co-authored-by: Ankit Jain <akjain@amazon.com>
1 parent d632268 commit eee491f

File tree

7 files changed

+501
-12
lines changed

7 files changed

+501
-12
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
8585
- Fix `doc_values` only (`index:false`) IP field searching for masks ([#16628](https://github.com/opensearch-project/OpenSearch/pull/16628))
8686
- Fix stale cluster state custom file deletion ([#16670](https://github.com/opensearch-project/OpenSearch/pull/16670))
8787
- Bound the size of cache in deprecation logger ([16702](https://github.com/opensearch-project/OpenSearch/issues/16702))
88+
- [Tiered Caching] Fix bug in cache stats API ([#16560](https://github.com/opensearch-project/OpenSearch/pull/16560))
8889
- Ensure consistency of system flag on IndexMetadata after diff is applied ([#16644](https://github.com/opensearch-project/OpenSearch/pull/16644))
8990
- Skip remote-repositories validations for node-joins when RepositoriesService is not in sync with cluster-state ([#16763](https://github.com/opensearch-project/OpenSearch/pull/16763))
9091
- Fix _list/shards API failing when closed indices are present ([#16606](https://github.com/opensearch-project/OpenSearch/pull/16606))

modules/cache-common/src/internalClusterTest/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsIT.java

+51
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest;
1212
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse;
13+
import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
1314
import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
1415
import org.opensearch.action.admin.indices.stats.CommonStatsFlags;
1516
import org.opensearch.action.search.SearchResponse;
@@ -40,6 +41,7 @@
4041
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_NAME;
4142
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_DISK;
4243
import static org.opensearch.cache.common.tier.TieredSpilloverCacheStatsHolder.TIER_DIMENSION_VALUE_ON_HEAP;
44+
import static org.opensearch.indices.IndicesService.INDICES_CACHE_CLEAN_INTERVAL_SETTING;
4345
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
4446
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse;
4547

@@ -417,6 +419,55 @@ public void testStatsWithMultipleSegments() throws Exception {
417419
assertTrue(diskCacheStat.getEvictions() == 0);
418420
}
419421

422+
public void testClosingShard() throws Exception {
423+
// Closing the shard should totally remove the stats associated with that shard.
424+
internalCluster().startNodes(
425+
1,
426+
Settings.builder()
427+
.put(defaultSettings(HEAP_CACHE_SIZE_STRING, getNumberOfSegments()))
428+
.put(
429+
TieredSpilloverCacheSettings.TOOK_TIME_POLICY_CONCRETE_SETTINGS_MAP.get(CacheType.INDICES_REQUEST_CACHE).getKey(),
430+
new TimeValue(0, TimeUnit.SECONDS)
431+
)
432+
.put(INDICES_CACHE_CLEAN_INTERVAL_SETTING.getKey(), new TimeValue(1))
433+
.build()
434+
);
435+
String index = "index";
436+
Client client = client();
437+
startIndex(client, index);
438+
439+
// First search one time to see how big a single value will be
440+
searchIndex(client, index, 0);
441+
// get total stats
442+
long singleSearchSize = getTotalStats(client).getSizeInBytes();
443+
// Select numbers so we get some values on both heap and disk
444+
int itemsOnHeap = HEAP_CACHE_SIZE / (int) singleSearchSize;
445+
int itemsOnDisk = 1 + randomInt(30); // The first one we search (to get the size) always goes to disk
446+
int expectedEntries = itemsOnHeap + itemsOnDisk;
447+
448+
for (int i = 1; i < expectedEntries; i++) {
449+
// Cause misses
450+
searchIndex(client, index, i);
451+
}
452+
int expectedMisses = itemsOnHeap + itemsOnDisk;
453+
454+
// Cause some hits
455+
int expectedHits = randomIntBetween(itemsOnHeap, expectedEntries); // Select it so some hits come from both tiers
456+
for (int i = 0; i < expectedHits; i++) {
457+
searchIndex(client, index, i);
458+
}
459+
460+
// Check the new stats API values are as expected
461+
assertEquals(
462+
new ImmutableCacheStats(expectedHits, expectedMisses, 0, expectedEntries * singleSearchSize, expectedEntries),
463+
getTotalStats(client)
464+
);
465+
466+
// Closing the index should close the shard
467+
assertAcked(client().admin().indices().delete(new DeleteIndexRequest("index")).get());
468+
assertEquals(new ImmutableCacheStats(0, 0, 0, 0, 0), getTotalStats(client));
469+
}
470+
420471
private void startIndex(Client client, String indexName) throws InterruptedException {
421472
assertAcked(
422473
client.admin()

modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCache.java

+4-6
Original file line numberDiff line numberDiff line change
@@ -373,12 +373,10 @@ private V compute(ICacheKey<K> key, LoadAwareCacheLoader<ICacheKey<K>, V> loader
373373

374374
@Override
375375
public void invalidate(ICacheKey<K> key) {
376-
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
377-
if (key.getDropStatsForDimensions()) {
378-
List<String> dimensionValues = statsHolder.getDimensionsWithTierValue(key.dimensions, cacheEntry.getValue().tierName);
379-
statsHolder.removeDimensions(dimensionValues);
380-
}
381-
if (key.key != null) {
376+
if (key.getDropStatsForDimensions()) {
377+
statsHolder.removeDimensions(key.dimensions);
378+
} else if (key.key != null) {
379+
for (Map.Entry<ICache<K, V>, TierInfo> cacheEntry : caches.entrySet()) {
382380
try (ReleasableLock ignore = writeLock.acquire()) {
383381
cacheEntry.getKey().invalidate(key);
384382
}

modules/cache-common/src/main/java/org/opensearch/cache/common/tier/TieredSpilloverCacheStatsHolder.java

+15
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ public class TieredSpilloverCacheStatsHolder extends DefaultCacheStatsHolder {
4343
/** Dimension value for on-disk cache, like EhcacheDiskCache. */
4444
public static final String TIER_DIMENSION_VALUE_DISK = "disk";
4545

46+
static final List<String> TIER_VALUES = List.of(TIER_DIMENSION_VALUE_ON_HEAP, TIER_DIMENSION_VALUE_DISK);
47+
4648
/**
4749
* Constructor for the stats holder.
4850
* @param originalDimensionNames the original dimension names, not including TIER_DIMENSION_NAME
@@ -167,4 +169,17 @@ public void decrementItems(List<String> dimensionValues) {
167169
void setDiskCacheEnabled(boolean diskCacheEnabled) {
168170
this.diskCacheEnabled = diskCacheEnabled;
169171
}
172+
173+
@Override
174+
public void removeDimensions(List<String> dimensionValues) {
175+
assert dimensionValues.size() == dimensionNames.size() - 1
176+
: "Must specify a value for every dimension except tier when removing from StatsHolder";
177+
// As we are removing nodes from the tree, obtain the lock
178+
lock.lock();
179+
try {
180+
removeDimensionsHelper(dimensionValues, statsRoot, 0);
181+
} finally {
182+
lock.unlock();
183+
}
184+
}
170185
}

0 commit comments

Comments
 (0)