Skip to content

Commit a0757d9

Browse files
github-actions[bot]andrross
authored andcommitted
Mitigation for remote snapshot filecache overflow (#15077)
TransferManager fails BlobFetchRequest on full cache Signed-off-by: Finn Carroll <carrofin@amazon.com> (cherry picked from commit 8f34ce5) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Signed-off-by: Andrew Ross <andrross@amazon.com>
1 parent 93b0755 commit a0757d9

File tree

3 files changed

+30
-23
lines changed

3 files changed

+30
-23
lines changed

release-notes/opensearch.release-notes-2.17.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,4 @@
103103
- Fix unchecked cast in dynamic action map getter ([#15394](https://github.com/opensearch-project/OpenSearch/pull/15394))
104104
- Fix null values indexed as "null" strings in flat_object field ([#14069](https://github.com/opensearch-project/OpenSearch/pull/14069))
105105
- Fix terms query on wildcard field returns nothing ([#15607](https://github.com/opensearch-project/OpenSearch/pull/15607))
106+
- Fix remote snapshot file_cache exceeding capacity ([#15077](https://github.com/opensearch-project/OpenSearch/pull/15077))

server/src/main/java/org/opensearch/index/store/remote/utils/TransferManager.java

+13
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,19 @@ public IndexInput fetchBlob(BlobFetchRequest blobFetchRequest) throws IOExceptio
9595
@SuppressWarnings("removal")
9696
private static FileCachedIndexInput createIndexInput(FileCache fileCache, StreamReader streamReader, BlobFetchRequest request) {
9797
try {
98+
// This local file cache is ref counted and may not strictly enforce configured capacity.
99+
// If we find available capacity is exceeded, deny further BlobFetchRequests.
100+
if (fileCache.capacity() < fileCache.usage().usage()) {
101+
fileCache.prune();
102+
throw new IOException(
103+
"Local file cache capacity ("
104+
+ fileCache.capacity()
105+
+ ") exceeded ("
106+
+ fileCache.usage().usage()
107+
+ ") - BlobFetchRequest failed: "
108+
+ request.getFilePath()
109+
);
110+
}
98111
if (Files.exists(request.getFilePath()) == false) {
99112
logger.trace("Fetching from Remote in createIndexInput of Transfer Manager");
100113
try (

server/src/test/java/org/opensearch/index/store/remote/utils/TransferManagerTestCase.java

+16-23
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ public void testConcurrentAccess() throws Exception {
9999
}
100100
}
101101

102-
public void testFetchBlobWithConcurrentCacheEvictions() throws Exception {
102+
public void testFetchBlobWithConcurrentCacheEvictions() {
103103
// Submit 256 tasks to an executor with 16 threads that will each randomly
104104
// request one of eight blobs. Given that the cache can only hold two
105105
// blobs this will lead to a huge amount of contention and thrashing.
@@ -114,41 +114,34 @@ public void testFetchBlobWithConcurrentCacheEvictions() throws Exception {
114114
try (IndexInput indexInput = fetchBlobWithName(blobname)) {
115115
assertIndexInputIsFunctional(indexInput);
116116
}
117+
} catch (IOException ignored) { // fetchBlobWithName may fail due to fixed capacity
117118
} catch (Exception e) {
118119
throw new AssertionError(e);
119120
}
120121
}));
121122
}
122123
// Wait for all threads to complete
123-
for (Future<?> future : futures) {
124-
future.get(10, TimeUnit.SECONDS);
124+
try {
125+
for (Future<?> future : futures) {
126+
future.get(10, TimeUnit.SECONDS);
127+
}
128+
} catch (java.util.concurrent.ExecutionException ignored) { // Index input may be null
129+
} catch (Exception e) {
130+
throw new AssertionError(e);
125131
}
132+
126133
} finally {
127134
assertTrue(terminate(testRunner));
128135
}
129136
MatcherAssert.assertThat("Expected many evictions to happen", fileCache.stats().evictionCount(), greaterThan(0L));
130137
}
131138

132-
public void testUsageExceedsCapacity() throws Exception {
133-
// Fetch resources that exceed the configured capacity of the cache and assert that the
134-
// returned IndexInputs are still functional.
135-
try (IndexInput i1 = fetchBlobWithName("1"); IndexInput i2 = fetchBlobWithName("2"); IndexInput i3 = fetchBlobWithName("3")) {
136-
assertIndexInputIsFunctional(i1);
137-
assertIndexInputIsFunctional(i2);
138-
assertIndexInputIsFunctional(i3);
139-
MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB * 3));
140-
MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3));
141-
}
142-
MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L));
143-
MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB * 3));
144-
// Fetch another resource which will trigger an eviction
145-
try (IndexInput i1 = fetchBlobWithName("1")) {
146-
assertIndexInputIsFunctional(i1);
147-
MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo((long) EIGHT_MB));
148-
MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB));
149-
}
150-
MatcherAssert.assertThat(fileCache.usage().activeUsage(), equalTo(0L));
151-
MatcherAssert.assertThat(fileCache.usage().usage(), equalTo((long) EIGHT_MB));
139+
public void testOverflowDisabled() throws Exception {
140+
initializeTransferManager();
141+
IndexInput i1 = fetchBlobWithName("1");
142+
IndexInput i2 = fetchBlobWithName("2");
143+
144+
assertThrows(IOException.class, () -> { IndexInput i3 = fetchBlobWithName("3"); });
152145
}
153146

154147
public void testDownloadFails() throws Exception {

0 commit comments

Comments
 (0)