Skip to content

Commit 047fbca

Browse files
committed
Fix flaky RemoteIndexRecoveryIT testRerouteRecovery test opensearch-project#9580
Signed-off-by: Ashish Singh <ssashish@amazon.com>
1 parent c132db9 commit 047fbca

File tree

3 files changed

+44
-7
lines changed

3 files changed

+44
-7
lines changed

server/src/internalClusterTest/java/org/opensearch/indices/recovery/IndexRecoveryIT.java

+7-3
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ private void assertOnGoingRecoveryState(
253253
}
254254

255255
private void slowDownRecovery(ByteSizeValue shardSize) {
256-
long chunkSize = Math.max(1, shardSize.getBytes() / 10);
256+
long chunkSize = getChunkSize(shardSize);
257257
assertTrue(
258258
client().admin()
259259
.cluster()
@@ -270,6 +270,10 @@ private void slowDownRecovery(ByteSizeValue shardSize) {
270270
);
271271
}
272272

273+
protected long getChunkSize(ByteSizeValue shardSize) {
274+
return Math.max(1, shardSize.getBytes() / 10);
275+
}
276+
273277
private void restoreRecoverySpeed() {
274278
assertTrue(
275279
client().admin()
@@ -523,12 +527,12 @@ public void testRerouteRecovery() throws Exception {
523527

524528
logger.info("--> waiting for recovery to start both on source and target");
525529
final Index index = resolveIndex(INDEX_NAME);
526-
assertBusy(() -> {
530+
assertBusyWithFixedSleepTime(() -> {
527531
IndicesService indicesService = internalCluster().getInstance(IndicesService.class, nodeA);
528532
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsSource(), equalTo(1));
529533
indicesService = internalCluster().getInstance(IndicesService.class, nodeB);
530534
assertThat(indicesService.indexServiceSafe(index).getShard(0).recoveryStats().currentAsTarget(), equalTo(1));
531-
});
535+
}, TimeValue.timeValueSeconds(10), TimeValue.timeValueMillis(500));
532536

533537
logger.info("--> request recoveries");
534538
RecoveryResponse response = client().admin().indices().prepareRecoveries(INDEX_NAME).execute().actionGet();

server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexRecoveryIT.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import org.opensearch.cluster.metadata.IndexMetadata;
1212
import org.opensearch.common.settings.Settings;
13+
import org.opensearch.core.common.unit.ByteSizeValue;
1314
import org.opensearch.index.IndexModule;
1415
import org.opensearch.index.IndexSettings;
1516
import org.opensearch.indices.recovery.IndexRecoveryIT;
@@ -158,9 +159,8 @@ public void testReplicaRecovery() {
158159

159160
}
160161

161-
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9580")
162-
public void testRerouteRecovery() {
163-
162+
@Override
163+
protected long getChunkSize(ByteSizeValue shardSize) {
164+
return Math.max(1, shardSize.getBytes() / 8);
164165
}
165-
166166
}

test/framework/src/main/java/org/opensearch/test/OpenSearchTestCase.java

+33
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import org.opensearch.common.settings.Settings;
8484
import org.opensearch.common.time.DateUtils;
8585
import org.opensearch.common.time.FormatNames;
86+
import org.opensearch.common.unit.TimeValue;
8687
import org.opensearch.common.util.MockBigArrays;
8788
import org.opensearch.common.util.MockPageCacheRecycler;
8889
import org.opensearch.common.util.concurrent.ThreadContext;
@@ -1095,6 +1096,38 @@ public static void assertBusy(CheckedRunnable<Exception> codeBlock, long maxWait
10951096
}
10961097
}
10971098

1099+
/**
1100+
* Runs the code block for the provided max wait time and sleeping for fixed sleep time, waiting for no assertions to trip.
1101+
*/
1102+
public static void assertBusyWithFixedSleepTime(CheckedRunnable<Exception> codeBlock, TimeValue maxWaitTime, TimeValue sleepTime)
1103+
throws Exception {
1104+
long maxTimeInMillis = maxWaitTime.millis();
1105+
long sleepTimeInMillis = sleepTime.millis();
1106+
if (sleepTimeInMillis > maxTimeInMillis) {
1107+
throw new IllegalArgumentException("sleepTime is more than the maxWaitTime");
1108+
}
1109+
long sum = 0;
1110+
List<AssertionError> failures = new ArrayList<>();
1111+
while (sum <= maxTimeInMillis) {
1112+
try {
1113+
codeBlock.run();
1114+
return;
1115+
} catch (AssertionError e) {
1116+
failures.add(e);
1117+
}
1118+
sum += sleepTimeInMillis;
1119+
Thread.sleep(sleepTimeInMillis);
1120+
}
1121+
try {
1122+
codeBlock.run();
1123+
} catch (AssertionError e) {
1124+
for (AssertionError failure : failures) {
1125+
e.addSuppressed(failure);
1126+
}
1127+
throw e;
1128+
}
1129+
}
1130+
10981131
/**
10991132
* Periodically execute the supplied function until it returns true, or a timeout
11001133
* is reached. This version uses a timeout of 10 seconds. If at all possible,

0 commit comments

Comments
 (0)