Skip to content

Commit d99e0e0

Browse files
imRishNwangdongyu.danny
authored and
wangdongyu.danny
committed
Add lower limit for primary and replica batch allocators timeout (opensearch-project#14979)
* Add lower limit for primary and replica batch allocators Signed-off-by: Rishab Nahata <rnnahata@amazon.com>
1 parent a37ace4 commit d99e0e0

File tree

4 files changed

+96
-5
lines changed

4 files changed

+96
-5
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1515
- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997))
1616

1717
### Changed
18+
- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979))
1819

1920
### Deprecated
2021

server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -886,7 +886,7 @@ public void testBatchModeEnabledWithSufficientTimeoutAndClusterGreen() throws Ex
886886
assertEquals(0, gatewayAllocator.getNumberOfInFlightFetches());
887887
}
888888

889-
public void testBatchModeEnabledWithInSufficientTimeoutButClusterGreen() throws Exception {
889+
public void testBatchModeEnabledWithDisabledTimeoutAndClusterGreen() throws Exception {
890890

891891
internalCluster().startClusterManagerOnlyNodes(
892892
1,
@@ -920,8 +920,8 @@ public void testBatchModeEnabledWithInSufficientTimeoutButClusterGreen() throws
920920
.put("node.name", clusterManagerName)
921921
.put(clusterManagerDataPathSettings)
922922
.put(ShardsBatchGatewayAllocator.GATEWAY_ALLOCATOR_BATCH_SIZE.getKey(), 5)
923-
.put(ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey(), "10ms")
924-
.put(ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey(), "10ms")
923+
.put(ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey(), "-1")
924+
.put(ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey(), "-1")
925925
.put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), true)
926926
.build()
927927
);

server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java

+37-2
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,14 @@ public class ShardsBatchGatewayAllocator implements ExistingShardsAllocator {
7373
private final long maxBatchSize;
7474
private static final short DEFAULT_SHARD_BATCH_SIZE = 2000;
7575

76-
private static final String PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY =
76+
public static final String PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY =
7777
"cluster.routing.allocation.shards_batch_gateway_allocator.primary_allocator_timeout";
78-
private static final String REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY =
78+
public static final String REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY =
7979
"cluster.routing.allocation.shards_batch_gateway_allocator.replica_allocator_timeout";
8080

8181
private TimeValue primaryShardsBatchGatewayAllocatorTimeout;
8282
private TimeValue replicaShardsBatchGatewayAllocatorTimeout;
83+
public static final TimeValue MIN_ALLOCATOR_TIMEOUT = TimeValue.timeValueSeconds(20);
8384

8485
/**
8586
* Number of shards we send in one batch to data nodes for fetching metadata
@@ -92,16 +93,50 @@ public class ShardsBatchGatewayAllocator implements ExistingShardsAllocator {
9293
Setting.Property.NodeScope
9394
);
9495

96+
/**
97+
* Timeout for existing primary shards batch allocator.
98+
* Timeout value must be greater than or equal to 20s or -1ms to effectively disable timeout
99+
*/
95100
public static final Setting<TimeValue> PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING = Setting.timeSetting(
96101
PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY,
97102
TimeValue.MINUS_ONE,
103+
TimeValue.MINUS_ONE,
104+
new Setting.Validator<>() {
105+
@Override
106+
public void validate(TimeValue timeValue) {
107+
if (timeValue.compareTo(MIN_ALLOCATOR_TIMEOUT) < 0 && timeValue.compareTo(TimeValue.MINUS_ONE) != 0) {
108+
throw new IllegalArgumentException(
109+
"Setting ["
110+
+ PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey()
111+
+ "] should be more than 20s or -1ms to disable timeout"
112+
);
113+
}
114+
}
115+
},
98116
Setting.Property.NodeScope,
99117
Setting.Property.Dynamic
100118
);
101119

120+
/**
121+
* Timeout for existing replica shards batch allocator.
122+
* Timeout value must be greater than or equal to 20s or -1ms to effectively disable timeout
123+
*/
102124
public static final Setting<TimeValue> REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING = Setting.timeSetting(
103125
REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY,
104126
TimeValue.MINUS_ONE,
127+
TimeValue.MINUS_ONE,
128+
new Setting.Validator<>() {
129+
@Override
130+
public void validate(TimeValue timeValue) {
131+
if (timeValue.compareTo(MIN_ALLOCATOR_TIMEOUT) < 0 && timeValue.compareTo(TimeValue.MINUS_ONE) != 0) {
132+
throw new IllegalArgumentException(
133+
"Setting ["
134+
+ REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey()
135+
+ "] should be more than 20s or -1ms to disable timeout"
136+
);
137+
}
138+
}
139+
},
105140
Setting.Property.NodeScope,
106141
Setting.Property.Dynamic
107142
);

server/src/test/java/org/opensearch/gateway/GatewayAllocatorTests.java

+55
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@
4747
import java.util.Set;
4848
import java.util.stream.Collectors;
4949

50+
import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING;
51+
import static org.opensearch.gateway.ShardsBatchGatewayAllocator.PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY;
52+
import static org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING;
53+
import static org.opensearch.gateway.ShardsBatchGatewayAllocator.REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY;
54+
5055
public class GatewayAllocatorTests extends OpenSearchAllocationTestCase {
5156

5257
private final Logger logger = LogManager.getLogger(GatewayAllocatorTests.class);
@@ -368,6 +373,56 @@ public void testCreatePrimaryAndReplicaExecutorOfSizeTwo() {
368373
assertEquals(executor.getTimeoutAwareRunnables().size(), 2);
369374
}
370375

376+
public void testPrimaryAllocatorTimeout() {
377+
// Valid setting with timeout = 20s
378+
Settings build = Settings.builder().put(PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "20s").build();
379+
assertEquals(20, PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getSeconds());
380+
381+
// Valid setting with timeout > 20s
382+
build = Settings.builder().put(PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "30000ms").build();
383+
assertEquals(30, PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getSeconds());
384+
385+
// Invalid setting with timeout < 20s
386+
Settings lessThan20sSetting = Settings.builder().put(PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "10s").build();
387+
IllegalArgumentException iae = expectThrows(
388+
IllegalArgumentException.class,
389+
() -> PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(lessThan20sSetting)
390+
);
391+
assertEquals(
392+
"Setting [" + PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey() + "] should be more than 20s or -1ms to disable timeout",
393+
iae.getMessage()
394+
);
395+
396+
// Valid setting with timeout = -1
397+
build = Settings.builder().put(PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "-1").build();
398+
assertEquals(-1, PRIMARY_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getMillis());
399+
}
400+
401+
public void testReplicaAllocatorTimeout() {
402+
// Valid setting with timeout = 20s
403+
Settings build = Settings.builder().put(REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "20s").build();
404+
assertEquals(20, REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getSeconds());
405+
406+
// Valid setting with timeout > 20s
407+
build = Settings.builder().put(REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "30000ms").build();
408+
assertEquals(30, REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getSeconds());
409+
410+
// Invalid setting with timeout < 20s
411+
Settings lessThan20sSetting = Settings.builder().put(REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "10s").build();
412+
IllegalArgumentException iae = expectThrows(
413+
IllegalArgumentException.class,
414+
() -> REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(lessThan20sSetting)
415+
);
416+
assertEquals(
417+
"Setting [" + REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.getKey() + "] should be more than 20s or -1ms to disable timeout",
418+
iae.getMessage()
419+
);
420+
421+
// Valid setting with timeout = -1
422+
build = Settings.builder().put(REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING_KEY, "-1").build();
423+
assertEquals(-1, REPLICA_BATCH_ALLOCATOR_TIMEOUT_SETTING.get(build).getMillis());
424+
}
425+
371426
private void createIndexAndUpdateClusterState(int count, int numberOfShards, int numberOfReplicas) {
372427
if (count == 0) return;
373428
Metadata.Builder metadata = Metadata.builder();

0 commit comments

Comments
 (0)