Skip to content

Commit ea1cc9d

Browse files
authored
Add version check during task submission for bwc for static threshold setting (#5633)
* Add version check during task submission for bwc for static threshold setting Signed-off-by: Dhwanil Patel <dhwanip@amazon.com>
1 parent 98ca4a7 commit ea1cc9d

File tree

4 files changed

+73
-2
lines changed

4 files changed

+73
-2
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
112112
- Fix case sensitivity for wildcard queries ([#5462](https://github.com/opensearch-project/OpenSearch/pull/5462))
113113
- Apply cluster manager throttling settings during bootstrap ([#5524](https://github.com/opensearch-project/OpenSearch/pull/5524))
114114
- Update thresholds map when cluster manager throttling setting is removed ([#5524](https://github.com/opensearch-project/OpenSearch/pull/5524))
115+
- Fix backward compatibility for static cluster manager throttling threshold setting ([#5633](https://github.com/opensearch-project/OpenSearch/pull/5633))
115116
### Security
116117

117118
[Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.4...HEAD

server/src/main/java/org/opensearch/OpenSearchException.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1613,7 +1613,7 @@ private enum OpenSearchExceptionHandle {
16131613
ClusterManagerThrottlingException.class,
16141614
ClusterManagerThrottlingException::new,
16151615
165,
1616-
Version.V_2_4_0
1616+
Version.V_2_5_0
16171617
),
16181618
SNAPSHOT_IN_USE_DELETION_EXCEPTION(
16191619
SnapshotInUseDeletionException.class,

server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java

+29-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.Set;
2323
import java.util.concurrent.ConcurrentHashMap;
2424
import java.util.concurrent.ConcurrentMap;
25+
import java.util.concurrent.atomic.AtomicBoolean;
2526
import java.util.function.Supplier;
2627

2728
/**
@@ -51,6 +52,11 @@ public class ClusterManagerTaskThrottler implements TaskBatcherListener {
5152
private final ConcurrentMap<String, Long> tasksThreshold;
5253
private final Supplier<Version> minNodeVersionSupplier;
5354

55+
// Once all nodes are greater than or equal 2.5.0 version, then only it will start throttling.
56+
// During upgrade as well, it will wait for all older version nodes to leave the cluster before starting throttling.
57+
// This is needed specifically for static setting to enable throttling.
58+
private AtomicBoolean startThrottling = new AtomicBoolean();
59+
5460
public ClusterManagerTaskThrottler(
5561
final Settings settings,
5662
final ClusterSettings clusterSettings,
@@ -168,7 +174,7 @@ public void onBeginSubmit(List<? extends TaskBatcher.BatchedTask> tasks) {
168174
int size = tasks.size();
169175
if (clusterManagerThrottlingKey.isThrottlingEnabled()) {
170176
Long threshold = tasksThreshold.get(clusterManagerThrottlingKey.getTaskThrottlingKey());
171-
if (threshold != null && (count + size > threshold)) {
177+
if (threshold != null && shouldThrottle(threshold, count, size)) {
172178
clusterManagerTaskThrottlerListener.onThrottle(clusterManagerThrottlingKey.getTaskThrottlingKey(), size);
173179
logger.warn(
174180
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
@@ -185,6 +191,28 @@ public void onBeginSubmit(List<? extends TaskBatcher.BatchedTask> tasks) {
185191
});
186192
}
187193

194+
/**
195+
* If throttling thresholds are set via static setting, it will update the threshold map.
196+
* It may start throwing throttling exception to older nodes in cluster.
197+
* Older version nodes will not be equipped to handle the throttling exception and
198+
* this may result in unexpected behavior where internal tasks would start failing without any retries.
199+
*
200+
* For every task submission request, it will validate if nodes version is greater or equal to 2.5.0 and set the startThrottling flag.
201+
* Once the startThrottling flag is set, it will not perform check for next set of tasks.
202+
*/
203+
private boolean shouldThrottle(Long threshold, Long count, int size) {
204+
if (!startThrottling.get()) {
205+
if (minNodeVersionSupplier.get().compareTo(Version.V_2_5_0) >= 0) {
206+
startThrottling.compareAndSet(false, true);
207+
logger.info("Starting cluster manager throttling as all nodes are higher than or equal to 2.5.0");
208+
} else {
209+
logger.info("Skipping cluster manager throttling as at least one node < 2.5.0 is present in cluster");
210+
return false;
211+
}
212+
}
213+
return count + size > threshold;
214+
}
215+
188216
@Override
189217
public void onSubmitFailure(List<? extends TaskBatcher.BatchedTask> tasks) {
190218
reduceTaskCount(tasks);

server/src/test/java/org/opensearch/cluster/service/ClusterManagerTaskThrottlerTests.java

+42
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,48 @@ public void testThrottlingForDisabledThrottlingTask() {
324324
assertEquals(0L, throttlingStats.getThrottlingCount(taskKey));
325325
}
326326

327+
public void testThrottlingForInitialStaticSettingAndVersionCheck() {
328+
ClusterManagerThrottlingStats throttlingStats = new ClusterManagerThrottlingStats();
329+
DiscoveryNode clusterManagerNode = getClusterManagerNode(Version.V_2_5_0);
330+
DiscoveryNode dataNode = getDataNode(Version.V_2_4_0);
331+
setState(
332+
clusterService,
333+
ClusterStateCreationUtils.state(clusterManagerNode, clusterManagerNode, new DiscoveryNode[] { clusterManagerNode, dataNode })
334+
);
335+
336+
// setting threshold in initial settings
337+
ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
338+
int put_mapping_threshold_value = randomIntBetween(1, 10);
339+
Settings initialSettings = Settings.builder()
340+
.put("cluster_manager.throttling.thresholds.put-mapping.value", put_mapping_threshold_value)
341+
.build();
342+
ClusterManagerTaskThrottler throttler = new ClusterManagerTaskThrottler(
343+
initialSettings,
344+
clusterSettings,
345+
() -> { return clusterService.getMasterService().getMinNodeVersion(); },
346+
throttlingStats
347+
);
348+
ClusterManagerTaskThrottler.ThrottlingKey throttlingKey = throttler.registerClusterManagerTask("put-mapping", true);
349+
350+
// verifying adding more tasks then threshold passes
351+
throttler.onBeginSubmit(getMockUpdateTaskList("put-mapping", throttlingKey, put_mapping_threshold_value + 5));
352+
assertEquals(0L, throttlingStats.getThrottlingCount("put-mapping"));
353+
354+
// Removing older version node from cluster
355+
setState(
356+
clusterService,
357+
ClusterStateCreationUtils.state(clusterManagerNode, clusterManagerNode, new DiscoveryNode[] { clusterManagerNode })
358+
);
359+
360+
// adding more tasks, these tasks should be throttled
361+
// As queue already have more tasks than threshold from previous call.
362+
assertThrows(
363+
ClusterManagerThrottlingException.class,
364+
() -> throttler.onBeginSubmit(getMockUpdateTaskList("put-mapping", throttlingKey, 3))
365+
);
366+
assertEquals(3L, throttlingStats.getThrottlingCount("put-mapping"));
367+
}
368+
327369
public void testThrottling() {
328370
ClusterManagerThrottlingStats throttlingStats = new ClusterManagerThrottlingStats();
329371
String taskKey = "test";

0 commit comments

Comments
 (0)