Skip to content

Commit d3f5f66

Browse files
sumitasrdk2k
authored andcommitted
ClusterManagerTaskThrottler Improvements (opensearch-project#15508)
* [ClusterManagerTaskThrottler Improvements] : Add shallow check in ClusterManagerTaskThrottler to fail fast before computeIfPresent to avoid lock when queue is full Signed-off-by: Sumit Bansal <sumitsb@amazon.com>
1 parent d7f8cd4 commit d3f5f66

File tree

4 files changed

+234
-35
lines changed

4 files changed

+234
-35
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5050
- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568))
5151
- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218))
5252
- Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010))
53+
- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508))
5354

5455
### Dependencies
5556
- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081))

server/src/main/java/org/opensearch/cluster/service/ClusterManagerTaskThrottler.java

+52-23
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
* <p>
3434
* Set specific setting to for setting the threshold of throttling of particular task type.
3535
* e.g : Set "cluster_manager.throttling.thresholds.put_mapping" to set throttling limit of "put mapping" tasks,
36-
* Set it to default value(-1) to disable the throttling for this task type.
36+
* Set it to default value(-1) to disable the throttling for this task type.
3737
*/
3838
public class ClusterManagerTaskThrottler implements TaskBatcherListener {
3939
private static final Logger logger = LogManager.getLogger(ClusterManagerTaskThrottler.class);
@@ -69,7 +69,7 @@ public class ClusterManagerTaskThrottler implements TaskBatcherListener {
6969
private final int MIN_THRESHOLD_VALUE = -1; // Disabled throttling
7070
private final ClusterManagerTaskThrottlerListener clusterManagerTaskThrottlerListener;
7171

72-
private final ConcurrentMap<String, Long> tasksCount;
72+
final ConcurrentMap<String, Long> tasksCount;
7373
private final ConcurrentMap<String, Long> tasksThreshold;
7474
private final Supplier<Version> minNodeVersionSupplier;
7575

@@ -209,30 +209,59 @@ Long getThrottlingLimit(final String taskKey) {
209209
return tasksThreshold.get(taskKey);
210210
}
211211

212+
private void failFastWhenThrottlingThresholdsAreAlreadyBreached(
213+
final boolean throttlingEnabledWithThreshold,
214+
final Long threshold,
215+
final long existingTaskCount,
216+
final int incomingTaskCount,
217+
final String taskThrottlingKey
218+
) {
219+
if (throttlingEnabledWithThreshold && shouldThrottle(threshold, existingTaskCount, incomingTaskCount)) {
220+
throw new ClusterManagerThrottlingException("Throttling Exception : Limit exceeded for " + taskThrottlingKey);
221+
}
222+
}
223+
212224
@Override
213225
public void onBeginSubmit(List<? extends TaskBatcher.BatchedTask> tasks) {
214-
ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
226+
final ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
215227
.getClusterManagerThrottlingKey();
216-
tasksCount.putIfAbsent(clusterManagerThrottlingKey.getTaskThrottlingKey(), 0L);
217-
tasksCount.computeIfPresent(clusterManagerThrottlingKey.getTaskThrottlingKey(), (key, count) -> {
218-
int size = tasks.size();
219-
if (clusterManagerThrottlingKey.isThrottlingEnabled()) {
220-
Long threshold = tasksThreshold.get(clusterManagerThrottlingKey.getTaskThrottlingKey());
221-
if (threshold != null && shouldThrottle(threshold, count, size)) {
222-
clusterManagerTaskThrottlerListener.onThrottle(clusterManagerThrottlingKey.getTaskThrottlingKey(), size);
223-
logger.warn(
224-
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
225-
clusterManagerThrottlingKey.getTaskThrottlingKey(),
226-
tasks.size(),
227-
threshold
228-
);
229-
throw new ClusterManagerThrottlingException(
230-
"Throttling Exception : Limit exceeded for " + clusterManagerThrottlingKey.getTaskThrottlingKey()
231-
);
232-
}
233-
}
234-
return count + size;
235-
});
228+
final String taskThrottlingKey = clusterManagerThrottlingKey.getTaskThrottlingKey();
229+
final Long threshold = getThrottlingLimit(taskThrottlingKey);
230+
final boolean isThrottlingEnabledWithThreshold = clusterManagerThrottlingKey.isThrottlingEnabled() && threshold != null;
231+
int incomingTaskCount = tasks.size();
232+
233+
try {
234+
tasksCount.putIfAbsent(taskThrottlingKey, 0L);
235+
// Perform shallow check before acquiring lock to avoid blocking of network threads
236+
// if throttling is ongoing for a specific task
237+
failFastWhenThrottlingThresholdsAreAlreadyBreached(
238+
isThrottlingEnabledWithThreshold,
239+
threshold,
240+
tasksCount.get(taskThrottlingKey),
241+
incomingTaskCount,
242+
taskThrottlingKey
243+
);
244+
245+
tasksCount.computeIfPresent(taskThrottlingKey, (key, existingTaskCount) -> {
246+
failFastWhenThrottlingThresholdsAreAlreadyBreached(
247+
isThrottlingEnabledWithThreshold,
248+
threshold,
249+
existingTaskCount,
250+
incomingTaskCount,
251+
taskThrottlingKey
252+
);
253+
return existingTaskCount + incomingTaskCount;
254+
});
255+
} catch (final ClusterManagerThrottlingException e) {
256+
clusterManagerTaskThrottlerListener.onThrottle(taskThrottlingKey, incomingTaskCount);
257+
logger.trace(
258+
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
259+
taskThrottlingKey,
260+
incomingTaskCount,
261+
threshold
262+
);
263+
throw e;
264+
}
236265
}
237266

238267
/**

server/src/main/java/org/opensearch/cluster/service/ClusterManagerThrottlingException.java

+6
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,10 @@ public ClusterManagerThrottlingException(String msg, Object... args) {
2525
public ClusterManagerThrottlingException(StreamInput in) throws IOException {
2626
super(in);
2727
}
28+
29+
@Override
30+
public Throwable fillInStackTrace() {
31+
// This is on the hot path; stack traces are expensive to compute and not very useful for this exception, so don't fill it.
32+
return this;
33+
}
2834
}

0 commit comments

Comments
 (0)