Skip to content

Commit 3491bcb

Browse files
authored
Add cluster primary balance contraint for rebalancing with buffer (#12656)
Signed-off-by: Arpit-Bandejiya <abandeji@amazon.com>
1 parent 2dc071f commit 3491bcb

File tree

10 files changed

+370
-36
lines changed

10 files changed

+370
-36
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
110110
- [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697))
111111
- [Concurrent Segment Search] Disable concurrent segment search for system indices and throttled requests ([#12954](https://github.com/opensearch-project/OpenSearch/pull/12954))
112112
- Detect breaking changes on pull requests ([#9044](https://github.com/opensearch-project/OpenSearch/pull/9044))
113+
- Add cluster primary balance contraint for rebalancing with buffer ([#12656](https://github.com/opensearch-project/OpenSearch/pull/12656))
113114

114115
### Dependencies
115116
- Bump `org.apache.commons:commons-configuration2` from 2.10.0 to 2.10.1 ([#12896](https://github.com/opensearch-project/OpenSearch/pull/12896))

server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationAllocationIT.java

+84-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
import java.util.stream.Collectors;
3232

3333
import static org.opensearch.cluster.routing.ShardRoutingState.STARTED;
34+
import static org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.PREFER_PRIMARY_SHARD_BALANCE;
35+
import static org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.PREFER_PRIMARY_SHARD_REBALANCE;
36+
import static org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.PRIMARY_SHARD_REBALANCE_BUFFER;
3437
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
3538

3639
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
@@ -58,6 +61,20 @@ public void enablePreferPrimaryBalance() {
5861
);
5962
}
6063

64+
public void setAllocationRelocationStrategy(boolean preferPrimaryBalance, boolean preferPrimaryRebalance, float buffer) {
65+
assertAcked(
66+
client().admin()
67+
.cluster()
68+
.prepareUpdateSettings()
69+
.setPersistentSettings(
70+
Settings.builder()
71+
.put(PREFER_PRIMARY_SHARD_BALANCE.getKey(), preferPrimaryBalance)
72+
.put(PREFER_PRIMARY_SHARD_REBALANCE.getKey(), preferPrimaryRebalance)
73+
.put(PRIMARY_SHARD_REBALANCE_BUFFER.getKey(), buffer)
74+
)
75+
);
76+
}
77+
6178
/**
6279
* This test verifies that the overall primary balance is attained during allocation. This test verifies primary
6380
* balance per index and across all indices is maintained.
@@ -87,7 +104,7 @@ public void testGlobalPrimaryAllocation() throws Exception {
87104
state = client().admin().cluster().prepareState().execute().actionGet().getState();
88105
logger.info(ShardAllocations.printShardDistribution(state));
89106
verifyPerIndexPrimaryBalance();
90-
verifyPrimaryBalance();
107+
verifyPrimaryBalance(0.0f);
91108
}
92109

93110
/**
@@ -224,6 +241,70 @@ public void testAllocationWithDisruption() throws Exception {
224241
verifyPerIndexPrimaryBalance();
225242
}
226243

244+
/**
245+
* Similar to testSingleIndexShardAllocation test but creates multiple indices, multiple nodes adding in and getting
246+
* removed. The test asserts post each such event that primary shard distribution is balanced for each index as well as across the nodes
247+
* when the PREFER_PRIMARY_SHARD_REBALANCE is set to true
248+
*/
249+
public void testAllocationAndRebalanceWithDisruption() throws Exception {
250+
internalCluster().startClusterManagerOnlyNode();
251+
final int maxReplicaCount = 2;
252+
final int maxShardCount = 2;
253+
// Create higher number of nodes than number of shards to reduce chances of SameShardAllocationDecider kicking-in
254+
// and preventing primary relocations
255+
final int nodeCount = randomIntBetween(5, 10);
256+
final int numberOfIndices = randomIntBetween(1, 10);
257+
final float buffer = randomIntBetween(1, 4) * 0.10f;
258+
259+
logger.info("--> Creating {} nodes", nodeCount);
260+
final List<String> nodeNames = new ArrayList<>();
261+
for (int i = 0; i < nodeCount; i++) {
262+
nodeNames.add(internalCluster().startNode());
263+
}
264+
setAllocationRelocationStrategy(true, true, buffer);
265+
266+
int shardCount, replicaCount;
267+
ClusterState state;
268+
for (int i = 0; i < numberOfIndices; i++) {
269+
shardCount = randomIntBetween(1, maxShardCount);
270+
replicaCount = randomIntBetween(1, maxReplicaCount);
271+
logger.info("--> Creating index test{} with primary {} and replica {}", i, shardCount, replicaCount);
272+
createIndex("test" + i, shardCount, replicaCount, i % 2 == 0);
273+
ensureGreen(TimeValue.timeValueSeconds(60));
274+
if (logger.isTraceEnabled()) {
275+
state = client().admin().cluster().prepareState().execute().actionGet().getState();
276+
logger.info(ShardAllocations.printShardDistribution(state));
277+
}
278+
}
279+
state = client().admin().cluster().prepareState().execute().actionGet().getState();
280+
logger.info(ShardAllocations.printShardDistribution(state));
281+
verifyPerIndexPrimaryBalance();
282+
verifyPrimaryBalance(buffer);
283+
284+
final int additionalNodeCount = randomIntBetween(1, 5);
285+
logger.info("--> Adding {} nodes", additionalNodeCount);
286+
287+
internalCluster().startNodes(additionalNodeCount);
288+
ensureGreen(TimeValue.timeValueSeconds(60));
289+
state = client().admin().cluster().prepareState().execute().actionGet().getState();
290+
logger.info(ShardAllocations.printShardDistribution(state));
291+
verifyPerIndexPrimaryBalance();
292+
verifyPrimaryBalance(buffer);
293+
294+
int nodeCountToStop = additionalNodeCount;
295+
while (nodeCountToStop > 0) {
296+
internalCluster().stopRandomDataNode();
297+
// give replica a chance to promote as primary before terminating node containing the replica
298+
ensureGreen(TimeValue.timeValueSeconds(60));
299+
nodeCountToStop--;
300+
}
301+
state = client().admin().cluster().prepareState().execute().actionGet().getState();
302+
logger.info("--> Cluster state post nodes stop {}", state);
303+
logger.info(ShardAllocations.printShardDistribution(state));
304+
verifyPerIndexPrimaryBalance();
305+
verifyPrimaryBalance(buffer);
306+
}
307+
227308
/**
228309
* Utility method which ensures cluster has balanced primary shard distribution across a single index.
229310
* @throws Exception exception
@@ -263,7 +344,7 @@ private void verifyPerIndexPrimaryBalance() throws Exception {
263344
}, 60, TimeUnit.SECONDS);
264345
}
265346

266-
private void verifyPrimaryBalance() throws Exception {
347+
private void verifyPrimaryBalance(float buffer) throws Exception {
267348
assertBusy(() -> {
268349
final ClusterState currentState = client().admin().cluster().prepareState().execute().actionGet().getState();
269350
RoutingNodes nodes = currentState.getRoutingNodes();
@@ -278,7 +359,7 @@ private void verifyPrimaryBalance() throws Exception {
278359
.filter(ShardRouting::primary)
279360
.collect(Collectors.toList())
280361
.size();
281-
assertTrue(primaryCount <= avgPrimaryShardsPerNode);
362+
assertTrue(primaryCount <= (avgPrimaryShardsPerNode * (1 + buffer)));
282363
}
283364
}, 60, TimeUnit.SECONDS);
284365
}

server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationConstraints.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@ public class AllocationConstraints {
3030

3131
public AllocationConstraints() {
3232
this.constraints = new HashMap<>();
33-
this.constraints.putIfAbsent(INDEX_SHARD_PER_NODE_BREACH_CONSTRAINT_ID, new Constraint(isIndexShardsPerNodeBreached()));
34-
this.constraints.putIfAbsent(INDEX_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPerIndexPrimaryShardsPerNodeBreached()));
35-
this.constraints.putIfAbsent(CLUSTER_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPrimaryShardsPerNodeBreached()));
33+
this.constraints.put(INDEX_SHARD_PER_NODE_BREACH_CONSTRAINT_ID, new Constraint(isIndexShardsPerNodeBreached()));
34+
this.constraints.put(INDEX_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPerIndexPrimaryShardsPerNodeBreached()));
35+
this.constraints.put(CLUSTER_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPrimaryShardsPerNodeBreached(0.0f)));
3636
}
3737

3838
public void updateAllocationConstraint(String constraint, boolean enable) {

server/src/main/java/org/opensearch/cluster/routing/allocation/ConstraintTypes.java

+10-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@ public class ConstraintTypes {
2828
*/
2929
public final static String CLUSTER_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID = "cluster.primary.shard.balance.constraint";
3030

31+
/**
32+
* Defines a cluster constraint which is breached when a node contains more than avg primary shards across all indices
33+
*/
34+
public final static String CLUSTER_PRIMARY_SHARD_REBALANCE_CONSTRAINT_ID = "cluster.primary.shard.rebalance.constraint";
35+
3136
/**
3237
* Defines an index constraint which is breached when a node contains more than avg number of shards for an index
3338
*/
@@ -70,14 +75,14 @@ public static Predicate<Constraint.ConstraintParams> isPerIndexPrimaryShardsPerN
7075
}
7176

7277
/**
73-
* Defines a predicate which returns true when a node contains more than average number of primary shards. This
74-
* constraint is used in weight calculation during allocation only. When breached a high weight {@link ConstraintTypes#CONSTRAINT_WEIGHT}
75-
* is assigned to node resulting in lesser chances of node being selected as allocation target
78+
* Defines a predicate which returns true when a node contains more than average number of primary shards with added buffer. This
79+
* constraint is used in weight calculation during allocation/rebalance both. When breached a high weight {@link ConstraintTypes#CONSTRAINT_WEIGHT}
80+
* is assigned to node resulting in lesser chances of node being selected as allocation/rebalance target
7681
*/
77-
public static Predicate<Constraint.ConstraintParams> isPrimaryShardsPerNodeBreached() {
82+
public static Predicate<Constraint.ConstraintParams> isPrimaryShardsPerNodeBreached(float buffer) {
7883
return (params) -> {
7984
int primaryShardCount = params.getNode().numPrimaryShards();
80-
int allowedPrimaryShardCount = (int) Math.ceil(params.getBalancer().avgPrimaryShardsPerNode());
85+
int allowedPrimaryShardCount = (int) Math.ceil(params.getBalancer().avgPrimaryShardsPerNode() * (1 + buffer));
8186
return primaryShardCount >= allowedPrimaryShardCount;
8287
};
8388
}

server/src/main/java/org/opensearch/cluster/routing/allocation/RebalanceConstraints.java

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
import java.util.HashMap;
1515
import java.util.Map;
1616

17+
import static org.opensearch.cluster.routing.allocation.ConstraintTypes.CLUSTER_PRIMARY_SHARD_REBALANCE_CONSTRAINT_ID;
1718
import static org.opensearch.cluster.routing.allocation.ConstraintTypes.INDEX_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID;
1819
import static org.opensearch.cluster.routing.allocation.ConstraintTypes.isPerIndexPrimaryShardsPerNodeBreached;
20+
import static org.opensearch.cluster.routing.allocation.ConstraintTypes.isPrimaryShardsPerNodeBreached;
1921

2022
/**
2123
* Constraints applied during rebalancing round; specify conditions which, if breached, reduce the
@@ -27,9 +29,13 @@ public class RebalanceConstraints {
2729

2830
private Map<String, Constraint> constraints;
2931

30-
public RebalanceConstraints() {
32+
public RebalanceConstraints(RebalanceParameter rebalanceParameter) {
3133
this.constraints = new HashMap<>();
32-
this.constraints.putIfAbsent(INDEX_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPerIndexPrimaryShardsPerNodeBreached()));
34+
this.constraints.put(INDEX_PRIMARY_SHARD_BALANCE_CONSTRAINT_ID, new Constraint(isPerIndexPrimaryShardsPerNodeBreached()));
35+
this.constraints.put(
36+
CLUSTER_PRIMARY_SHARD_REBALANCE_CONSTRAINT_ID,
37+
new Constraint(isPrimaryShardsPerNodeBreached(rebalanceParameter.getPreferPrimaryBalanceBuffer()))
38+
);
3339
}
3440

3541
public void updateRebalanceConstraint(String constraint, boolean enable) {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.cluster.routing.allocation;
10+
11+
/**
12+
* RebalanceConstraint Params
13+
*/
14+
public class RebalanceParameter {
15+
private float preferPrimaryBalanceBuffer;
16+
17+
public RebalanceParameter(float preferPrimaryBalanceBuffer) {
18+
this.preferPrimaryBalanceBuffer = preferPrimaryBalanceBuffer;
19+
}
20+
21+
public float getPreferPrimaryBalanceBuffer() {
22+
return preferPrimaryBalanceBuffer;
23+
}
24+
}

0 commit comments

Comments
 (0)