Skip to content

Commit 18e5816

Browse files
authored
Add allocation decider for mixed cluster during remote store migration (#12505)
Signed-off-by: Lakshya Taragi <lakshya.taragi@gmail.com>
1 parent ef50fb4 commit 18e5816

File tree

4 files changed

+865
-0
lines changed

4 files changed

+865
-0
lines changed

server/src/main/java/org/opensearch/cluster/ClusterModule.java

+5
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
import org.opensearch.cluster.routing.allocation.decider.NodeLoadAwareAllocationDecider;
7070
import org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider;
7171
import org.opensearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider;
72+
import org.opensearch.cluster.routing.allocation.decider.RemoteStoreMigrationAllocationDecider;
7273
import org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider;
7374
import org.opensearch.cluster.routing.allocation.decider.ResizeAllocationDecider;
7475
import org.opensearch.cluster.routing.allocation.decider.RestoreInProgressAllocationDecider;
@@ -83,6 +84,7 @@
8384
import org.opensearch.common.settings.Setting;
8485
import org.opensearch.common.settings.Setting.Property;
8586
import org.opensearch.common.settings.Settings;
87+
import org.opensearch.common.util.FeatureFlags;
8688
import org.opensearch.common.util.concurrent.ThreadContext;
8789
import org.opensearch.common.util.set.Sets;
8890
import org.opensearch.core.ParseField;
@@ -373,6 +375,9 @@ public static Collection<AllocationDecider> createAllocationDeciders(
373375
addAllocationDecider(deciders, new AwarenessAllocationDecider(settings, clusterSettings));
374376
addAllocationDecider(deciders, new NodeLoadAwareAllocationDecider(settings, clusterSettings));
375377
addAllocationDecider(deciders, new TargetPoolAllocationDecider());
378+
if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING)) {
379+
addAllocationDecider(deciders, new RemoteStoreMigrationAllocationDecider(settings, clusterSettings));
380+
}
376381

377382
clusterPlugins.stream()
378383
.flatMap(p -> p.createAllocationDeciders(settings, clusterSettings).stream())
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
/*
10+
* Licensed to Elasticsearch under one or more contributor
11+
* license agreements. See the NOTICE file distributed with
12+
* this work for additional information regarding copyright
13+
* ownership. Elasticsearch licenses this file to you under
14+
* the Apache License, Version 2.0 (the "License"); you may
15+
* not use this file except in compliance with the License.
16+
* You may obtain a copy of the License at
17+
*
18+
* http://www.apache.org/licenses/LICENSE-2.0
19+
*
20+
* Unless required by applicable law or agreed to in writing,
21+
* software distributed under the License is distributed on an
22+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
23+
* KIND, either express or implied. See the License for the
24+
* specific language governing permissions and limitations
25+
* under the License.
26+
*/
27+
28+
/*
29+
* Modifications Copyright OpenSearch Contributors. See
30+
* GitHub history for details.
31+
*/
32+
33+
package org.opensearch.cluster.routing.allocation.decider;
34+
35+
import org.opensearch.cluster.metadata.IndexMetadata;
36+
import org.opensearch.cluster.node.DiscoveryNode;
37+
import org.opensearch.cluster.routing.RoutingNode;
38+
import org.opensearch.cluster.routing.ShardRouting;
39+
import org.opensearch.cluster.routing.allocation.RoutingAllocation;
40+
import org.opensearch.common.settings.ClusterSettings;
41+
import org.opensearch.common.settings.Settings;
42+
import org.opensearch.node.remotestore.RemoteStoreNodeService;
43+
import org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode;
44+
import org.opensearch.node.remotestore.RemoteStoreNodeService.Direction;
45+
46+
import java.util.Locale;
47+
48+
/**
49+
* A new allocation decider for migration of document replication clusters to remote store backed clusters:
50+
* - For STRICT compatibility mode, the decision is always YES
51+
* - For remote store backed indices, relocation or allocation/relocation can only be towards a remote node
52+
* - For "REMOTE_STORE" migration direction:
53+
* - New primary shards can only be allocated to a remote node
54+
* - New replica shards can be allocated to a remote node iff the primary has been migrated/allocated to a remote node
55+
* - For other directions ("DOCREP", "NONE"), the decision is always YES
56+
*
57+
* @opensearch.internal
58+
*/
59+
public class RemoteStoreMigrationAllocationDecider extends AllocationDecider {
60+
61+
public static final String NAME = "remote_store_migration";
62+
63+
private Direction migrationDirection;
64+
private CompatibilityMode compatibilityMode;
65+
private boolean remoteStoreBackedIndex;
66+
67+
public RemoteStoreMigrationAllocationDecider(Settings settings, ClusterSettings clusterSettings) {
68+
this.migrationDirection = RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING.get(settings);
69+
this.compatibilityMode = RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(settings);
70+
clusterSettings.addSettingsUpdateConsumer(RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING, this::setMigrationDirection);
71+
clusterSettings.addSettingsUpdateConsumer(
72+
RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING,
73+
this::setCompatibilityMode
74+
);
75+
}
76+
77+
private void setMigrationDirection(Direction migrationDirection) {
78+
this.migrationDirection = migrationDirection;
79+
}
80+
81+
private void setCompatibilityMode(CompatibilityMode compatibilityMode) {
82+
this.compatibilityMode = compatibilityMode;
83+
}
84+
85+
@Override
86+
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
87+
DiscoveryNode targetNode = node.node();
88+
89+
if (compatibilityMode.equals(CompatibilityMode.STRICT)) {
90+
// assuming all nodes are of the same type (all remote or all non-remote)
91+
return allocation.decision(
92+
Decision.YES,
93+
NAME,
94+
getDecisionDetails(true, shardRouting, targetNode, " for strict compatibility mode")
95+
);
96+
}
97+
98+
if (migrationDirection.equals(Direction.REMOTE_STORE) == false) {
99+
// docrep migration direction is currently not supported
100+
return allocation.decision(
101+
Decision.YES,
102+
NAME,
103+
getDecisionDetails(true, shardRouting, targetNode, " for non remote_store direction")
104+
);
105+
}
106+
107+
// check for remote store backed indices
108+
IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index());
109+
if (IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.exists(indexMetadata.getSettings())) {
110+
remoteStoreBackedIndex = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings());
111+
}
112+
if (remoteStoreBackedIndex && targetNode.isRemoteStoreNode() == false) {
113+
// allocations and relocations must be to a remote node
114+
String reason = String.format(
115+
Locale.ROOT,
116+
" because a remote store backed index's shard copy can only be %s to a remote node",
117+
((shardRouting.assignedToNode() == false) ? "allocated" : "relocated")
118+
);
119+
return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, shardRouting, targetNode, reason));
120+
}
121+
122+
if (shardRouting.primary()) {
123+
return primaryShardDecision(shardRouting, targetNode, allocation);
124+
}
125+
return replicaShardDecision(shardRouting, targetNode, allocation);
126+
}
127+
128+
// handle scenarios for allocation of a new shard's primary copy
129+
private Decision primaryShardDecision(ShardRouting primaryShardRouting, DiscoveryNode targetNode, RoutingAllocation allocation) {
130+
if (targetNode.isRemoteStoreNode() == false) {
131+
return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, primaryShardRouting, targetNode, ""));
132+
}
133+
return allocation.decision(Decision.YES, NAME, getDecisionDetails(true, primaryShardRouting, targetNode, ""));
134+
}
135+
136+
private Decision replicaShardDecision(ShardRouting replicaShardRouting, DiscoveryNode targetNode, RoutingAllocation allocation) {
137+
if (targetNode.isRemoteStoreNode()) {
138+
ShardRouting primaryShardRouting = allocation.routingNodes().activePrimary(replicaShardRouting.shardId());
139+
boolean primaryHasMigratedToRemote = false;
140+
if (primaryShardRouting != null) {
141+
DiscoveryNode primaryShardNode = allocation.nodes().getNodes().get(primaryShardRouting.currentNodeId());
142+
primaryHasMigratedToRemote = primaryShardNode.isRemoteStoreNode();
143+
}
144+
if (primaryHasMigratedToRemote == false) {
145+
return allocation.decision(
146+
Decision.NO,
147+
NAME,
148+
getDecisionDetails(false, replicaShardRouting, targetNode, " since primary shard copy is not yet migrated to remote")
149+
);
150+
}
151+
return allocation.decision(
152+
Decision.YES,
153+
NAME,
154+
getDecisionDetails(true, replicaShardRouting, targetNode, " since primary shard copy has been migrated to remote")
155+
);
156+
}
157+
return allocation.decision(Decision.YES, NAME, getDecisionDetails(true, replicaShardRouting, targetNode, ""));
158+
}
159+
160+
// get detailed reason for the decision
161+
private String getDecisionDetails(boolean isYes, ShardRouting shardRouting, DiscoveryNode targetNode, String reason) {
162+
return String.format(
163+
Locale.ROOT,
164+
"[%s migration_direction]: %s shard copy %s be %s to a %s node%s",
165+
migrationDirection.direction,
166+
(shardRouting.primary() ? "primary" : "replica"),
167+
(isYes ? "can" : "can not"),
168+
((shardRouting.assignedToNode() == false) ? "allocated" : "relocated"),
169+
(targetNode.isRemoteStoreNode() ? "remote" : "non-remote"),
170+
reason
171+
);
172+
}
173+
174+
}

server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java

+5
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.opensearch.cluster.routing.allocation.decider.NodeLoadAwareAllocationDecider;
5252
import org.opensearch.cluster.routing.allocation.decider.NodeVersionAllocationDecider;
5353
import org.opensearch.cluster.routing.allocation.decider.RebalanceOnlyWhenActiveAllocationDecider;
54+
import org.opensearch.cluster.routing.allocation.decider.RemoteStoreMigrationAllocationDecider;
5455
import org.opensearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider;
5556
import org.opensearch.cluster.routing.allocation.decider.ResizeAllocationDecider;
5657
import org.opensearch.cluster.routing.allocation.decider.RestoreInProgressAllocationDecider;
@@ -67,6 +68,7 @@
6768
import org.opensearch.common.settings.Setting.Property;
6869
import org.opensearch.common.settings.Settings;
6970
import org.opensearch.common.settings.SettingsModule;
71+
import org.opensearch.common.util.FeatureFlags;
7072
import org.opensearch.common.util.concurrent.ThreadContext;
7173
import org.opensearch.gateway.GatewayAllocator;
7274
import org.opensearch.plugins.ClusterPlugin;
@@ -242,6 +244,9 @@ public void testAllocationDeciderOrder() {
242244
NodeLoadAwareAllocationDecider.class,
243245
TargetPoolAllocationDecider.class
244246
);
247+
if (FeatureFlags.isEnabled(FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL_SETTING)) {
248+
expectedDeciders.add(RemoteStoreMigrationAllocationDecider.class);
249+
}
245250
Collection<AllocationDecider> deciders = ClusterModule.createAllocationDeciders(
246251
Settings.EMPTY,
247252
new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),

0 commit comments

Comments
 (0)