Skip to content

Commit 1f406db

Browse files
authored
[Remote Store] Update index settings on shard movement during remote store migration (opensearch-project#13316)
Signed-off-by: Shourya Dutta Biswas <114977491+shourya035@users.noreply.github.com>
1 parent f1228e9 commit 1f406db

20 files changed

+1480
-44
lines changed

server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java

+22
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
import java.util.concurrent.atomic.AtomicBoolean;
3535
import java.util.concurrent.atomic.AtomicLong;
3636

37+
import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING;
3738
import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
3839
import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
3940
import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING;
@@ -199,4 +200,25 @@ public void setRefreshFrequency(int refreshFrequency) {
199200
this.refreshFrequency = refreshFrequency;
200201
}
201202
}
203+
204+
/**
 * Applies a transient cluster settings update that sets
 * {@code cluster.routing.allocation.exclude._<attr>} to {@code value},
 * and asserts that the update request was acknowledged.
 *
 * @param attr  suffix appended to {@code cluster.routing.allocation.exclude._} to form the setting key
 *              (presumably a node attribute such as {@code name} -- confirm against callers)
 * @param value value assigned to the resulting exclude setting
 */
public void excludeNodeSet(String attr, String value) {
205+
assertAcked(
206+
internalCluster().client()
207+
.admin()
208+
.cluster()
209+
.prepareUpdateSettings()
210+
.setTransientSettings(Settings.builder().put("cluster.routing.allocation.exclude._" + attr, value))
211+
.get()
212+
);
213+
}
214+
215+
/**
 * Applies a persistent cluster settings update that sets the key of
 * {@code CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING} to {@code "none"},
 * and asserts that the update request was acknowledged.
 */
public void stopShardRebalancing() {
216+
assertAcked(
217+
client().admin()
218+
.cluster()
219+
.prepareUpdateSettings()
220+
.setPersistentSettings(Settings.builder().put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none").build())
221+
.get()
222+
);
223+
}
202224
}

server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java

+52-32
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import org.opensearch.test.transport.MockTransportService;
3131

3232
import java.util.Collection;
33+
import java.util.List;
3334
import java.util.Map;
3435
import java.util.stream.Collectors;
3536
import java.util.stream.Stream;
@@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
132133

133134
/*
134135
Scenario:
135-
- Starts 1 docrep backed data node
136-
- Creates an index with 0 replica
136+
- Starts 2 docrep backed data nodes
137+
- Creates an index with 1 replica
137138
- Starts 1 remote backed data node
138139
- Index some docs
139140
- Move primary copy from docrep to remote through _cluster/reroute
@@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
145146
public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception {
146147
internalCluster().startClusterManagerOnlyNode();
147148

148-
logger.info("---> Starting 1 docrep data nodes");
149-
String docrepNodeName = internalCluster().startDataOnlyNode();
149+
logger.info("---> Starting 2 docrep data nodes");
150+
internalCluster().startDataOnlyNodes(2);
150151
internalCluster().validateClusterFormed();
151152
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);
152153

153-
logger.info("---> Creating index with 0 replica");
154+
logger.info("---> Creating index with 1 replica");
154155
Settings zeroReplicas = Settings.builder()
155-
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
156+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
156157
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "1s")
157158
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "1s")
158159
.build();
@@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ
245246
pollAndCheckRetentionLeases(REMOTE_PRI_DOCREP_REMOTE_REP);
246247
}
247248

249+
/*
250+
Scenario:
251+
- Starts 2 docrep backed data nodes
252+
- Creates an index with 1 replica
253+
- Starts 1 remote backed data node
254+
- Index some docs
255+
- Move primary copy from docrep to remote through _cluster/reroute
256+
- Starts another remote backed data node
257+
- Expands index to 2 replicas. One replica copy lies on a remote backed node and the other on a docrep backed node
258+
- Index some more docs
259+
- Assert retention lease consistency
260+
*/
248261
public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica() throws Exception {
249262
internalCluster().startClusterManagerOnlyNode();
250263

251-
logger.info("---> Starting docrep data node");
252-
internalCluster().startDataOnlyNode();
264+
logger.info("---> Starting 2 docrep data nodes");
265+
internalCluster().startDataOnlyNodes(2);
253266

254267
Settings zeroReplicasAndOverridenSyncIntervals = Settings.builder()
255-
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
268+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
256269
.put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms")
257270
.put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms")
258271
.build();
@@ -323,25 +336,24 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception {
323336

324337
/*
325338
Scenario:
326-
- Starts 1 docrep backed data node
327-
- Creates an index with 0 replica
339+
- Starts 2 docrep backed data nodes
340+
- Creates an index with 1 replica
328341
- Starts 1 remote backed data node
329342
- Move primary copy from docrep to remote through _cluster/reroute
330-
- Expands index to 1 replica
331343
- Stops remote enabled node
332344
- Ensure doc count is same after failover
333345
- Index some more docs to ensure working of failed-over primary
334346
*/
335347
public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
336348
internalCluster().startClusterManagerOnlyNode();
337349

338-
logger.info("---> Starting 1 docrep data nodes");
339-
String docrepNodeName = internalCluster().startDataOnlyNode();
350+
logger.info("---> Starting 2 docrep data nodes");
351+
internalCluster().startDataOnlyNodes(2);
340352
internalCluster().validateClusterFormed();
341353
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);
342354

343355
logger.info("---> Creating index with 0 replica");
344-
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build();
356+
Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
345357
createIndex(FAILOVER_REMOTE_TO_DOCREP, excludeRemoteNode);
346358
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);
347359
initDocRepToRemoteMigration();
@@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
376388
);
377389
ensureGreen(FAILOVER_REMOTE_TO_DOCREP);
378390

379-
logger.info("---> Expanding index to 1 replica copy");
380-
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build();
391+
logger.info("---> Expanding index to 2 replica copies");
392+
Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build();
381393
assertAcked(
382394
internalCluster().client()
383395
.admin()
@@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
412424

413425
logger.info("---> Stop remote store enabled node");
414426
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName));
415-
ensureStableCluster(2);
427+
ensureStableCluster(3);
416428
ensureYellow(FAILOVER_REMOTE_TO_DOCREP);
417429

418430
shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
@@ -433,16 +445,16 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
433445
refreshAndWaitForReplication(FAILOVER_REMOTE_TO_DOCREP);
434446

435447
shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap();
436-
assertEquals(1, shardStatsMap.size());
448+
assertEquals(2, shardStatsMap.size());
437449
shardStatsMap.forEach(
438450
(shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); }
439451
);
440452
}
441453

442454
/*
443455
Scenario:
444-
- Starts 1 docrep backed data node
445-
- Creates an index with 0 replica
456+
- Starts 2 docrep backed data nodes
457+
- Creates an index with 1 replica
446458
- Starts 1 remote backed data node
447459
- Moves primary copy from docrep to remote through _cluster/reroute
448460
- Starts 1 more remote backed data node
@@ -455,13 +467,13 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
455467
public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
456468
internalCluster().startClusterManagerOnlyNode();
457469

458-
logger.info("---> Starting 1 docrep data node");
459-
String docrepNodeName = internalCluster().startDataOnlyNode();
470+
logger.info("---> Starting 2 docrep data nodes");
471+
List<String> docrepNodeNames = internalCluster().startDataOnlyNodes(2);
460472
internalCluster().validateClusterFormed();
461473
assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0);
462474

463-
logger.info("---> Creating index with 0 replica");
464-
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build());
475+
logger.info("---> Creating index with 1 replica");
476+
createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build());
465477
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
466478
initDocRepToRemoteMigration();
467479

@@ -484,15 +496,17 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
484496
AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE);
485497
asyncIndexingService.startIndexing();
486498

487-
logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", docrepNodeName, remoteNodeName1);
499+
String primaryNodeName = primaryNodeName(FAILOVER_REMOTE_TO_REMOTE);
500+
logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", primaryNodeName, remoteNodeName1);
488501
assertAcked(
489502
internalCluster().client()
490503
.admin()
491504
.cluster()
492505
.prepareReroute()
493-
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, docrepNodeName, remoteNodeName1))
506+
.add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, primaryNodeName, remoteNodeName1))
494507
.get()
495508
);
509+
waitForRelocation();
496510
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
497511
assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1);
498512

@@ -507,7 +521,13 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
507521
.indices()
508522
.prepareUpdateSettings()
509523
.setIndices(FAILOVER_REMOTE_TO_REMOTE)
510-
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build())
524+
.setSettings(
525+
Settings.builder()
526+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2)
527+
// prevent replica copy from being allocated to the extra docrep node
528+
.put("index.routing.allocation.exclude._name", primaryNodeName)
529+
.build()
530+
)
511531
.get()
512532
);
513533
ensureGreen(FAILOVER_REMOTE_TO_REMOTE);
@@ -536,8 +556,8 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
536556

537557
logger.info("---> Stop remote store enabled node hosting the primary");
538558
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1));
539-
ensureStableCluster(3);
540-
ensureYellow(FAILOVER_REMOTE_TO_REMOTE);
559+
ensureStableCluster(4);
560+
ensureYellowAndNoInitializingShards(FAILOVER_REMOTE_TO_REMOTE);
541561
DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes();
542562

543563
waitUntil(() -> {
@@ -580,7 +600,6 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
580600
- Creates an index with 0 replica
581601
- Starts 1 remote backed data node
582602
- Move primary copy from docrep to remote through _cluster/reroute
583-
- Expands index to 1 replica
584603
- Stops remote enabled node
585604
- Ensure doc count is same after failover
586605
- Index some more docs to ensure working of failed-over primary
@@ -664,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch
664683
RemoteSegmentStats remoteSegmentStats = shardStats.getSegments().getRemoteSegmentStats();
665684
assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0);
666685
assertTrue(remoteSegmentStats.getTotalUploadTime() > 0);
667-
} else {
686+
}
687+
if (shardRouting.unassigned() == false && shardRouting.primary() == false) {
668688
boolean remoteNode = nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode();
669689
assertEquals(
670690
"Mismatched doc count. Is this on remote node ? " + remoteNode,

0 commit comments

Comments
 (0)