30
30
import org .opensearch .test .transport .MockTransportService ;
31
31
32
32
import java .util .Collection ;
33
+ import java .util .List ;
33
34
import java .util .Map ;
34
35
import java .util .stream .Collectors ;
35
36
import java .util .stream .Stream ;
@@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
132
133
133
134
/*
134
135
Scenario:
135
- - Starts 1 docrep backed data node
136
- - Creates an index with 0 replica
136
+ - Starts 2 docrep backed data nodes
137
+ - Creates an index with 1 replica
137
138
- Starts 1 remote backed data node
138
139
- Index some docs
139
140
- Move primary copy from docrep to remote through _cluster/reroute
@@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception {
145
146
public void testRemotePrimaryDocRepAndRemoteReplica () throws Exception {
146
147
internalCluster ().startClusterManagerOnlyNode ();
147
148
148
- logger .info ("---> Starting 1 docrep data nodes" );
149
- String docrepNodeName = internalCluster ().startDataOnlyNode ( );
149
+ logger .info ("---> Starting 2 docrep data nodes" );
150
+ internalCluster ().startDataOnlyNodes ( 2 );
150
151
internalCluster ().validateClusterFormed ();
151
152
assertEquals (internalCluster ().client ().admin ().cluster ().prepareGetRepositories ().get ().repositories ().size (), 0 );
152
153
153
- logger .info ("---> Creating index with 0 replica" );
154
+ logger .info ("---> Creating index with 1 replica" );
154
155
Settings zeroReplicas = Settings .builder ()
155
- .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 0 )
156
+ .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 )
156
157
.put (IndexService .RETENTION_LEASE_SYNC_INTERVAL_SETTING .getKey (), "1s" )
157
158
.put (IndexService .GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING .getKey (), "1s" )
158
159
.build ();
@@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ
245
246
pollAndCheckRetentionLeases (REMOTE_PRI_DOCREP_REMOTE_REP );
246
247
}
247
248
249
+ /*
250
+ Scenario:
251
+ - Starts 2 docrep backed data nodes
252
+ - Creates an index with 1 replica
253
+ - Starts 1 remote backed data node
254
+ - Index some docs
255
+ - Move primary copy from docrep to remote through _cluster/reroute
256
+ - Starts another remote backed data node
257
+ - Expands index to 2 replicas. One replica copy lies on a remote backed node and the other on a docrep backed node
258
+ - Index some more docs
259
+ - Assert retention lease consistency
260
+ */
248
261
public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica () throws Exception {
249
262
internalCluster ().startClusterManagerOnlyNode ();
250
263
251
- logger .info ("---> Starting docrep data node " );
252
- internalCluster ().startDataOnlyNode ( );
264
+ logger .info ("---> Starting 2 docrep data nodes " );
265
+ internalCluster ().startDataOnlyNodes ( 2 );
253
266
254
267
Settings zeroReplicasAndOverridenSyncIntervals = Settings .builder ()
255
- .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 0 )
268
+ .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 )
256
269
.put (IndexService .GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING .getKey (), "100ms" )
257
270
.put (IndexService .RETENTION_LEASE_SYNC_INTERVAL_SETTING .getKey (), "100ms" )
258
271
.build ();
@@ -323,25 +336,24 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception {
323
336
324
337
/*
325
338
Scenario:
326
- - Starts 1 docrep backed data node
327
- - Creates an index with 0 replica
339
+ - Starts 2 docrep backed data nodes
340
+ - Creates an index with 1 replica
328
341
- Starts 1 remote backed data node
329
342
- Move primary copy from docrep to remote through _cluster/reroute
330
- - Expands index to 1 replica
331
343
- Stops remote enabled node
332
344
- Ensure doc count is same after failover
333
345
- Index some more docs to ensure working of failed-over primary
334
346
*/
335
347
public void testFailoverRemotePrimaryToDocrepReplica () throws Exception {
336
348
internalCluster ().startClusterManagerOnlyNode ();
337
349
338
- logger .info ("---> Starting 1 docrep data nodes" );
339
- String docrepNodeName = internalCluster ().startDataOnlyNode ( );
350
+ logger .info ("---> Starting 2 docrep data nodes" );
351
+ internalCluster ().startDataOnlyNodes ( 2 );
340
352
internalCluster ().validateClusterFormed ();
341
353
assertEquals (internalCluster ().client ().admin ().cluster ().prepareGetRepositories ().get ().repositories ().size (), 0 );
342
354
343
355
logger .info ("---> Creating index with 0 replica" );
344
- Settings excludeRemoteNode = Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 0 ).build ();
356
+ Settings excludeRemoteNode = Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 ).build ();
345
357
createIndex (FAILOVER_REMOTE_TO_DOCREP , excludeRemoteNode );
346
358
ensureGreen (FAILOVER_REMOTE_TO_DOCREP );
347
359
initDocRepToRemoteMigration ();
@@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
376
388
);
377
389
ensureGreen (FAILOVER_REMOTE_TO_DOCREP );
378
390
379
- logger .info ("---> Expanding index to 1 replica copy " );
380
- Settings twoReplicas = Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 ).build ();
391
+ logger .info ("---> Expanding index to 2 replica copies " );
392
+ Settings twoReplicas = Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 2 ).build ();
381
393
assertAcked (
382
394
internalCluster ().client ()
383
395
.admin ()
@@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
412
424
413
425
logger .info ("---> Stop remote store enabled node" );
414
426
internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (remoteNodeName ));
415
- ensureStableCluster (2 );
427
+ ensureStableCluster (3 );
416
428
ensureYellow (FAILOVER_REMOTE_TO_DOCREP );
417
429
418
430
shardStatsMap = internalCluster ().client ().admin ().indices ().prepareStats (FAILOVER_REMOTE_TO_DOCREP ).setDocs (true ).get ().asMap ();
@@ -433,16 +445,16 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
433
445
refreshAndWaitForReplication (FAILOVER_REMOTE_TO_DOCREP );
434
446
435
447
shardStatsMap = internalCluster ().client ().admin ().indices ().prepareStats (FAILOVER_REMOTE_TO_DOCREP ).setDocs (true ).get ().asMap ();
436
- assertEquals (1 , shardStatsMap .size ());
448
+ assertEquals (2 , shardStatsMap .size ());
437
449
shardStatsMap .forEach (
438
450
(shardRouting , shardStats ) -> { assertEquals (firstBatch + secondBatch , shardStats .getStats ().getDocs ().getCount ()); }
439
451
);
440
452
}
441
453
442
454
/*
443
455
Scenario:
444
- - Starts 1 docrep backed data node
445
- - Creates an index with 0 replica
456
+ - Starts 2 docrep backed data nodes
457
+ - Creates an index with 1 replica
446
458
- Starts 1 remote backed data node
447
459
- Moves primary copy from docrep to remote through _cluster/reroute
448
460
- Starts 1 more remote backed data node
@@ -455,13 +467,13 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception {
455
467
public void testFailoverRemotePrimaryToRemoteReplica () throws Exception {
456
468
internalCluster ().startClusterManagerOnlyNode ();
457
469
458
- logger .info ("---> Starting 1 docrep data node " );
459
- String docrepNodeName = internalCluster ().startDataOnlyNode ( );
470
+ logger .info ("---> Starting 2 docrep data nodes " );
471
+ List < String > docrepNodeNames = internalCluster ().startDataOnlyNodes ( 2 );
460
472
internalCluster ().validateClusterFormed ();
461
473
assertEquals (internalCluster ().client ().admin ().cluster ().prepareGetRepositories ().get ().repositories ().size (), 0 );
462
474
463
- logger .info ("---> Creating index with 0 replica" );
464
- createIndex (FAILOVER_REMOTE_TO_REMOTE , Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 0 ).build ());
475
+ logger .info ("---> Creating index with 1 replica" );
476
+ createIndex (FAILOVER_REMOTE_TO_REMOTE , Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 1 ).build ());
465
477
ensureGreen (FAILOVER_REMOTE_TO_REMOTE );
466
478
initDocRepToRemoteMigration ();
467
479
@@ -484,15 +496,17 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
484
496
AsyncIndexingService asyncIndexingService = new AsyncIndexingService (FAILOVER_REMOTE_TO_REMOTE );
485
497
asyncIndexingService .startIndexing ();
486
498
487
- logger .info ("---> Moving primary copy from docrep node {} to remote enabled node {}" , docrepNodeName , remoteNodeName1 );
499
+ String primaryNodeName = primaryNodeName (FAILOVER_REMOTE_TO_REMOTE );
500
+ logger .info ("---> Moving primary copy from docrep node {} to remote enabled node {}" , primaryNodeName , remoteNodeName1 );
488
501
assertAcked (
489
502
internalCluster ().client ()
490
503
.admin ()
491
504
.cluster ()
492
505
.prepareReroute ()
493
- .add (new MoveAllocationCommand (FAILOVER_REMOTE_TO_REMOTE , 0 , docrepNodeName , remoteNodeName1 ))
506
+ .add (new MoveAllocationCommand (FAILOVER_REMOTE_TO_REMOTE , 0 , primaryNodeName , remoteNodeName1 ))
494
507
.get ()
495
508
);
509
+ waitForRelocation ();
496
510
ensureGreen (FAILOVER_REMOTE_TO_REMOTE );
497
511
assertEquals (primaryNodeName (FAILOVER_REMOTE_TO_REMOTE ), remoteNodeName1 );
498
512
@@ -507,7 +521,13 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
507
521
.indices ()
508
522
.prepareUpdateSettings ()
509
523
.setIndices (FAILOVER_REMOTE_TO_REMOTE )
510
- .setSettings (Settings .builder ().put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 2 ).build ())
524
+ .setSettings (
525
+ Settings .builder ()
526
+ .put (IndexMetadata .SETTING_NUMBER_OF_REPLICAS , 2 )
527
+ // prevent replica copy from being allocated to the extra docrep node
528
+ .put ("index.routing.allocation.exclude._name" , primaryNodeName )
529
+ .build ()
530
+ )
511
531
.get ()
512
532
);
513
533
ensureGreen (FAILOVER_REMOTE_TO_REMOTE );
@@ -536,8 +556,8 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
536
556
537
557
logger .info ("---> Stop remote store enabled node hosting the primary" );
538
558
internalCluster ().stopRandomNode (InternalTestCluster .nameFilter (remoteNodeName1 ));
539
- ensureStableCluster (3 );
540
- ensureYellow (FAILOVER_REMOTE_TO_REMOTE );
559
+ ensureStableCluster (4 );
560
+ ensureYellowAndNoInitializingShards (FAILOVER_REMOTE_TO_REMOTE );
541
561
DiscoveryNodes finalNodes = internalCluster ().client ().admin ().cluster ().prepareState ().get ().getState ().getNodes ();
542
562
543
563
waitUntil (() -> {
@@ -580,7 +600,6 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception {
580
600
- Creates an index with 0 replica
581
601
- Starts 1 remote backed data node
582
602
- Move primary copy from docrep to remote through _cluster/reroute
583
- - Expands index to 1 replica
584
603
- Stops remote enabled node
585
604
- Ensure doc count is same after failover
586
605
- Index some more docs to ensure working of failed-over primary
@@ -664,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch
664
683
RemoteSegmentStats remoteSegmentStats = shardStats .getSegments ().getRemoteSegmentStats ();
665
684
assertTrue (remoteSegmentStats .getUploadBytesSucceeded () > 0 );
666
685
assertTrue (remoteSegmentStats .getTotalUploadTime () > 0 );
667
- } else {
686
+ }
687
+ if (shardRouting .unassigned () == false && shardRouting .primary () == false ) {
668
688
boolean remoteNode = nodes .get (shardRouting .currentNodeId ()).isRemoteStoreNode ();
669
689
assertEquals (
670
690
"Mismatched doc count. Is this on remote node ? " + remoteNode ,
0 commit comments