Skip to content

Commit cf0d6cc

Browse files
authored
[Bugfix] Fix NPE in ReplicaShardAllocator (opensearch-project#13993) (opensearch-project#14385)
* [Bugfix] Fix NPE in ReplicaShardAllocator (opensearch-project#13993) Signed-off-by: Daniil Roman <daniilroman.cv@gmail.com> * Add fix info to CHANGELOG.md Signed-off-by: Daniil Roman <danroman17397@gmail.com> --------- Signed-off-by: Daniil Roman <daniilroman.cv@gmail.com> Signed-off-by: Daniil Roman <danroman17397@gmail.com>
1 parent 1299919 commit cf0d6cc

File tree

3 files changed

+59
-1
lines changed

3 files changed

+59
-1
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7575
- Refactoring Grok.validatePatternBank by using an iterative approach ([#14206](https://github.com/opensearch-project/OpenSearch/pull/14206))
7676
- Update help output for _cat ([#14722](https://github.com/opensearch-project/OpenSearch/pull/14722))
7777
- Fix bulk upsert ignores the default_pipeline and final_pipeline when auto-created index matches the index template ([#12891](https://github.com/opensearch-project/OpenSearch/pull/12891))
78+
- Fix NPE in ReplicaShardAllocator ([#14385](https://github.com/opensearch-project/OpenSearch/pull/14385))
7879

7980
### Security
8081

server/src/main/java/org/opensearch/gateway/ReplicaShardAllocator.java

+4-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ protected Runnable cancelExistingRecoveryForBetterMatch(
100100
Metadata metadata = allocation.metadata();
101101
RoutingNodes routingNodes = allocation.routingNodes();
102102
ShardRouting primaryShard = allocation.routingNodes().activePrimary(shard.shardId());
103-
assert primaryShard != null : "the replica shard can be allocated on at least one node, so there must be an active primary";
103+
if (primaryShard == null) {
104+
logger.trace("{}: no active primary shard found or allocated, letting actual allocation figure it out", shard);
105+
return null;
106+
}
104107
assert primaryShard.currentNodeId() != null;
105108
final DiscoveryNode primaryNode = allocation.nodes().get(primaryShard.currentNodeId());
106109

server/src/test/java/org/opensearch/gateway/ReplicaShardBatchAllocatorTests.java

+54
Original file line numberDiff line numberDiff line change
@@ -644,6 +644,25 @@ public void testDoNotCancelForBrokenNode() {
644644
assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED), empty());
645645
}
646646

647+
public void testDoNotCancelForInactivePrimaryNode() {
648+
RoutingAllocation allocation = oneInactivePrimaryOnNode1And1ReplicaRecovering(yesAllocationDeciders(), null);
649+
testBatchAllocator.addData(
650+
node1,
651+
null,
652+
"MATCH",
653+
null,
654+
new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION)
655+
).addData(node2, randomSyncId(), null, new StoreFileMetadata("file1", 10, "MATCH_CHECKSUM", MIN_SUPPORTED_LUCENE_VERSION));
656+
657+
testBatchAllocator.processExistingRecoveries(
658+
allocation,
659+
Collections.singletonList(new ArrayList<>(allocation.routingNodes().shardsWithState(ShardRoutingState.INITIALIZING)))
660+
);
661+
662+
assertThat(allocation.routingNodesChanged(), equalTo(false));
663+
assertThat(allocation.routingNodes().shardsWithState(ShardRoutingState.UNASSIGNED), empty());
664+
}
665+
647666
public void testAllocateUnassignedBatchThrottlingAllocationDeciderIsHonoured() throws InterruptedException {
648667
ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
649668
AllocationDeciders allocationDeciders = randomAllocationDeciders(
@@ -872,6 +891,41 @@ private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDecid
872891
);
873892
}
874893

894+
private RoutingAllocation oneInactivePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders, UnassignedInfo unassignedInfo) {
895+
ShardRouting primaryShard = TestShardRouting.newShardRouting(shardId, node1.getId(), true, ShardRoutingState.INITIALIZING);
896+
RoutingTable routingTable = RoutingTable.builder()
897+
.add(
898+
IndexRoutingTable.builder(shardId.getIndex())
899+
.addIndexShard(
900+
new IndexShardRoutingTable.Builder(shardId).addShard(primaryShard)
901+
.addShard(
902+
TestShardRouting.newShardRouting(
903+
shardId,
904+
node2.getId(),
905+
null,
906+
false,
907+
ShardRoutingState.INITIALIZING,
908+
unassignedInfo
909+
)
910+
)
911+
.build()
912+
)
913+
)
914+
.build();
915+
ClusterState state = ClusterState.builder(org.opensearch.cluster.ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY))
916+
.routingTable(routingTable)
917+
.nodes(DiscoveryNodes.builder().add(node1).add(node2))
918+
.build();
919+
return new RoutingAllocation(
920+
deciders,
921+
new RoutingNodes(state, false),
922+
state,
923+
ClusterInfo.EMPTY,
924+
SnapshotShardSizeInfo.EMPTY,
925+
System.nanoTime()
926+
);
927+
}
928+
875929
private RoutingAllocation onePrimaryOnNode1And1ReplicaRecovering(AllocationDeciders deciders) {
876930
return onePrimaryOnNode1And1ReplicaRecovering(deciders, new UnassignedInfo(UnassignedInfo.Reason.CLUSTER_RECOVERED, null));
877931
}

0 commit comments

Comments
 (0)