
Commit e828c18

Fix flakiness with SegmentReplicationSuiteIT (opensearch-project#11977)
* Fix SegmentReplicationSuiteIT. This test fails because of a race during shard/node shutdown with node-node replication. Fixed by properly synchronizing creation of new replication events with cancellation, and by cancelling after shards are closed.

* Remove CopyState caching from OngoingSegmentReplications. This change removes the responsibility of caching CopyState inside of OngoingSegmentReplications:
  1. CopyState was originally cached to prevent frequent disk reads while building segment metadata. This is now cached lower down in IndexShard and is not required here.
  2. Change the prepareForReplication method to return SegmentReplicationSourceHandler directly.
  3. Move responsibility for creating and clearing CopyState to the handler.

* Fix comment for afterIndexShardClosed method.

* Fix comment on beforeIndexShardClosed.

* Remove unnecessary method from OngoingSegmentReplications.

Signed-off-by: Marc Handalian <marc.handalian@gmail.com>
1 parent cc22310 commit e828c18
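
The diffs below carry the two halves of the fix. For the source-side race, handler registration in OngoingSegmentReplications now funnels through a single atomic computeIfAbsent call on a concurrent map instead of a separately synchronized CopyState cache, so a concurrent cancel can no longer interleave with the creation of a new replication event. The following is a minimal sketch of that pattern only; the class and field names are illustrative, not the actual OpenSearch types:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

// Illustrative sketch: create-or-get and cancel both operate on one concurrent map,
// so there is no second cache whose lifecycle can race with handler registration.
class HandlerRegistrySketch<H> {

    private final Map<String, H> handlersByAllocationId = new ConcurrentHashMap<>();

    // Atomically create a handler for the allocation id if none is registered yet.
    H prepareForReplication(String allocationId, Function<String, H> handlerFactory) {
        return handlersByAllocationId.computeIfAbsent(allocationId, handlerFactory);
    }

    // Remove and return the handler so the caller can cancel it; there is nothing else to clean up.
    H cancel(String allocationId) {
        return handlersByAllocationId.remove(allocationId);
    }
}

Because computeIfAbsent runs the factory at most once per key and atomically with respect to removals of that key, two concurrent requests for the same allocation id cannot create duplicate state.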

File tree

12 files changed, +125 −295 lines changed


server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationSuiteIT.java

+1 −2

@@ -8,15 +8,13 @@
 
 package org.opensearch.indices.replication;
 
-import org.apache.lucene.tests.util.LuceneTestCase;
 import org.opensearch.action.admin.indices.delete.DeleteIndexRequest;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.indices.replication.common.ReplicationType;
 import org.opensearch.test.OpenSearchIntegTestCase;
 import org.junit.Before;
 
-@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9499")
 @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 2)
 public class SegmentReplicationSuiteIT extends SegmentReplicationBaseIT {
 
@@ -64,6 +62,7 @@ public void testDropRandomNodeDuringReplication() throws Exception {
         ensureYellow(INDEX_NAME);
         client().prepareIndex(INDEX_NAME).setId(Integer.toString(docCount)).setSource("field", "value" + docCount).execute().get();
         internalCluster().startDataOnlyNode();
+        ensureGreen(INDEX_NAME);
         client().admin().indices().delete(new DeleteIndexRequest(INDEX_NAME)).actionGet();
     }
 

server/src/main/java/org/opensearch/indices/replication/CheckpointInfoResponse.java

+6 −0

@@ -40,6 +40,12 @@ public CheckpointInfoResponse(
         this.infosBytes = infosBytes;
     }
 
+    public CheckpointInfoResponse(final ReplicationCheckpoint checkpoint, final byte[] infosBytes) {
+        this.checkpoint = checkpoint;
+        this.infosBytes = infosBytes;
+        this.metadataMap = checkpoint.getMetadataMap();
+    }
+
     public CheckpointInfoResponse(StreamInput in) throws IOException {
         this.checkpoint = new ReplicationCheckpoint(in);
         this.metadataMap = in.readMap(StreamInput::readString, StoreFileMetadata::new);

server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java

+38 −147

@@ -21,12 +21,10 @@
 import org.opensearch.indices.IndicesService;
 import org.opensearch.indices.recovery.FileChunkWriter;
 import org.opensearch.indices.recovery.RecoverySettings;
-import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;
-import org.opensearch.indices.replication.common.CopyState;
 
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.util.Collections;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -36,7 +34,6 @@
 /**
  * Manages references to ongoing segrep events on a node.
  * Each replica will have a new {@link SegmentReplicationSourceHandler} created when starting replication.
- * CopyStates will be cached for reuse between replicas and only released when all replicas have finished copying segments.
  *
  * @opensearch.internal
  */
@@ -45,7 +42,6 @@ class OngoingSegmentReplications {
     private static final Logger logger = LogManager.getLogger(OngoingSegmentReplications.class);
     private final RecoverySettings recoverySettings;
     private final IndicesService indicesService;
-    private final Map<ReplicationCheckpoint, CopyState> copyStateMap;
     private final Map<String, SegmentReplicationSourceHandler> allocationIdToHandlers;
 
     /**
@@ -57,46 +53,9 @@ class OngoingSegmentReplications {
     OngoingSegmentReplications(IndicesService indicesService, RecoverySettings recoverySettings) {
         this.indicesService = indicesService;
         this.recoverySettings = recoverySettings;
-        this.copyStateMap = Collections.synchronizedMap(new HashMap<>());
         this.allocationIdToHandlers = ConcurrentCollections.newConcurrentMap();
     }
 
-    /*
-        Operations on the {@link #copyStateMap} member.
-     */
-
-    /**
-     * A synchronized method that checks {@link #copyStateMap} for the given {@link ReplicationCheckpoint} key
-     * and returns the cached value if one is present. If the key is not present, a {@link CopyState}
-     * object is constructed and stored in the map before being returned.
-     */
-    synchronized CopyState getCachedCopyState(ReplicationCheckpoint checkpoint) throws IOException {
-        if (isInCopyStateMap(checkpoint)) {
-            final CopyState copyState = fetchFromCopyStateMap(checkpoint);
-            // we incref the copyState for every replica that is using this checkpoint.
-            // decref will happen when copy completes.
-            copyState.incRef();
-            return copyState;
-        } else {
-            // From the checkpoint's shard ID, fetch the IndexShard
-            ShardId shardId = checkpoint.getShardId();
-            final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
-            final IndexShard indexShard = indexService.getShard(shardId.id());
-            // build the CopyState object and cache it before returning
-            final CopyState copyState = new CopyState(checkpoint, indexShard);
-
-            /*
-                Use the checkpoint from the request as the key in the map, rather than
-                the checkpoint from the created CopyState. This maximizes cache hits
-                if replication targets make a request with an older checkpoint.
-                Replication targets are expected to fetch the checkpoint in the response
-                CopyState to bring themselves up to date.
-             */
-            addToCopyStateMap(checkpoint, copyState);
-            return copyState;
-        }
-    }
-
     /**
      * Start sending files to the replica.
      *
@@ -114,51 +73,43 @@ void startSegmentCopy(GetSegmentFilesRequest request, ActionListener<GetSegmentF
                 );
             }
             // update the given listener to release the CopyState before it resolves.
-            final ActionListener<GetSegmentFilesResponse> wrappedListener = ActionListener.runBefore(listener, () -> {
-                final SegmentReplicationSourceHandler sourceHandler = allocationIdToHandlers.remove(request.getTargetAllocationId());
-                if (sourceHandler != null) {
-                    removeCopyState(sourceHandler.getCopyState());
-                }
-            });
+            final ActionListener<GetSegmentFilesResponse> wrappedListener = ActionListener.runBefore(
+                listener,
+                () -> allocationIdToHandlers.remove(request.getTargetAllocationId())
+            );
             handler.sendFiles(request, wrappedListener);
         } else {
             listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList()));
         }
     }
 
     /**
-     * Prepare for a Replication event. This method constructs a {@link CopyState} holding files to be sent off of the current
-     * node's store. This state is intended to be sent back to Replicas before copy is initiated so the replica can perform a diff against its
-     * local store. It will then build a handler to orchestrate the segment copy that will be stored locally and started on a subsequent request from replicas
-     * with the list of required files.
+     * Prepare for a Replication event. This method constructs a {@link SegmentReplicationSourceHandler} that orchestrates segment copy and
+     * will internally incref files for copy.
      *
      * @param request {@link CheckpointInfoRequest}
     * @param fileChunkWriter {@link FileChunkWriter} writer to handle sending files over the transport layer.
-     * @return {@link CopyState} the built CopyState for this replication event.
-     * @throws IOException - When there is an IO error building CopyState.
+     * @return {@link SegmentReplicationSourceHandler} the built CopyState for this replication event.
     */
-    CopyState prepareForReplication(CheckpointInfoRequest request, FileChunkWriter fileChunkWriter) throws IOException {
-        final CopyState copyState = getCachedCopyState(request.getCheckpoint());
-        final SegmentReplicationSourceHandler newHandler = createTargetHandler(
-            request.getTargetNode(),
-            copyState,
-            request.getTargetAllocationId(),
-            fileChunkWriter
-        );
-        final SegmentReplicationSourceHandler existingHandler = allocationIdToHandlers.putIfAbsent(
-            request.getTargetAllocationId(),
-            newHandler
-        );
-        // If we are already replicating to this allocation Id, cancel the old and replace with a new execution.
-        // This will clear the old handler & referenced copy state holding an incref'd indexCommit.
-        if (existingHandler != null) {
-            logger.warn("Override handler for allocation id {}", request.getTargetAllocationId());
-            cancelHandlers(handler -> handler.getAllocationId().equals(request.getTargetAllocationId()), "cancel due to retry");
-            assert allocationIdToHandlers.containsKey(request.getTargetAllocationId()) == false;
-            allocationIdToHandlers.put(request.getTargetAllocationId(), newHandler);
-        }
-        assert allocationIdToHandlers.containsKey(request.getTargetAllocationId());
-        return copyState;
+    SegmentReplicationSourceHandler prepareForReplication(CheckpointInfoRequest request, FileChunkWriter fileChunkWriter) {
+        return allocationIdToHandlers.computeIfAbsent(request.getTargetAllocationId(), aId -> {
+            try {
+                // From the checkpoint's shard ID, fetch the IndexShard
+                final ShardId shardId = request.getCheckpoint().getShardId();
+                final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
+                final IndexShard indexShard = indexService.getShard(shardId.id());
+                return new SegmentReplicationSourceHandler(
+                    request.getTargetNode(),
+                    fileChunkWriter,
+                    indexShard,
+                    request.getTargetAllocationId(),
+                    Math.toIntExact(recoverySettings.getChunkSize().getBytes()),
+                    recoverySettings.getMaxConcurrentFileChunks()
+                );
+            } catch (IOException e) {
+                throw new UncheckedIOException("Error creating replication handler", e);
+            }
+        });
     }
 
     /**
@@ -167,8 +118,8 @@ CopyState prepareForReplication(CheckpointInfoRequest request, FileChunkWriter f
     * @param shard {@link IndexShard}
     * @param reason {@link String} - Reason for the cancel
     */
-    synchronized void cancel(IndexShard shard, String reason) {
-        cancelHandlers(handler -> handler.getCopyState().getShard().shardId().equals(shard.shardId()), reason);
+    void cancel(IndexShard shard, String reason) {
+        cancelHandlers(handler -> handler.shardId().equals(shard.shardId()), reason);
     }
 
     /**
@@ -177,11 +128,10 @@ synchronized void cancel(IndexShard shard, String reason) {
     * @param allocationId {@link String} - Allocation ID.
     * @param reason {@link String} - Reason for the cancel
     */
-    synchronized void cancel(String allocationId, String reason) {
+    void cancel(String allocationId, String reason) {
        final SegmentReplicationSourceHandler handler = allocationIdToHandlers.remove(allocationId);
        if (handler != null) {
            handler.cancel(reason);
-            removeCopyState(handler.getCopyState());
        }
    }
 
@@ -194,14 +144,6 @@ void cancelReplication(DiscoveryNode node) {
        cancelHandlers(handler -> handler.getTargetNode().equals(node), "Node left");
    }
 
-    /**
-     * Checks if the {@link #copyStateMap} has the input {@link ReplicationCheckpoint}
-     * as a key by invoking {@link Map#containsKey(Object)}.
-     */
-    boolean isInCopyStateMap(ReplicationCheckpoint replicationCheckpoint) {
-        return copyStateMap.containsKey(replicationCheckpoint);
-    }
-
    int size() {
        return allocationIdToHandlers.size();
    }
@@ -211,58 +153,20 @@ Map<String, SegmentReplicationSourceHandler> getHandlers() {
        return allocationIdToHandlers;
    }
 
-    int cachedCopyStateSize() {
-        return copyStateMap.size();
-    }
-
-    private SegmentReplicationSourceHandler createTargetHandler(
-        DiscoveryNode node,
-        CopyState copyState,
-        String allocationId,
-        FileChunkWriter fileChunkWriter
-    ) {
-        return new SegmentReplicationSourceHandler(
-            node,
-            fileChunkWriter,
-            copyState.getShard().getThreadPool(),
-            copyState,
-            allocationId,
-            Math.toIntExact(recoverySettings.getChunkSize().getBytes()),
-            recoverySettings.getMaxConcurrentFileChunks()
-        );
-    }
-
    /**
-     * Adds the input {@link CopyState} object to {@link #copyStateMap}.
-     * The key is the CopyState's {@link ReplicationCheckpoint} object.
-     */
-    private void addToCopyStateMap(ReplicationCheckpoint checkpoint, CopyState copyState) {
-        copyStateMap.putIfAbsent(checkpoint, copyState);
-    }
-
-    /**
-     * Given a {@link ReplicationCheckpoint}, return the corresponding
-     * {@link CopyState} object, if any, from {@link #copyStateMap}.
-     */
-    private CopyState fetchFromCopyStateMap(ReplicationCheckpoint replicationCheckpoint) {
-        return copyStateMap.get(replicationCheckpoint);
-    }
-
-    /**
-     * Remove a CopyState. Intended to be called after a replication event completes.
-     * This method will remove a copyState from the copyStateMap only if its refCount hits 0.
-     *
-     * @param copyState {@link CopyState}
+     * Clear handlers for any allocationIds not in sync.
+     * @param shardId {@link ShardId}
+     * @param inSyncAllocationIds {@link List} of in-sync allocation Ids.
     */
-    private synchronized void removeCopyState(CopyState copyState) {
-        if (copyState.decRef() == true) {
-            copyStateMap.remove(copyState.getRequestedReplicationCheckpoint());
-        }
+    void clearOutOfSyncIds(ShardId shardId, Set<String> inSyncAllocationIds) {
+        cancelHandlers(
+            (handler) -> handler.shardId().equals(shardId) && inSyncAllocationIds.contains(handler.getAllocationId()) == false,
+            "Shard is no longer in-sync with the primary"
+        );
    }
 
    /**
     * Remove handlers from allocationIdToHandlers map based on a filter predicate.
-     * This will also decref the handler's CopyState reference.
     */
    private void cancelHandlers(Predicate<? super SegmentReplicationSourceHandler> predicate, String reason) {
        final List<String> allocationIds = allocationIdToHandlers.values()
@@ -278,17 +182,4 @@ private void cancelHandlers(Predicate<? super SegmentReplicationSourceHandler> p
            cancel(allocationId, reason);
        }
    }
-
-    /**
-     * Clear copystate and target handlers for any non insync allocationIds.
-     * @param shardId {@link ShardId}
-     * @param inSyncAllocationIds {@link List} of in-sync allocation Ids.
-     */
-    public void clearOutOfSyncIds(ShardId shardId, Set<String> inSyncAllocationIds) {
-        cancelHandlers(
-            (handler) -> handler.getCopyState().getShard().shardId().equals(shardId)
-                && inSyncAllocationIds.contains(handler.getAllocationId()) == false,
-            "Shard is no longer in-sync with the primary"
-        );
-    }
 }
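
With the cache gone, resource lifetime follows the handler itself: the wrapped listener in startSegmentCopy simply drops the handler from the map once the copy resolves, and, per the commit message, SegmentReplicationSourceHandler now creates and clears its own CopyState (that file is not part of this excerpt). A rough, hypothetical sketch of that ownership model, not the actual handler code:

// Hypothetical sketch: the handler owns its per-replication resources and releases
// them exactly once, whether the copy completes, fails, or is cancelled.
class SourceHandlerSketch {

    private final AutoCloseable copyStateRef;   // stands in for the handler's CopyState reference
    private boolean released = false;

    SourceHandlerSketch(AutoCloseable copyStateRef) {
        this.copyStateRef = copyStateRef;
    }

    void sendFiles(Runnable transferAndNotify) {
        try {
            transferAndNotify.run();            // send file chunks, then resolve the listener
        } finally {
            release();                          // resources freed here, not by a shared cache
        }
    }

    void cancel(String reason) {
        release();                              // cancellation releases the same reference
    }

    private synchronized void release() {
        if (released == false) {
            released = true;
            try {
                copyStateRef.close();
            } catch (Exception e) {
                throw new RuntimeException("failed to release copy state", e);
            }
        }
    }
}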
