Skip to content

Commit f4c2a04

Browse files
committed
Send manifest file name in remote publish
Signed-off-by: Sooraj Sinha <soosinha@amazon.com>
1 parent d520c91 commit f4c2a04

18 files changed (+166 / -100 lines changed)

server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java

+4 / -1
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
import java.util.Map;
5151
import java.util.Optional;
5252
import java.util.Set;
53+
import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
5354

5455
import static org.opensearch.cluster.coordination.Coordinator.ZEN1_BWC_TERM;
5556
import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.isRemoteRoutingTableEnabled;
@@ -79,6 +80,7 @@ public class CoordinationState {
7980
private long lastPublishedVersion;
8081
private VotingConfiguration lastPublishedConfiguration;
8182
private VoteCollection publishVotes;
83+
private final boolean isRemoteStateEnabled;
8284
private final boolean isRemotePublicationEnabled;
8385

8486
public CoordinationState(
@@ -102,7 +104,8 @@ public CoordinationState(
102104
.getLastAcceptedState()
103105
.getLastAcceptedConfiguration();
104106
this.publishVotes = new VoteCollection();
105-
this.isRemotePublicationEnabled = isRemoteStoreClusterStateEnabled(settings) && isRemoteRoutingTableEnabled(settings);
107+
this.isRemoteStateEnabled = isRemoteStoreClusterStateEnabled(settings);
108+
this.isRemotePublicationEnabled = RemoteStoreNodeAttribute.isRemotePublicationEnabled(settings);
106109
}
107110

108111
public boolean isRemotePublicationEnabled() {

server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java

+3 / -1
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@
4949
import org.opensearch.cluster.coordination.CoordinationState.VoteCollection;
5050
import org.opensearch.cluster.coordination.FollowersChecker.FollowerCheckRequest;
5151
import org.opensearch.cluster.coordination.JoinHelper.InitialJoinAccumulator;
52+
import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType;
5253
import org.opensearch.cluster.metadata.Metadata;
5354
import org.opensearch.cluster.node.DiscoveryNode;
5455
import org.opensearch.cluster.node.DiscoveryNodes;
@@ -1334,7 +1335,8 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
13341335

13351336
final PublicationTransportHandler.PublicationContext publicationContext = publicationHandler.newPublicationContext(
13361337
clusterChangedEvent,
1337-
coordinationState.get().isRemotePublicationEnabled()
1338+
coordinationState.get().isRemotePublicationEnabled(),
1339+
persistedStateRegistry
13381340
);
13391341

13401342
final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState);

server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java

+42 / -52
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@
3131

3232
package org.opensearch.cluster.coordination;
3333

34+
import java.util.Locale;
35+
import java.util.Optional;
36+
import java.util.function.Supplier;
3437
import org.apache.logging.log4j.LogManager;
3538
import org.apache.logging.log4j.Logger;
3639
import org.apache.logging.log4j.message.ParameterizedMessage;
@@ -40,13 +43,16 @@
4043
import org.opensearch.cluster.ClusterState;
4144
import org.opensearch.cluster.Diff;
4245
import org.opensearch.cluster.IncompatibleClusterStateVersionException;
46+
import org.opensearch.cluster.coordination.CoordinationState.PersistedState;
47+
import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType;
4348
import org.opensearch.cluster.node.DiscoveryNode;
4449
import org.opensearch.cluster.node.DiscoveryNodes;
4550
import org.opensearch.core.action.ActionListener;
4651
import org.opensearch.core.common.bytes.BytesReference;
4752
import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
4853
import org.opensearch.core.common.io.stream.StreamInput;
4954
import org.opensearch.core.transport.TransportResponse;
55+
import org.opensearch.gateway.GatewayMetaState.RemotePersistedState;
5056
import org.opensearch.gateway.remote.ClusterMetadataManifest;
5157
import org.opensearch.gateway.remote.RemoteClusterStateService;
5258
import org.opensearch.threadpool.ThreadPool;
@@ -229,50 +235,35 @@ private PublishWithJoinResponse handleIncomingPublishRequest(BytesTransportReque
229235
}
230236

231237
private PublishWithJoinResponse handleIncomingRemotePublishRequest(RemotePublishRequest request) throws IOException {
232-
final Optional<ClusterMetadataManifest> manifestOptional = remoteClusterStateService.getClusterMetadataManifestByTermVersion(
233-
request.getClusterName(),
234-
request.getClusterUUID(),
235-
request.term,
236-
request.version
237-
);
238-
if (manifestOptional.isPresent() == false) {
239-
throw new IllegalStateException(
240-
String.format(Locale.ROOT, "Manifest is not present for term - %s version - %s", request.term, request.version)
241-
);
238+
if (transportService.getLocalNode().equals(request.getSourceNode())) {
239+
return acceptStateOnLocalNode(request);
242240
}
243-
ClusterMetadataManifest manifest = manifestOptional.get();
241+
ClusterMetadataManifest manifest = remoteClusterStateService.getClusterMetadataManifestByFileName(request.getClusterUUID(), request.getManifestFile());
244242
boolean applyFullState = false;
245243
final ClusterState lastSeen = lastSeenClusterState.get();
246244
if (lastSeen == null) {
247-
logger.debug("Diff cannot be applied as there is no last cluster state");
245+
logger.debug(() -> "Diff cannot be applied as there is no last cluster state");
248246
applyFullState = true;
249247
} else if (manifest.getDiffManifest() == null) {
250-
logger.debug("There is no diff in the manifest");
248+
logger.trace(() -> "There is no diff in the manifest");
251249
applyFullState = true;
252250
} else if (manifest.getDiffManifest().getFromStateUUID().equals(lastSeen.stateUUID()) == false) {
253-
logger.debug("Last cluster state not compatible with the diff");
251+
logger.debug(() -> "Last cluster state not compatible with the diff");
254252
applyFullState = true;
255253
}
256254

257255
if (applyFullState == true) {
258-
ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest(
259-
request.getClusterName(),
260-
manifest,
261-
transportService.getLocalNode().getId()
262-
);
263-
logger.debug("Downloaded full cluster state [{}]", clusterState);
256+
logger.debug(() -> new ParameterizedMessage("Downloading full cluster state for term {}, version {}, stateUUID {}", manifest.getClusterTerm(), manifest.getStateVersion(),
257+
manifest.getStateUUID()));
258+
ClusterState clusterState = remoteClusterStateService.getClusterStateForManifest(request.getClusterName(), manifest, transportService.getLocalNode().getId(), true);
264259
fullClusterStateReceivedCount.incrementAndGet();
265260
final PublishWithJoinResponse response = acceptState(clusterState);
266261
lastSeenClusterState.set(clusterState);
267262
return response;
268263
} else {
269-
ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff(
270-
request.getClusterName(),
271-
manifest,
272-
lastSeenClusterState.get(),
273-
transportService.getLocalNode().getId()
274-
);
275-
logger.debug("Downloaded full cluster state from diff [{}]", clusterState);
264+
logger.debug(() -> new ParameterizedMessage("Downloading diff cluster state for term {}, version {}, previousUUID {}, current UUID {}", manifest.getClusterTerm(),
265+
manifest.getStateVersion(), manifest.getDiffManifest().getFromStateUUID(), manifest.getStateUUID()));
266+
ClusterState clusterState = remoteClusterStateService.getClusterStateUsingDiff(request.getClusterName(), manifest, lastSeen, transportService.getLocalNode().getId());
276267
compatibleClusterStateDiffReceivedCount.incrementAndGet();
277268
final PublishWithJoinResponse response = acceptState(clusterState);
278269
lastSeenClusterState.compareAndSet(lastSeen, clusterState);
@@ -293,8 +284,20 @@ private PublishWithJoinResponse acceptState(ClusterState incomingState) {
293284
return handlePublishRequest.apply(new PublishRequest(incomingState));
294285
}
295286

296-
public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemotePublicationEnabled) {
297-
final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent, isRemotePublicationEnabled);
287+
private PublishWithJoinResponse acceptStateOnLocalNode(RemotePublishRequest remotePublishRequest) {
288+
final PublishRequest publishRequest = currentPublishRequestToSelf.get();
289+
if (publishRequest == null || publishRequest.getAcceptedState().coordinationMetadata().term() != remotePublishRequest.term
290+
|| publishRequest.getAcceptedState().version() != remotePublishRequest.version) {
291+
throw new IllegalStateException("publication to self failed for " + remotePublishRequest);
292+
}
293+
PublishWithJoinResponse publishWithJoinResponse = handlePublishRequest.apply(publishRequest);
294+
lastSeenClusterState.set(publishRequest.getAcceptedState());
295+
return publishWithJoinResponse;
296+
}
297+
298+
public PublicationContext newPublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemotePublicationEnabled,
299+
PersistedStateRegistry persistedStateRegistry) {
300+
final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent, isRemotePublicationEnabled, persistedStateRegistry);
298301

299302
// Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication
300303
// straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and
@@ -340,13 +343,15 @@ public class PublicationContext {
340343
private final Map<Version, BytesReference> serializedStates = new HashMap<>();
341344
private final Map<Version, BytesReference> serializedDiffs = new HashMap<>();
342345
private final boolean sendRemoteState;
346+
private final PersistedStateRegistry persistedStateRegistry;
343347

344-
PublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemotePublicationEnabled) {
348+
PublicationContext(ClusterChangedEvent clusterChangedEvent, boolean isRemotePublicationEnabled, PersistedStateRegistry persistedStateRegistry) {
345349
discoveryNodes = clusterChangedEvent.state().nodes();
346350
newState = clusterChangedEvent.state();
347351
previousState = clusterChangedEvent.previousState();
348352
sendFullVersion = previousState.getBlocks().disableStatePersistence();
349353
sendRemoteState = isRemotePublicationEnabled;
354+
this.persistedStateRegistry = persistedStateRegistry;
350355
}
351356

352357
void buildDiffAndSerializeStates() {
@@ -410,7 +415,7 @@ public void onFailure(Exception e) {
410415
} else {
411416
responseActionListener = listener;
412417
}
413-
if (sendRemoteState && destination.isRemoteStateNode()) {
418+
if (sendRemoteState && destination.isRemoteClusterStateEnabled() && destination.isRemoteRoutingTableEnabled()) {
414419
sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener);
415420
} else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) {
416421
logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination);
@@ -457,25 +462,16 @@ public String executor() {
457462
);
458463
}
459464

460-
private void sendRemoteClusterState(
461-
DiscoveryNode destination,
462-
ClusterState clusterState,
463-
ActionListener<PublishWithJoinResponse> listener
464-
) {
465+
private void sendRemoteClusterState(final DiscoveryNode destination, final ClusterState clusterState, final ActionListener<PublishWithJoinResponse> listener) {
465466
try {
466-
final RemotePublishRequest remotePublishRequest = new RemotePublishRequest(
467-
discoveryNodes.getLocalNode(),
468-
clusterState.term(),
469-
clusterState.getVersion(),
470-
clusterState.getClusterName().value(),
471-
clusterState.metadata().clusterUUID()
472-
);
467+
final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE)).getLastUploadedManifestFile();
468+
final RemotePublishRequest remotePublishRequest = new RemotePublishRequest(discoveryNodes.getLocalNode(), clusterState.term(),
469+
clusterState.getVersion(), clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), manifestFileName);
473470
final Consumer<TransportException> transportExceptionHandler = exp -> {
474471
logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp);
475472
listener.onFailure(exp);
476473
};
477-
final TransportResponseHandler<PublishWithJoinResponse> responseHandler = new TransportResponseHandler<
478-
PublishWithJoinResponse>() {
474+
final TransportResponseHandler<PublishWithJoinResponse> responseHandler = new TransportResponseHandler<>() {
479475

480476
@Override
481477
public PublishWithJoinResponse read(StreamInput in) throws IOException {
@@ -497,13 +493,7 @@ public String executor() {
497493
return ThreadPool.Names.GENERIC;
498494
}
499495
};
500-
transportService.sendRequest(
501-
destination,
502-
PUBLISH_REMOTE_STATE_ACTION_NAME,
503-
remotePublishRequest,
504-
stateRequestOptions,
505-
responseHandler
506-
);
496+
transportService.sendRequest(destination, PUBLISH_REMOTE_STATE_ACTION_NAME, remotePublishRequest, stateRequestOptions, responseHandler);
507497
} catch (Exception e) {
508498
logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e);
509499
listener.onFailure(e);

server/src/main/java/org/opensearch/cluster/coordination/RemotePublishRequest.java

+15 / -2
Original file line number | Diff line number | Diff line change
@@ -16,27 +16,36 @@
1616

1717
public class RemotePublishRequest extends TermVersionRequest {
1818

19-
// todo Do we need cluster name and UUID ?
2019
private final String clusterName;
2120
private final String clusterUUID;
21+
private final String manifestFile;
2222

23-
public RemotePublishRequest(DiscoveryNode sourceNode, long term, long version, String clusterName, String clusterUUID) {
23+
public RemotePublishRequest(DiscoveryNode sourceNode, long term, long version, String clusterName, String clusterUUID, String manifestFile) {
2424
super(sourceNode, term, version);
2525
this.clusterName = clusterName;
2626
this.clusterUUID = clusterUUID;
27+
this.manifestFile = manifestFile;
2728
}
2829

2930
public RemotePublishRequest(StreamInput in) throws IOException {
3031
super(in);
3132
this.clusterName = in.readString();
3233
this.clusterUUID = in.readString();
34+
this.manifestFile = in.readString();
3335
}
3436

3537
@Override
3638
public void writeTo(StreamOutput out) throws IOException {
3739
super.writeTo(out);
3840
out.writeString(clusterName);
3941
out.writeString(clusterUUID);
42+
out.writeString(manifestFile);
43+
}
44+
45+
@Override
46+
public String toString() {
47+
return "RemotePublishRequest{" + "term=" + term + ", version=" + version + ", clusterName=" + clusterName + ", clusterUUID=" + clusterUUID
48+
+ ", sourceNode=" + sourceNode + ", manifestFile=" + manifestFile + '}';
4049
}
4150

4251
public String getClusterName() {
@@ -46,4 +55,8 @@ public String getClusterName() {
4655
public String getClusterUUID() {
4756
return clusterUUID;
4857
}
58+
59+
public String getManifestFile() {
60+
return manifestFile;
61+
}
4962
}

server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java

+9 / -7
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
package org.opensearch.cluster.node;
3434

3535
import org.opensearch.Version;
36+
import org.opensearch.cluster.metadata.Metadata;
3637
import org.opensearch.common.UUIDs;
3738
import org.opensearch.common.annotation.PublicApi;
3839
import org.opensearch.common.settings.Setting;
@@ -43,6 +44,7 @@
4344
import org.opensearch.core.common.transport.TransportAddress;
4445
import org.opensearch.core.xcontent.ToXContentFragment;
4546
import org.opensearch.core.xcontent.XContentBuilder;
47+
import org.opensearch.core.xcontent.XContentParser;
4648
import org.opensearch.node.Node;
4749

4850
import java.io.IOException;
@@ -474,20 +476,20 @@ public boolean isRemoteStoreNode() {
474476

475477
/**
476478
* Returns whether the node is a remote cluster state enabled node.
477-
* @return true if the node contains remote cluster state and remote routing table node attributes, false otherwise
479+
* @return true if the node contains remote cluster state node attribute, false otherwise
478480
*/
479-
public boolean isRemoteStateNode() {
480-
return isRemoteClusterStateEnabled() && isRemoteRoutingTableEnabled();
481-
}
482-
483-
private boolean isRemoteClusterStateEnabled() {
481+
public boolean isRemoteClusterStateEnabled() {
484482
return this.getAttributes()
485483
.keySet()
486484
.stream()
487485
.anyMatch(key -> (key.equals(REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY)));
488486
}
489487

490-
private boolean isRemoteRoutingTableEnabled() {
488+
/**
489+
* Returns whether remote routing table is enabled on the node
490+
* @return true if the node contains remote routing table node attributes, false otherwise
491+
*/
492+
public boolean isRemoteRoutingTableEnabled() {
491493
return this.getAttributes().keySet().stream().anyMatch(key -> key.equals(REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY));
492494
}
493495

server/src/main/java/org/opensearch/common/settings/ClusterSettings.java

+1 / -3
Original file line number | Diff line number | Diff line change
@@ -77,6 +77,7 @@
7777
import org.opensearch.cluster.routing.allocation.decider.SameShardAllocationDecider;
7878
import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider;
7979
import org.opensearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
80+
import org.opensearch.cluster.routing.remote.RemoteRoutingTableService;
8081
import org.opensearch.cluster.service.ClusterApplierService;
8182
import org.opensearch.cluster.service.ClusterManagerService;
8283
import org.opensearch.cluster.service.ClusterManagerTaskThrottler;
@@ -715,9 +716,6 @@ public void apply(Settings value, Settings current, Settings previous) {
715716
// Remote cluster state settings
716717
RemoteClusterStateCleanupManager.REMOTE_CLUSTER_STATE_CLEANUP_INTERVAL_SETTING,
717718
RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING,
718-
RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING,
719-
RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING,
720-
RemoteClusterStateService.METADATA_MANIFEST_UPLOAD_TIMEOUT_SETTING,
721719
RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING,
722720
RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING,
723721
IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING,

server/src/main/java/org/opensearch/discovery/DiscoveryModule.java

+5 / -2
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,7 @@
5353
import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
5454
import org.opensearch.core.common.transport.TransportAddress;
5555
import org.opensearch.gateway.GatewayMetaState;
56+
import org.opensearch.gateway.remote.RemoteClusterStateService;
5657
import org.opensearch.monitor.NodeHealthService;
5758
import org.opensearch.node.remotestore.RemoteStoreNodeService;
5859
import org.opensearch.plugins.DiscoveryPlugin;
@@ -135,7 +136,8 @@ public DiscoveryModule(
135136
NodeHealthService nodeHealthService,
136137
PersistedStateRegistry persistedStateRegistry,
137138
RemoteStoreNodeService remoteStoreNodeService,
138-
ClusterManagerMetrics clusterManagerMetrics
139+
ClusterManagerMetrics clusterManagerMetrics,
140+
RemoteClusterStateService remoteClusterStateService
139141
) {
140142
final Collection<BiConsumer<DiscoveryNode, ClusterState>> joinValidators = new ArrayList<>();
141143
final Map<String, Supplier<SeedHostsProvider>> hostProviders = new HashMap<>();
@@ -214,7 +216,8 @@ public DiscoveryModule(
214216
nodeHealthService,
215217
persistedStateRegistry,
216218
remoteStoreNodeService,
217-
clusterManagerMetrics
219+
clusterManagerMetrics,
220+
remoteClusterStateService
218221
);
219222
} else {
220223
throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]");

0 commit comments

Comments
 (0)