Skip to content

Commit 33e8fe4

Browse files
soosinha authored and akolarkunnu committed
Remote publication using min node version for backward compatibility (opensearch-project#15216)
* Publish remote state using min node version Signed-off-by: Sooraj Sinha <[email protected]>
1 parent 55431e2 commit 33e8fe4

14 files changed

+448
-137
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6666
- Add lower limit for primary and replica batch allocators timeout ([#14979](https://github.com/opensearch-project/OpenSearch/pull/14979))
6767
- Optimize regexp-based include/exclude on aggregations when pattern matches prefixes ([#14371](https://github.com/opensearch-project/OpenSearch/pull/14371))
6868
- Replace and block usages of org.apache.logging.log4j.util.Strings ([#15238](https://github.com/opensearch-project/OpenSearch/pull/15238))
69+
- Remote publication using minimum node version for backward compatibility ([#15216](https://github.com/opensearch-project/OpenSearch/pull/15216))
70+
6971

7072
### Deprecated
7173

server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java

+1
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
13371337
coordinationState.get().isRemotePublicationEnabled(),
13381338
persistedStateRegistry
13391339
);
1340+
logger.debug("initialized PublicationContext using class: {}", publicationContext.getClass().toString());
13401341

13411342
final PublishRequest publishRequest = coordinationState.get().handleClientValue(clusterState);
13421343
final CoordinatorPublication publication = new CoordinatorPublication(

server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java

+105-80
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import java.io.IOException;
6363
import java.util.HashMap;
6464
import java.util.Map;
65+
import java.util.concurrent.atomic.AtomicBoolean;
6566
import java.util.concurrent.atomic.AtomicLong;
6667
import java.util.concurrent.atomic.AtomicReference;
6768
import java.util.function.BiConsumer;
@@ -97,6 +98,7 @@ public class PublicationTransportHandler {
9798
private final AtomicLong fullClusterStateReceivedCount = new AtomicLong();
9899
private final AtomicLong incompatibleClusterStateDiffReceivedCount = new AtomicLong();
99100
private final AtomicLong compatibleClusterStateDiffReceivedCount = new AtomicLong();
101+
private final AtomicBoolean allNodesRemotePublicationEnabled = new AtomicBoolean();
100102
// -> no need to put a timeout on the options here, because we want the response to eventually be received
101103
// and not log an error if it arrives after the timeout
102104
private final TransportRequestOptions stateRequestOptions = TransportRequestOptions.builder()
@@ -332,11 +334,18 @@ public PublicationContext newPublicationContext(
332334
boolean isRemotePublicationEnabled,
333335
PersistedStateRegistry persistedStateRegistry
334336
) {
335-
final PublicationContext publicationContext = new PublicationContext(
336-
clusterChangedEvent,
337-
isRemotePublicationEnabled,
338-
persistedStateRegistry
339-
);
337+
if (isRemotePublicationEnabled == true) {
338+
if (allNodesRemotePublicationEnabled.get() == false) {
339+
if (validateRemotePublicationOnAllNodes(clusterChangedEvent.state().nodes()) == true) {
340+
allNodesRemotePublicationEnabled.set(true);
341+
}
342+
}
343+
if (allNodesRemotePublicationEnabled.get() == true) {
344+
// if all nodes are remote then create remote publication context
345+
return new RemotePublicationContext(clusterChangedEvent, persistedStateRegistry);
346+
}
347+
}
348+
final PublicationContext publicationContext = new PublicationContext(clusterChangedEvent, persistedStateRegistry);
340349

341350
// Build the serializations we expect to need now, early in the process, so that an error during serialization fails the publication
342351
// straight away. This isn't watertight since we send diffs on a best-effort basis and may fall back to sending a full state (and
@@ -345,6 +354,17 @@ public PublicationContext newPublicationContext(
345354
return publicationContext;
346355
}
347356

357+
private boolean validateRemotePublicationOnAllNodes(DiscoveryNodes discoveryNodes) {
358+
assert ClusterMetadataManifest.getCodecForVersion(discoveryNodes.getMinNodeVersion()) >= ClusterMetadataManifest.CODEC_V0;
359+
for (DiscoveryNode node : discoveryNodes.getNodes().values()) {
360+
// if a node is non-remote then create local publication context
361+
if (node.isRemoteStatePublicationEnabled() == false) {
362+
return false;
363+
}
364+
}
365+
return true;
366+
}
367+
348368
// package private for testing
349369
void setCurrentPublishRequestToSelf(PublishRequest publishRequest) {
350370
this.currentPublishRequestToSelf.set(publishRequest);
@@ -385,25 +405,19 @@ private static BytesReference serializeDiffClusterState(Diff<ClusterState> diff,
385405
*/
386406
public class PublicationContext {
387407

388-
private final DiscoveryNodes discoveryNodes;
389-
private final ClusterState newState;
390-
private final ClusterState previousState;
391-
private final boolean sendFullVersion;
408+
protected final DiscoveryNodes discoveryNodes;
409+
protected final ClusterState newState;
410+
protected final ClusterState previousState;
411+
protected final boolean sendFullVersion;
392412
private final Map<Version, BytesReference> serializedStates = new HashMap<>();
393413
private final Map<Version, BytesReference> serializedDiffs = new HashMap<>();
394-
private final boolean sendRemoteState;
395-
private final PersistedStateRegistry persistedStateRegistry;
414+
protected final PersistedStateRegistry persistedStateRegistry;
396415

397-
PublicationContext(
398-
ClusterChangedEvent clusterChangedEvent,
399-
boolean isRemotePublicationEnabled,
400-
PersistedStateRegistry persistedStateRegistry
401-
) {
416+
PublicationContext(ClusterChangedEvent clusterChangedEvent, PersistedStateRegistry persistedStateRegistry) {
402417
discoveryNodes = clusterChangedEvent.state().nodes();
403418
newState = clusterChangedEvent.state();
404419
previousState = clusterChangedEvent.previousState();
405420
sendFullVersion = previousState.getBlocks().disableStatePersistence();
406-
sendRemoteState = isRemotePublicationEnabled;
407421
this.persistedStateRegistry = persistedStateRegistry;
408422
}
409423

@@ -468,17 +482,7 @@ public void onFailure(Exception e) {
468482
} else {
469483
responseActionListener = listener;
470484
}
471-
// TODO Decide to send remote state before starting publication by checking remote publication on all nodes
472-
if (sendRemoteState && destination.isRemoteStatePublicationEnabled()) {
473-
logger.trace("sending remote cluster state version [{}] to [{}]", newState.version(), destination);
474-
sendRemoteClusterState(destination, publishRequest.getAcceptedState(), responseActionListener);
475-
} else if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) {
476-
logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination);
477-
sendFullClusterState(destination, responseActionListener);
478-
} else {
479-
logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination);
480-
sendClusterStateDiff(destination, responseActionListener);
481-
}
485+
sendClusterState(destination, responseActionListener);
482486
}
483487

484488
public void sendApplyCommit(
@@ -517,58 +521,14 @@ public String executor() {
517521
);
518522
}
519523

520-
private void sendRemoteClusterState(
521-
final DiscoveryNode destination,
522-
final ClusterState clusterState,
523-
final ActionListener<PublishWithJoinResponse> listener
524-
) {
525-
try {
526-
final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE))
527-
.getLastUploadedManifestFile();
528-
final RemotePublishRequest remotePublishRequest = new RemotePublishRequest(
529-
discoveryNodes.getLocalNode(),
530-
clusterState.term(),
531-
clusterState.getVersion(),
532-
clusterState.getClusterName().value(),
533-
clusterState.metadata().clusterUUID(),
534-
manifestFileName
535-
);
536-
final Consumer<TransportException> transportExceptionHandler = exp -> {
537-
logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp);
538-
listener.onFailure(exp);
539-
};
540-
final TransportResponseHandler<PublishWithJoinResponse> responseHandler = new TransportResponseHandler<>() {
541-
542-
@Override
543-
public PublishWithJoinResponse read(StreamInput in) throws IOException {
544-
return new PublishWithJoinResponse(in);
545-
}
546-
547-
@Override
548-
public void handleResponse(PublishWithJoinResponse response) {
549-
listener.onResponse(response);
550-
}
551-
552-
@Override
553-
public void handleException(TransportException exp) {
554-
transportExceptionHandler.accept(exp);
555-
}
556-
557-
@Override
558-
public String executor() {
559-
return ThreadPool.Names.GENERIC;
560-
}
561-
};
562-
transportService.sendRequest(
563-
destination,
564-
PUBLISH_REMOTE_STATE_ACTION_NAME,
565-
remotePublishRequest,
566-
stateRequestOptions,
567-
responseHandler
568-
);
569-
} catch (Exception e) {
570-
logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e);
571-
listener.onFailure(e);
524+
public void sendClusterState(DiscoveryNode destination, ActionListener<PublishWithJoinResponse> listener) {
525+
logger.info("sending cluster state over transport to node: {}", destination.getName());
526+
if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) {
527+
logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination);
528+
sendFullClusterState(destination, listener);
529+
} else {
530+
logger.trace("sending cluster state diff for version [{}] to [{}]", newState.version(), destination);
531+
sendClusterStateDiff(destination, listener);
572532
}
573533
}
574534

@@ -648,4 +608,69 @@ public String executor() {
648608
}
649609
}
650610

611+
/**
612+
* An extension of {@code PublicationContext} to support remote cluster state publication
613+
*
614+
* @opensearch.internal
615+
*/
616+
public class RemotePublicationContext extends PublicationContext {
617+
618+
RemotePublicationContext(ClusterChangedEvent clusterChangedEvent, PersistedStateRegistry persistedStateRegistry) {
619+
super(clusterChangedEvent, persistedStateRegistry);
620+
}
621+
622+
@Override
623+
public void sendClusterState(final DiscoveryNode destination, final ActionListener<PublishWithJoinResponse> listener) {
624+
try {
625+
logger.info("sending remote cluster state to node: {}", destination.getName());
626+
final String manifestFileName = ((RemotePersistedState) persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE))
627+
.getLastUploadedManifestFile();
628+
final RemotePublishRequest remotePublishRequest = new RemotePublishRequest(
629+
discoveryNodes.getLocalNode(),
630+
newState.term(),
631+
newState.getVersion(),
632+
newState.getClusterName().value(),
633+
newState.metadata().clusterUUID(),
634+
manifestFileName
635+
);
636+
final Consumer<TransportException> transportExceptionHandler = exp -> {
637+
logger.debug(() -> new ParameterizedMessage("failed to send remote cluster state to {}", destination), exp);
638+
listener.onFailure(exp);
639+
};
640+
final TransportResponseHandler<PublishWithJoinResponse> responseHandler = new TransportResponseHandler<>() {
641+
642+
@Override
643+
public PublishWithJoinResponse read(StreamInput in) throws IOException {
644+
return new PublishWithJoinResponse(in);
645+
}
646+
647+
@Override
648+
public void handleResponse(PublishWithJoinResponse response) {
649+
listener.onResponse(response);
650+
}
651+
652+
@Override
653+
public void handleException(TransportException exp) {
654+
transportExceptionHandler.accept(exp);
655+
}
656+
657+
@Override
658+
public String executor() {
659+
return ThreadPool.Names.GENERIC;
660+
}
661+
};
662+
transportService.sendRequest(
663+
destination,
664+
PUBLISH_REMOTE_STATE_ACTION_NAME,
665+
remotePublishRequest,
666+
stateRequestOptions,
667+
responseHandler
668+
);
669+
} catch (Exception e) {
670+
logger.warn(() -> new ParameterizedMessage("error sending remote cluster state to {}", destination), e);
671+
listener.onFailure(e);
672+
}
673+
}
674+
}
675+
651676
}

server/src/main/java/org/opensearch/gateway/GatewayMetaState.java

+11-4
Original file line numberDiff line numberDiff line change
@@ -701,7 +701,12 @@ public String getLastUploadedManifestFile() {
701701
public void setLastAcceptedState(ClusterState clusterState) {
702702
try {
703703
final RemoteClusterStateManifestInfo manifestDetails;
704-
if (shouldWriteFullClusterState(clusterState)) {
704+
// Decide the codec version
705+
int codecVersion = ClusterMetadataManifest.getCodecForVersion(clusterState.nodes().getMinNodeVersion());
706+
assert codecVersion >= 0 : codecVersion;
707+
logger.info("codec version is {}", codecVersion);
708+
709+
if (shouldWriteFullClusterState(clusterState, codecVersion)) {
705710
final Optional<ClusterMetadataManifest> latestManifest = remoteClusterStateService.getLatestClusterMetadataManifest(
706711
clusterState.getClusterName().value(),
707712
clusterState.metadata().clusterUUID()
@@ -718,7 +723,7 @@ public void setLastAcceptedState(ClusterState clusterState) {
718723
clusterState.metadata().clusterUUID()
719724
);
720725
}
721-
manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID);
726+
manifestDetails = remoteClusterStateService.writeFullMetadata(clusterState, previousClusterUUID, codecVersion);
722727
} else {
723728
assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == true
724729
: "Previous manifest and previous ClusterState are not in sync";
@@ -758,11 +763,13 @@ private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest,
758763
return true;
759764
}
760765

761-
private boolean shouldWriteFullClusterState(ClusterState clusterState) {
766+
private boolean shouldWriteFullClusterState(ClusterState clusterState, int codecVersion) {
767+
assert lastAcceptedManifest == null || lastAcceptedManifest.getCodecVersion() <= codecVersion;
762768
if (lastAcceptedState == null
763769
|| lastAcceptedManifest == null
764770
|| lastAcceptedState.term() != clusterState.term()
765-
|| lastAcceptedManifest.getOpensearchVersion() != Version.CURRENT) {
771+
|| lastAcceptedManifest.getOpensearchVersion() != Version.CURRENT
772+
|| lastAcceptedManifest.getCodecVersion() != codecVersion) {
766773
return true;
767774
}
768775
return false;

server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java

+25-1
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import org.opensearch.core.xcontent.ToXContentFragment;
2121
import org.opensearch.core.xcontent.XContentBuilder;
2222
import org.opensearch.core.xcontent.XContentParser;
23-
import org.opensearch.gateway.remote.ClusterMetadataManifest.Builder;
2423

2524
import java.io.IOException;
2625
import java.util.ArrayList;
26+
import java.util.Arrays;
2727
import java.util.Collections;
2828
import java.util.HashMap;
2929
import java.util.List;
@@ -46,6 +46,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment {
4646
// required for state publication
4747
public static final int CODEC_V3 = 3; // In Codec V3, we have introduced new diff field in diff-manifest's routing_table_diff
4848

49+
public static final int[] CODEC_VERSIONS = { CODEC_V0, CODEC_V1, CODEC_V2, CODEC_V3 };
50+
4951
private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term");
5052
private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version");
5153
private static final ParseField CLUSTER_UUID_FIELD = new ParseField("cluster_uuid");
@@ -237,12 +239,34 @@ private static ClusterStateDiffManifest diffManifest(Object[] fields) {
237239
);
238240

239241
private static final ConstructingObjectParser<ClusterMetadataManifest, Void> CURRENT_PARSER = PARSER_V3;
242+
public static final int MANIFEST_CURRENT_CODEC_VERSION = CODEC_V3;
243+
244+
private static final Map<Version, Integer> VERSION_TO_CODEC_MAPPING;
240245

241246
static {
242247
declareParser(PARSER_V0, CODEC_V0);
243248
declareParser(PARSER_V1, CODEC_V1);
244249
declareParser(PARSER_V2, CODEC_V2);
245250
declareParser(PARSER_V3, CODEC_V3);
251+
252+
assert Arrays.stream(CODEC_VERSIONS).max().getAsInt() == MANIFEST_CURRENT_CODEC_VERSION;
253+
Map<Version, Integer> versionToCodecMapping = new HashMap<>();
254+
for (Version version : Version.getDeclaredVersions(Version.class)) {
255+
if (version.onOrAfter(Version.V_2_10_0) && version.before(Version.V_2_12_0)) {
256+
versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V0);
257+
} else if (version.onOrAfter(Version.V_2_12_0) && version.before(Version.V_2_15_0)) {
258+
versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V1);
259+
} else if (version.onOrAfter(Version.V_2_15_0) && version.before(Version.V_2_16_0)) {
260+
versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V2);
261+
} else if (version.onOrAfter(Version.V_2_16_0)) {
262+
versionToCodecMapping.put(version, ClusterMetadataManifest.CODEC_V3);
263+
}
264+
}
265+
VERSION_TO_CODEC_MAPPING = Collections.unmodifiableMap(versionToCodecMapping);
266+
}
267+
268+
public static int getCodecForVersion(Version version) {
269+
return VERSION_TO_CODEC_MAPPING.getOrDefault(version, -1);
246270
}
247271

248272
private static void declareParser(ConstructingObjectParser<ClusterMetadataManifest, Void> parser, long codec_version) {

0 commit comments

Comments
 (0)