From 08f22329819b80ea44713fe9c4f774c96f530b68 Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Fri, 23 Sep 2022 14:21:38 -0700 Subject: [PATCH 1/2] Unmute RelocationIT.testRelocationWhileIndexingRandom. (#4580) Signed-off-by: Marc Handalian Signed-off-by: Marc Handalian --- CHANGELOG.md | 3 ++- .../java/org/opensearch/recovery/RelocationIT.java | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e10d8d0337bf..cf57552ff5c81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Segment Replication] Update replicas to commit SegmentInfos instead of relying on SIS files from primary shards. ([#4402](https://github.com/opensearch-project/OpenSearch/pull/4402)) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) - [Remote Store] Change behaviour in replica recovery for remote translog enabled indices ([#4318](https://github.com/opensearch-project/OpenSearch/pull/4318)) +- Unmute test RelocationIT.testRelocationWhileIndexingRandom ([#4580](https://github.com/opensearch-project/OpenSearch/pull/4580)) ### Deprecated @@ -103,4 +104,4 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) [Unreleased]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...HEAD -[2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...2.x \ No newline at end of file +[2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.2.0...2.x diff --git a/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java index 1f16cc0363686..aa99155724661 100644 --- a/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/recovery/RelocationIT.java @@ -191,7 +191,6 @@ public void testSimpleRelocationNoIndexing() { assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(20L)); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/2063") public void testRelocationWhileIndexingRandom() throws Exception { int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4); int numberOfReplicas = randomBoolean() ? 0 : 1; From 751d0696d680544d36149fda9b6baf6433ad856f Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Sun, 25 Sep 2022 12:37:39 +0530 Subject: [PATCH 2/2] Add DecommissionService and helper to execute awareness attribute decommissioning (#4084) * Add Executor to decommission node attribute * Decommission service implementation with cluster metadata * Master abdication changes to decommission local awareness leader * Update join validator changes to validate decommissioned node join request Signed-off-by: Rishab Nahata --- CHANGELOG.md | 2 + .../org/opensearch/OpenSearchException.java | 12 + .../org/opensearch/cluster/ClusterModule.java | 14 + .../coordination/JoinTaskExecutor.java | 37 +- .../decommission/DecommissionAttribute.java | 92 ++++ .../DecommissionAttributeMetadata.java | 265 ++++++++++ .../decommission/DecommissionController.java | 270 ++++++++++ .../decommission/DecommissionService.java | 483 ++++++++++++++++++ .../decommission/DecommissionStatus.java | 68 +++ .../DecommissioningFailedException.java | 55 ++ .../NodeDecommissionedException.java | 31 ++ .../cluster/decommission/package-info.java | 12 + .../opensearch/cluster/metadata/Metadata.java | 14 + .../ExceptionSerializationTests.java | 4 + .../coordination/JoinTaskExecutorTests.java | 70 +++ .../DecommissionControllerTests.java | 365 +++++++++++++ .../DecommissionServiceTests.java | 262 ++++++++++ ...onAttributeMetadataSerializationTests.java | 83 +++ .../DecommissionAttributeMetadataTests.java | 52 ++ ...missionAttributeMetadataXContentTests.java | 38 ++ 20 files changed, 2226 insertions(+), 3 deletions(-) create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java create mode 100644 server/src/main/java/org/opensearch/cluster/decommission/package-info.java create mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java create mode 100644 server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index cf57552ff5c81..acb8ded704a7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,8 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) - [Remote Store] Change behaviour in replica recovery for remote translog enabled indices ([#4318](https://github.com/opensearch-project/OpenSearch/pull/4318)) - Unmute test RelocationIT.testRelocationWhileIndexingRandom ([#4580](https://github.com/opensearch-project/OpenSearch/pull/4580)) +- Add DecommissionService and helper to execute awareness attribute decommissioning ([#4084](https://github.com/opensearch-project/OpenSearch/pull/4084)) + ### Deprecated diff --git a/server/src/main/java/org/opensearch/OpenSearchException.java b/server/src/main/java/org/opensearch/OpenSearchException.java index 87efc03734d26..34d7509c7afb2 100644 --- a/server/src/main/java/org/opensearch/OpenSearchException.java +++ b/server/src/main/java/org/opensearch/OpenSearchException.java @@ -1608,6 +1608,18 @@ private enum OpenSearchExceptionHandle { org.opensearch.index.shard.PrimaryShardClosedException::new, 162, V_3_0_0 + ), + DECOMMISSIONING_FAILED_EXCEPTION( + org.opensearch.cluster.decommission.DecommissioningFailedException.class, + org.opensearch.cluster.decommission.DecommissioningFailedException::new, + 163, + V_3_0_0 + ), + NODE_DECOMMISSIONED_EXCEPTION( + org.opensearch.cluster.decommission.NodeDecommissionedException.class, + org.opensearch.cluster.decommission.NodeDecommissionedException::new, + 164, + V_3_0_0 ); final Class exceptionClass; diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 46552bb5d6a03..3ac3a08d4848a 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -35,6 +35,7 @@ import org.opensearch.cluster.action.index.MappingUpdatedAction; import org.opensearch.cluster.action.index.NodeMappingRefreshAction; import org.opensearch.cluster.action.shard.ShardStateAction; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.cluster.metadata.ComponentTemplateMetadata; import org.opensearch.cluster.metadata.ComposableIndexTemplateMetadata; import org.opensearch.cluster.metadata.DataStreamMetadata; @@ -193,6 +194,12 @@ public static List getNamedWriteables() { ); registerMetadataCustom(entries, DataStreamMetadata.TYPE, DataStreamMetadata::new, DataStreamMetadata::readDiffFrom); registerMetadataCustom(entries, WeightedRoutingMetadata.TYPE, WeightedRoutingMetadata::new, WeightedRoutingMetadata::readDiffFrom); + registerMetadataCustom( + entries, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new, + DecommissionAttributeMetadata::readDiffFrom + ); // Task Status (not Diffable) entries.add(new Entry(Task.Status.class, PersistentTasksNodeService.Status.NAME, PersistentTasksNodeService.Status::new)); return entries; @@ -283,6 +290,13 @@ public static List getNamedXWriteables() { WeightedRoutingMetadata::fromXContent ) ); + entries.add( + new NamedXContentRegistry.Entry( + Metadata.Custom.class, + new ParseField(DecommissionAttributeMetadata.TYPE), + DecommissionAttributeMetadata::fromXContent + ) + ); return entries; } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 5afdb5b12db23..78b09dc020dfc 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -39,6 +39,10 @@ import org.opensearch.cluster.ClusterStateTaskExecutor; import org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlocks; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -358,6 +362,7 @@ public boolean runOnlyOnClusterManager() { /** * a task indicates that the current node should become master + * * @deprecated As of 2.0, because supporting inclusive language, replaced by {@link #newBecomeClusterManagerTask()} */ @Deprecated @@ -384,8 +389,9 @@ public static Task newFinishElectionTask() { * Ensures that all indices are compatible with the given node version. This will ensure that all indices in the given metadata * will not be created with a newer version of opensearch as well as that all indices are newer or equal to the minimum index * compatibility version. - * @see Version#minimumIndexCompatibilityVersion() + * * @throws IllegalStateException if any index is incompatible with the given version + * @see Version#minimumIndexCompatibilityVersion() */ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata metadata) { Version supportedIndexVersion = nodeVersion.minimumIndexCompatibilityVersion(); @@ -415,14 +421,18 @@ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata } } - /** ensures that the joining node has a version that's compatible with all current nodes*/ + /** + * ensures that the joining node has a version that's compatible with all current nodes + */ public static void ensureNodesCompatibility(final Version joiningNodeVersion, DiscoveryNodes currentNodes) { final Version minNodeVersion = currentNodes.getMinNodeVersion(); final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); ensureNodesCompatibility(joiningNodeVersion, minNodeVersion, maxNodeVersion); } - /** ensures that the joining node has a version that's compatible with a given version range */ + /** + * ensures that the joining node has a version that's compatible with a given version range + */ public static void ensureNodesCompatibility(Version joiningNodeVersion, Version minClusterNodeVersion, Version maxClusterNodeVersion) { assert minClusterNodeVersion.onOrBefore(maxClusterNodeVersion) : minClusterNodeVersion + " > " + maxClusterNodeVersion; if (joiningNodeVersion.isCompatible(maxClusterNodeVersion) == false) { @@ -466,6 +476,26 @@ public static void ensureMajorVersionBarrier(Version joiningNodeVersion, Version } } + public static void ensureNodeCommissioned(DiscoveryNode node, Metadata metadata) { + DecommissionAttributeMetadata decommissionAttributeMetadata = metadata.decommissionAttributeMetadata(); + if (decommissionAttributeMetadata != null) { + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + DecommissionStatus status = decommissionAttributeMetadata.status(); + if (decommissionAttribute != null && status != null) { + // We will let the node join the cluster if the current status is in FAILED state + if (node.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()) + && status.equals(DecommissionStatus.FAILED) == false) { + throw new NodeDecommissionedException( + "node [{}] has decommissioned attribute [{}] with current status of decommissioning [{}]", + node.toString(), + decommissionAttribute.toString(), + status.status() + ); + } + } + } + } + public static Collection> addBuiltInJoinValidators( Collection> onJoinValidators ) { @@ -473,6 +503,7 @@ public static Collection> addBuiltInJoin validators.add((node, state) -> { ensureNodesCompatibility(node.getVersion(), state.getNodes()); ensureIndexCompatibility(node.getVersion(), state.getMetadata()); + ensureNodeCommissioned(node, state.getMetadata()); }); validators.addAll(onJoinValidators); return Collections.unmodifiableCollection(validators); diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java new file mode 100644 index 0000000000000..bf2487a1a0e18 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttribute.java @@ -0,0 +1,92 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Objects; + +/** + * {@link DecommissionAttribute} encapsulates information about decommissioned node attribute like attribute name, attribute value. + * + * @opensearch.internal + */ +public final class DecommissionAttribute implements Writeable { + private final String attributeName; + private final String attributeValue; + + /** + * Constructs new decommission attribute name value pair + * + * @param attributeName attribute name + * @param attributeValue attribute value + */ + public DecommissionAttribute(String attributeName, String attributeValue) { + this.attributeName = attributeName; + this.attributeValue = attributeValue; + } + + /** + * Returns attribute name + * + * @return attributeName + */ + public String attributeName() { + return this.attributeName; + } + + /** + * Returns attribute value + * + * @return attributeValue + */ + public String attributeValue() { + return this.attributeValue; + } + + public DecommissionAttribute(StreamInput in) throws IOException { + attributeName = in.readString(); + attributeValue = in.readString(); + } + + /** + * Writes decommission attribute name value to stream output + * + * @param out stream output + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(attributeName); + out.writeString(attributeValue); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttribute that = (DecommissionAttribute) o; + + if (!attributeName.equals(that.attributeName)) return false; + return attributeValue.equals(that.attributeValue); + } + + @Override + public int hashCode() { + return Objects.hash(attributeName, attributeValue); + } + + @Override + public String toString() { + return "DecommissionAttribute{" + "attributeName='" + attributeName + '\'' + ", attributeValue='" + attributeValue + '\'' + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java new file mode 100644 index 0000000000000..dbb3fea823eb6 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionAttributeMetadata.java @@ -0,0 +1,265 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchParseException; +import org.opensearch.Version; +import org.opensearch.cluster.AbstractNamedDiffable; +import org.opensearch.cluster.NamedDiff; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.Metadata.Custom; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; +import org.opensearch.common.xcontent.ToXContent; +import org.opensearch.common.xcontent.XContentBuilder; +import org.opensearch.common.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Objects; + +/** + * Contains metadata about decommission attribute + * + * @opensearch.internal + */ +public class DecommissionAttributeMetadata extends AbstractNamedDiffable implements Custom { + + public static final String TYPE = "decommissionedAttribute"; + + private final DecommissionAttribute decommissionAttribute; + private DecommissionStatus status; + public static final String attributeType = "awareness"; + + /** + * Constructs new decommission attribute metadata with given status + * + * @param decommissionAttribute attribute details + * @param status current status of the attribute decommission + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute, DecommissionStatus status) { + this.decommissionAttribute = decommissionAttribute; + this.status = status; + } + + /** + * Constructs new decommission attribute metadata with status as {@link DecommissionStatus#INIT} + * + * @param decommissionAttribute attribute details + */ + public DecommissionAttributeMetadata(DecommissionAttribute decommissionAttribute) { + this(decommissionAttribute, DecommissionStatus.INIT); + } + + /** + * Returns the current decommissioned attribute + * + * @return decommissioned attributes + */ + public DecommissionAttribute decommissionAttribute() { + return this.decommissionAttribute; + } + + /** + * Returns the current status of the attribute decommission + * + * @return attribute type + */ + public DecommissionStatus status() { + return this.status; + } + + /** + * Returns instance of the metadata with updated status + * @param newStatus status to be updated with + * @return instance with valid status + */ + // synchronized is strictly speaking not needed (this is called by a single thread), but just to be safe + public synchronized DecommissionAttributeMetadata setUpdatedStatus(DecommissionStatus newStatus) { + // if the current status is the expected status already, we return the same instance + if (newStatus.equals(status)) { + return this; + } + // We don't expect that INIT will be new status, as it is registered only when starting the decommission action + switch (newStatus) { + case IN_PROGRESS: + validateAndSetStatus(DecommissionStatus.INIT, newStatus); + break; + case SUCCESSFUL: + validateAndSetStatus(DecommissionStatus.IN_PROGRESS, newStatus); + break; + case FAILED: + // we don't need to validate here and directly update status to FAILED + this.status = newStatus; + break; + default: + throw new IllegalArgumentException( + "illegal decommission status [" + newStatus.status() + "] requested for updating metadata" + ); + } + return this; + } + + private void validateAndSetStatus(DecommissionStatus expected, DecommissionStatus next) { + if (status.equals(expected) == false) { + assert false : "can't move decommission status to [" + + next + + "]. current status: [" + + status + + "] (expected [" + + expected + + "])"; + throw new IllegalStateException( + "can't move decommission status to [" + next + "]. current status: [" + status + "] (expected [" + expected + "])" + ); + } + status = next; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DecommissionAttributeMetadata that = (DecommissionAttributeMetadata) o; + + if (!status.equals(that.status)) return false; + return decommissionAttribute.equals(that.decommissionAttribute); + } + + @Override + public int hashCode() { + return Objects.hash(attributeType, decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public Version getMinimalSupportedVersion() { + return Version.V_3_0_0; + } + + public DecommissionAttributeMetadata(StreamInput in) throws IOException { + this.decommissionAttribute = new DecommissionAttribute(in); + this.status = DecommissionStatus.fromString(in.readString()); + } + + public static NamedDiff readDiffFrom(StreamInput in) throws IOException { + return readDiffFrom(Custom.class, TYPE, in); + } + + /** + * {@inheritDoc} + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + decommissionAttribute.writeTo(out); + out.writeString(status.status()); + } + + public static DecommissionAttributeMetadata fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token; + DecommissionAttribute decommissionAttribute = null; + DecommissionStatus status = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String currentFieldName = parser.currentName(); + if (attributeType.equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new OpenSearchParseException( + "failed to parse decommission attribute type [{}], expected object", + attributeType + ); + } + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + String fieldName = parser.currentName(); + String value; + token = parser.nextToken(); + if (token == XContentParser.Token.VALUE_STRING) { + value = parser.text(); + } else { + throw new OpenSearchParseException( + "failed to parse attribute [{}], expected string for attribute value", + fieldName + ); + } + decommissionAttribute = new DecommissionAttribute(fieldName, value); + parser.nextToken(); + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}], unexpected type", attributeType); + } + } else { + throw new OpenSearchParseException("failed to parse attribute type [{}]", attributeType); + } + } else if ("status".equals(currentFieldName)) { + if (parser.nextToken() != XContentParser.Token.VALUE_STRING) { + throw new OpenSearchParseException( + "failed to parse status of decommissioning, expected string but found unknown type" + ); + } + status = DecommissionStatus.fromString(parser.text()); + } else { + throw new OpenSearchParseException( + "unknown field found [{}], failed to parse the decommission attribute", + currentFieldName + ); + } + } + } + return new DecommissionAttributeMetadata(decommissionAttribute, status); + } + + /** + * {@inheritDoc} + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException { + toXContent(decommissionAttribute, status, attributeType, builder, params); + return builder; + } + + @Override + public EnumSet context() { + return Metadata.API_AND_GATEWAY; + } + + /** + * @param decommissionAttribute decommission attribute + * @param status decommission status + * @param attributeType attribute type + * @param builder XContent builder + * @param params serialization parameters + */ + public static void toXContent( + DecommissionAttribute decommissionAttribute, + DecommissionStatus status, + String attributeType, + XContentBuilder builder, + ToXContent.Params params + ) throws IOException { + builder.startObject(attributeType); + builder.field(decommissionAttribute.attributeName(), decommissionAttribute.attributeValue()); + builder.endObject(); + builder.field("status", status.status()); + } + + @Override + public String toString() { + return Strings.toString(this); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java new file mode 100644 index 0000000000000..7719012f2f3d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionController.java @@ -0,0 +1,270 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.AddVotingConfigExclusionsResponse; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; +import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateTaskConfig; +import org.opensearch.cluster.ClusterStateTaskListener; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.NodeRemovalClusterStateTaskExecutor; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.Strings; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportException; +import org.opensearch.transport.TransportResponseHandler; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +/** + * Helper controller class to remove list of nodes from the cluster and update status + * + * @opensearch.internal + */ + +public class DecommissionController { + + private static final Logger logger = LogManager.getLogger(DecommissionController.class); + + private final NodeRemovalClusterStateTaskExecutor nodeRemovalExecutor; + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + + DecommissionController( + ClusterService clusterService, + TransportService transportService, + AllocationService allocationService, + ThreadPool threadPool + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); + this.threadPool = threadPool; + } + + /** + * Transport call to add nodes to voting config exclusion + * + * @param nodes set of nodes Ids to be added to voting config exclusion list + * @param listener callback for response or failure + */ + public void excludeDecommissionedNodesFromVotingConfig(Set nodes, ActionListener listener) { + transportService.sendRequest( + transportService.getLocalNode(), + AddVotingConfigExclusionsAction.NAME, + new AddVotingConfigExclusionsRequest( + Strings.EMPTY_ARRAY, + nodes.toArray(String[]::new), + Strings.EMPTY_ARRAY, + TimeValue.timeValueSeconds(120) // giving a larger timeout of 120 sec as cluster might already be in stress when + // decommission is triggered + ), + new TransportResponseHandler() { + @Override + public void handleResponse(AddVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public AddVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new AddVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * Transport call to clear voting config exclusion + * + * @param listener callback for response or failure + */ + public void clearVotingConfigExclusion(ActionListener listener, boolean waitForRemoval) { + final ClearVotingConfigExclusionsRequest clearVotingConfigExclusionsRequest = new ClearVotingConfigExclusionsRequest(); + clearVotingConfigExclusionsRequest.setWaitForRemoval(waitForRemoval); + transportService.sendRequest( + transportService.getLocalNode(), + ClearVotingConfigExclusionsAction.NAME, + clearVotingConfigExclusionsRequest, + new TransportResponseHandler() { + @Override + public void handleResponse(ClearVotingConfigExclusionsResponse response) { + listener.onResponse(null); + } + + @Override + public void handleException(TransportException exp) { + listener.onFailure(exp); + } + + @Override + public String executor() { + return ThreadPool.Names.SAME; + } + + @Override + public ClearVotingConfigExclusionsResponse read(StreamInput in) throws IOException { + return new ClearVotingConfigExclusionsResponse(in); + } + } + ); + } + + /** + * This method triggers batch of tasks for nodes to be decommissioned using executor {@link NodeRemovalClusterStateTaskExecutor} + * Once the tasks are submitted, it waits for an expected cluster state to guarantee + * that the expected decommissioned nodes are removed from the cluster + * + * @param nodesToBeDecommissioned set of the node to be decommissioned + * @param reason reason of removal + * @param timeout timeout for the request + * @param nodesRemovedListener callback for the success or failure + */ + public synchronized void removeDecommissionedNodes( + Set nodesToBeDecommissioned, + String reason, + TimeValue timeout, + ActionListener nodesRemovedListener + ) { + final Map nodesDecommissionTasks = new LinkedHashMap<>( + nodesToBeDecommissioned.size() + ); + nodesToBeDecommissioned.forEach(discoveryNode -> { + final NodeRemovalClusterStateTaskExecutor.Task task = new NodeRemovalClusterStateTaskExecutor.Task(discoveryNode, reason); + nodesDecommissionTasks.put(task, nodeRemovalExecutor); + }); + + logger.info("submitting state update task to remove [{}] nodes due to decommissioning", nodesToBeDecommissioned.toString()); + clusterService.submitStateUpdateTasks( + "node-decommissioned", + nodesDecommissionTasks, + ClusterStateTaskConfig.build(Priority.URGENT), + nodeRemovalExecutor + ); + + Predicate allDecommissionedNodesRemovedPredicate = clusterState -> { + Set intersection = Arrays.stream(clusterState.nodes().getNodes().values().toArray(DiscoveryNode.class)) + .collect(Collectors.toSet()); + intersection.retainAll(nodesToBeDecommissioned); + return intersection.size() == 0; + }; + + final ClusterStateObserver observer = new ClusterStateObserver(clusterService, timeout, logger, threadPool.getThreadContext()); + + final ClusterStateObserver.Listener removalListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.info("successfully removed all decommissioned nodes [{}] from the cluster", nodesToBeDecommissioned.toString()); + nodesRemovedListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + logger.warn( + "cluster service closed while waiting for removal of decommissioned nodes [{}]", + nodesToBeDecommissioned.toString() + ); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ); + nodesRemovedListener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for removal of decommissioned nodes [{}]", + timeout.toString(), + nodesToBeDecommissioned.toString() + ) + ); + } + }; + + if (allDecommissionedNodesRemovedPredicate.test(clusterService.getClusterApplierService().state())) { + removalListener.onNewClusterState(clusterService.getClusterApplierService().state()); + } else { + observer.waitForNextChange(removalListener, allDecommissionedNodesRemovedPredicate); + } + } + + /** + * This method updates the status in the currently registered metadata. + * + * @param decommissionStatus status to update decommission metadata with + * @param listener listener for response and failure + */ + public void updateMetadataWithDecommissionStatus(DecommissionStatus decommissionStatus, ActionListener listener) { + clusterService.submitStateUpdateTask("update-decommission-status", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null && decommissionAttributeMetadata.decommissionAttribute() != null; + logger.info( + "attempting to update current decommission status [{}] with expected status [{}]", + decommissionAttributeMetadata.status(), + decommissionStatus + ); + // setUpdatedStatus can throw IllegalStateException if the sequence of update is not valid + decommissionAttributeMetadata.setUpdatedStatus(decommissionStatus); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttributeMetadata != null; + assert decommissionAttributeMetadata.status().equals(decommissionStatus); + listener.onResponse(decommissionAttributeMetadata.status()); + } + }); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java new file mode 100644 index 0000000000000..1a0704c5a4ac2 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionService.java @@ -0,0 +1,483 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.ActionListener; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.NotClusterManagerException; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.Priority; +import org.opensearch.common.inject.Inject; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING; +import static org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING; + +/** + * Service responsible for entire lifecycle of decommissioning and recommissioning an awareness attribute. + *

+ * Whenever a cluster manager initiates operation to decommission an awareness attribute, + * the service makes the best attempt to perform the following task - + *

    + *
  • Initiates nodes decommissioning by adding custom metadata with the attribute and state as {@link DecommissionStatus#INIT}
  • + *
  • Remove to-be-decommissioned cluster-manager eligible nodes from voting config and wait for its abdication if it is active leader
  • + *
  • Triggers weigh away for nodes having given awareness attribute to drain.
  • + *
  • Once weighed away, the service triggers nodes decommission. This marks the decommission status as {@link DecommissionStatus#IN_PROGRESS}
  • + *
  • Once the decommission is successful, the service clears the voting config and marks the status as {@link DecommissionStatus#SUCCESSFUL}
  • + *
  • If service fails at any step, it makes best attempt to mark the status as {@link DecommissionStatus#FAILED} and to clear voting config exclusion
  • + *
+ * + * @opensearch.internal + */ +public class DecommissionService { + + private static final Logger logger = LogManager.getLogger(DecommissionService.class); + + private final ClusterService clusterService; + private final TransportService transportService; + private final ThreadPool threadPool; + private final DecommissionController decommissionController; + private volatile List awarenessAttributes; + private volatile Map> forcedAwarenessAttributes; + + @Inject + public DecommissionService( + Settings settings, + ClusterSettings clusterSettings, + ClusterService clusterService, + TransportService transportService, + ThreadPool threadPool, + AllocationService allocationService + ) { + this.clusterService = clusterService; + this.transportService = transportService; + this.threadPool = threadPool; + this.decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); + clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING, this::setAwarenessAttributes); + + setForcedAwarenessAttributes(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.get(settings)); + clusterSettings.addSettingsUpdateConsumer( + CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING, + this::setForcedAwarenessAttributes + ); + } + + private void setAwarenessAttributes(List awarenessAttributes) { + this.awarenessAttributes = awarenessAttributes; + } + + private void setForcedAwarenessAttributes(Settings forceSettings) { + Map> forcedAwarenessAttributes = new HashMap<>(); + Map forceGroups = forceSettings.getAsGroups(); + for (Map.Entry entry : forceGroups.entrySet()) { + List aValues = entry.getValue().getAsList("values"); + if (aValues.size() > 0) { + forcedAwarenessAttributes.put(entry.getKey(), aValues); + } + } + this.forcedAwarenessAttributes = forcedAwarenessAttributes; + } + + /** + * Starts the new decommission request and registers the metadata with status as {@link DecommissionStatus#INIT} + * Once the status is updated, it tries to exclude to-be-decommissioned cluster manager eligible nodes from Voting Configuration + * + * @param decommissionAttribute register decommission attribute in the metadata request + * @param listener register decommission listener + */ + public void startDecommissionAction( + final DecommissionAttribute decommissionAttribute, + final ActionListener listener + ) { + // register the metadata with status as INIT as first step + clusterService.submitStateUpdateTask("decommission [" + decommissionAttribute + "]", new ClusterStateUpdateTask(Priority.URGENT) { + @Override + public ClusterState execute(ClusterState currentState) { + // validates if correct awareness attributes and forced awareness attribute set to the cluster before starting action + validateAwarenessAttribute(decommissionAttribute, awarenessAttributes, forcedAwarenessAttributes); + DecommissionAttributeMetadata decommissionAttributeMetadata = currentState.metadata().decommissionAttributeMetadata(); + // check that request is eligible to proceed + ensureEligibleRequest(decommissionAttributeMetadata, decommissionAttribute); + decommissionAttributeMetadata = new DecommissionAttributeMetadata(decommissionAttribute); + logger.info("registering decommission metadata [{}] to execute action", decommissionAttributeMetadata.toString()); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentState.metadata()).decommissionAttributeMetadata(decommissionAttributeMetadata)) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to start decommission action for attribute [{}]", + decommissionAttribute.toString() + ), + e + ); + listener.onFailure(e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assert decommissionAttribute.equals(decommissionAttributeMetadata.decommissionAttribute()); + logger.info( + "registered decommission metadata for attribute [{}] with status [{}]", + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + decommissionClusterManagerNodes(decommissionAttributeMetadata.decommissionAttribute(), listener); + } + }); + } + + private synchronized void decommissionClusterManagerNodes( + final DecommissionAttribute decommissionAttribute, + ActionListener listener + ) { + ClusterState state = clusterService.getClusterApplierService().state(); + // since here metadata is already registered with INIT, we can guarantee that no new node with decommission attribute can further + // join the cluster + // and hence in further request lifecycle we are sure that no new to-be-decommission leader will join the cluster + Set clusterManagerNodesToBeDecommissioned = filterNodesWithDecommissionAttribute(state, decommissionAttribute, true); + logger.info( + "resolved cluster manager eligible nodes [{}] that should be removed from Voting Configuration", + clusterManagerNodesToBeDecommissioned.toString() + ); + + // remove all 'to-be-decommissioned' cluster manager eligible nodes from voting config + Set nodeIdsToBeExcluded = clusterManagerNodesToBeDecommissioned.stream() + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + + final Predicate allNodesRemovedAndAbdicated = clusterState -> { + final Set votingConfigNodeIds = clusterState.getLastCommittedConfiguration().getNodeIds(); + return nodeIdsToBeExcluded.stream().noneMatch(votingConfigNodeIds::contains) + && nodeIdsToBeExcluded.contains(clusterState.nodes().getClusterManagerNodeId()) == false; + }; + + ActionListener exclusionListener = new ActionListener() { + @Override + public void onResponse(Void unused) { + if (clusterService.getClusterApplierService().state().nodes().isLocalNodeElectedClusterManager()) { + if (nodeHasDecommissionedAttribute(clusterService.localNode(), decommissionAttribute)) { + // this is an unexpected state, as after exclusion of nodes having decommission attribute, + // this local node shouldn't have had the decommission attribute. Will send the failure response to the user + String errorMsg = + "unexpected state encountered [local node is to-be-decommissioned leader] while executing decommission request"; + logger.error(errorMsg); + // will go ahead and clear the voting config and mark the status as false + clearVotingConfigExclusionAndUpdateStatus(false, false); + // we can send the failure response to the user here + listener.onFailure(new IllegalStateException(errorMsg)); + } else { + logger.info("will attempt to fail decommissioned nodes as local node is eligible to process the request"); + // we are good here to send the response now as the request is processed by an eligible active leader + // and to-be-decommissioned cluster manager is no more part of Voting Configuration and no more to-be-decommission + // nodes can be part of Voting Config + listener.onResponse(new ClusterStateUpdateResponse(true)); + failDecommissionedNodes(clusterService.getClusterApplierService().state()); + } + } else { + // explicitly calling listener.onFailure with NotClusterManagerException as the local node is not the cluster manager + // this will ensures that request is retried until cluster manager times out + logger.info( + "local node is not eligible to process the request, " + + "throwing NotClusterManagerException to attempt a retry on an eligible node" + ); + listener.onFailure( + new NotClusterManagerException( + "node [" + + transportService.getLocalNode().toString() + + "] not eligible to execute decommission request. Will retry until timeout." + ) + ); + } + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + // attempting to mark the status as FAILED + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }; + + if (allNodesRemovedAndAbdicated.test(state)) { + exclusionListener.onResponse(null); + } else { + logger.debug("sending transport request to remove nodes [{}] from voting config", nodeIdsToBeExcluded.toString()); + // send a transport request to exclude to-be-decommissioned cluster manager eligible nodes from voting config + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodeIdsToBeExcluded, new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully removed decommissioned cluster manager eligible nodes [{}] from voting config ", + clusterManagerNodesToBeDecommissioned.toString() + ); + final ClusterStateObserver abdicationObserver = new ClusterStateObserver( + clusterService, + TimeValue.timeValueSeconds(60L), + logger, + threadPool.getThreadContext() + ); + final ClusterStateObserver.Listener abdicationListener = new ClusterStateObserver.Listener() { + @Override + public void onNewClusterState(ClusterState state) { + logger.debug("to-be-decommissioned node is no more the active leader"); + exclusionListener.onResponse(null); + } + + @Override + public void onClusterServiceClose() { + String errorMsg = "cluster service closed while waiting for abdication of to-be-decommissioned leader"; + logger.warn(errorMsg); + listener.onFailure(new DecommissioningFailedException(decommissionAttribute, errorMsg)); + } + + @Override + public void onTimeout(TimeValue timeout) { + logger.info("timed out while waiting for abdication of to-be-decommissioned leader"); + clearVotingConfigExclusionAndUpdateStatus(false, false); + listener.onFailure( + new OpenSearchTimeoutException( + "timed out [{}] while waiting for abdication of to-be-decommissioned leader", + timeout.toString() + ) + ); + } + }; + // In case the cluster state is already processed even before this code is executed + // therefore testing first before attaching the listener + ClusterState currentState = clusterService.getClusterApplierService().state(); + if (allNodesRemovedAndAbdicated.test(currentState)) { + abdicationListener.onNewClusterState(currentState); + } else { + logger.debug("waiting to abdicate to-be-decommissioned leader"); + abdicationObserver.waitForNextChange(abdicationListener, allNodesRemovedAndAbdicated); + } + } + + @Override + public void onFailure(Exception e) { + logger.error( + new ParameterizedMessage( + "failure in removing to-be-decommissioned cluster manager eligible nodes [{}] from voting config", + nodeIdsToBeExcluded.toString() + ), + e + ); + exclusionListener.onFailure(e); + } + }); + } + } + + private void failDecommissionedNodes(ClusterState state) { + // this method ensures no matter what, we always exit from this function after clearing the voting config exclusion + DecommissionAttributeMetadata decommissionAttributeMetadata = state.metadata().decommissionAttributeMetadata(); + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.IN_PROGRESS, new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + // execute nodes decommissioning + decommissionController.removeDecommissionedNodes( + filterNodesWithDecommissionAttribute(clusterService.getClusterApplierService().state(), decommissionAttribute, false), + "nodes-decommissioned", + TimeValue.timeValueSeconds(120L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + clearVotingConfigExclusionAndUpdateStatus(true, true); + } + + @Override + public void onFailure(Exception e) { + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + } + ); + } + + @Override + public void onFailure(Exception e) { + logger.error( + () -> new ParameterizedMessage( + "failed to update decommission status for attribute [{}] to [{}]", + decommissionAttribute.toString(), + DecommissionStatus.IN_PROGRESS + ), + e + ); + // since we are not able to update the status, we will clear the voting config exclusion we have set earlier + clearVotingConfigExclusionAndUpdateStatus(false, false); + } + }); + } + + private void clearVotingConfigExclusionAndUpdateStatus(boolean decommissionSuccessful, boolean waitForRemoval) { + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + logger.info( + "successfully cleared voting config exclusion after completing decommission action, proceeding to update metadata" + ); + DecommissionStatus updateStatusWith = decommissionSuccessful ? DecommissionStatus.SUCCESSFUL : DecommissionStatus.FAILED; + decommissionController.updateMetadataWithDecommissionStatus(updateStatusWith, statusUpdateListener()); + } + + @Override + public void onFailure(Exception e) { + logger.debug( + new ParameterizedMessage("failure in clearing voting config exclusion after processing decommission request"), + e + ); + decommissionController.updateMetadataWithDecommissionStatus(DecommissionStatus.FAILED, statusUpdateListener()); + } + }, waitForRemoval); + } + + private Set filterNodesWithDecommissionAttribute( + ClusterState clusterState, + DecommissionAttribute decommissionAttribute, + boolean onlyClusterManagerNodes + ) { + Set nodesWithDecommissionAttribute = new HashSet<>(); + Iterator nodesIter = onlyClusterManagerNodes + ? clusterState.nodes().getClusterManagerNodes().valuesIt() + : clusterState.nodes().getNodes().valuesIt(); + + while (nodesIter.hasNext()) { + final DiscoveryNode node = nodesIter.next(); + if (nodeHasDecommissionedAttribute(node, decommissionAttribute)) { + nodesWithDecommissionAttribute.add(node); + } + } + return nodesWithDecommissionAttribute; + } + + private static boolean nodeHasDecommissionedAttribute(DiscoveryNode discoveryNode, DecommissionAttribute decommissionAttribute) { + return discoveryNode.getAttributes().get(decommissionAttribute.attributeName()).equals(decommissionAttribute.attributeValue()); + } + + private static void validateAwarenessAttribute( + final DecommissionAttribute decommissionAttribute, + List awarenessAttributes, + Map> forcedAwarenessAttributes + ) { + String msg = null; + if (awarenessAttributes == null) { + msg = "awareness attribute not set to the cluster."; + } else if (forcedAwarenessAttributes == null) { + msg = "forced awareness attribute not set to the cluster."; + } else if (awarenessAttributes.contains(decommissionAttribute.attributeName()) == false) { + msg = "invalid awareness attribute requested for decommissioning"; + } else if (forcedAwarenessAttributes.containsKey(decommissionAttribute.attributeName()) == false) { + msg = "forced awareness attribute [" + forcedAwarenessAttributes.toString() + "] doesn't have the decommissioning attribute"; + } else if (forcedAwarenessAttributes.get(decommissionAttribute.attributeName()) + .contains(decommissionAttribute.attributeValue()) == false) { + msg = "invalid awareness attribute value requested for decommissioning. Set forced awareness values before to decommission"; + } + + if (msg != null) { + throw new DecommissioningFailedException(decommissionAttribute, msg); + } + } + + private static void ensureEligibleRequest( + DecommissionAttributeMetadata decommissionAttributeMetadata, + DecommissionAttribute requestedDecommissionAttribute + ) { + String msg = null; + if (decommissionAttributeMetadata != null) { + // check if the same attribute is registered and handle it accordingly + if (decommissionAttributeMetadata.decommissionAttribute().equals(requestedDecommissionAttribute)) { + switch (decommissionAttributeMetadata.status()) { + // for INIT and FAILED - we are good to process it again + case INIT: + case FAILED: + break; + case IN_PROGRESS: + case SUCCESSFUL: + msg = "same request is already in status [" + decommissionAttributeMetadata.status() + "]"; + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } else { + switch (decommissionAttributeMetadata.status()) { + case SUCCESSFUL: + // one awareness attribute is already decommissioned. We will reject the new request + msg = "one awareness attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] already successfully decommissioned, recommission before triggering another decommission"; + break; + case IN_PROGRESS: + case INIT: + // it means the decommission has been initiated or is inflight. In that case, will fail new request + msg = "there's an inflight decommission request for attribute [" + + decommissionAttributeMetadata.decommissionAttribute().toString() + + "] is in progress, cannot process this request"; + break; + case FAILED: + break; + default: + throw new IllegalStateException( + "unknown status [" + decommissionAttributeMetadata.status() + "] currently registered in metadata" + ); + } + } + } + + if (msg != null) { + throw new DecommissioningFailedException(requestedDecommissionAttribute, msg); + } + } + + private ActionListener statusUpdateListener() { + return new ActionListener<>() { + @Override + public void onResponse(DecommissionStatus status) { + logger.info("updated the decommission status to [{}]", status); + } + + @Override + public void onFailure(Exception e) { + logger.error("unexpected failure occurred during decommission status update", e); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java new file mode 100644 index 0000000000000..af88b0d0f5902 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissionStatus.java @@ -0,0 +1,68 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +/** + * An enumeration of the states during decommissioning + */ +public enum DecommissionStatus { + /** + * Decommission process is initiated, and to-be-decommissioned leader is excluded from voting config + */ + INIT("init"), + /** + * Decommission process has started, decommissioned nodes should be removed + */ + IN_PROGRESS("in_progress"), + /** + * Decommission action completed + */ + SUCCESSFUL("successful"), + /** + * Decommission request failed + */ + FAILED("failed"); + + private final String status; + + DecommissionStatus(String status) { + this.status = status; + } + + /** + * Returns status that represents the decommission state + * + * @return status + */ + public String status() { + return status; + } + + /** + * Generate decommission status from given string + * + * @param status status in string + * @return status + */ + public static DecommissionStatus fromString(String status) { + if (status == null) { + throw new IllegalArgumentException("decommission status cannot be null"); + } + if (status.equals(INIT.status())) { + return INIT; + } else if (status.equals(IN_PROGRESS.status())) { + return IN_PROGRESS; + } else if (status.equals(SUCCESSFUL.status())) { + return SUCCESSFUL; + } else if (status.equals(FAILED.status())) { + return FAILED; + } + throw new IllegalStateException("Decommission status [" + status + "] not recognized."); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java new file mode 100644 index 0000000000000..fe1b9368ac712 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/DecommissioningFailedException.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; +import org.opensearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * This exception is thrown whenever a failure occurs in decommission request @{@link DecommissionService} + * + * @opensearch.internal + */ + +public class DecommissioningFailedException extends OpenSearchException { + + private final DecommissionAttribute decommissionAttribute; + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg) { + this(decommissionAttribute, msg, null); + } + + public DecommissioningFailedException(DecommissionAttribute decommissionAttribute, String msg, Throwable cause) { + super("[" + (decommissionAttribute == null ? "_na" : decommissionAttribute.toString()) + "] " + msg, cause); + this.decommissionAttribute = decommissionAttribute; + } + + public DecommissioningFailedException(StreamInput in) throws IOException { + super(in); + decommissionAttribute = new DecommissionAttribute(in); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + decommissionAttribute.writeTo(out); + } + + /** + * Returns decommission attribute + * + * @return decommission attribute + */ + public DecommissionAttribute decommissionAttribute() { + return decommissionAttribute; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java new file mode 100644 index 0000000000000..847d5a527b017 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/NodeDecommissionedException.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.opensearch.OpenSearchException; +import org.opensearch.common.io.stream.StreamInput; + +import java.io.IOException; + +/** + * This exception is thrown if the node is decommissioned by @{@link DecommissionService} + * and this nodes needs to be removed from the cluster + * + * @opensearch.internal + */ +public class NodeDecommissionedException extends OpenSearchException { + + public NodeDecommissionedException(String msg, Object... args) { + super(msg, args); + } + + public NodeDecommissionedException(StreamInput in) throws IOException { + super(in); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/decommission/package-info.java b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java new file mode 100644 index 0000000000000..256c2f22253cc --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/decommission/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Decommission lifecycle classes + */ +package org.opensearch.cluster.decommission; diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 086865d2170c3..eb5e8bbc2d49b 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -51,6 +51,7 @@ import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; import org.opensearch.common.Nullable; import org.opensearch.common.Strings; import org.opensearch.common.UUIDs; @@ -795,6 +796,10 @@ public Map dataStreams() { .orElse(Collections.emptyMap()); } + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return custom(DecommissionAttributeMetadata.TYPE); + } + public ImmutableOpenMap customs() { return this.customs; } @@ -1336,6 +1341,15 @@ public IndexGraveyard indexGraveyard() { return graveyard; } + public Builder decommissionAttributeMetadata(final DecommissionAttributeMetadata decommissionAttributeMetadata) { + putCustom(DecommissionAttributeMetadata.TYPE, decommissionAttributeMetadata); + return this; + } + + public DecommissionAttributeMetadata decommissionAttributeMetadata() { + return (DecommissionAttributeMetadata) getCustom(DecommissionAttributeMetadata.TYPE); + } + public Builder updateSettings(Settings settings, String... indices) { if (indices == null || indices.length == 0) { indices = this.indices.keys().toArray(String.class); diff --git a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java index 26b0ce7e9e20c..ff2bb77531486 100644 --- a/server/src/test/java/org/opensearch/ExceptionSerializationTests.java +++ b/server/src/test/java/org/opensearch/ExceptionSerializationTests.java @@ -49,6 +49,8 @@ import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.coordination.CoordinationStateRejectedException; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; +import org.opensearch.cluster.decommission.DecommissioningFailedException; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.IllegalShardRoutingStateException; import org.opensearch.cluster.routing.ShardRouting; @@ -860,6 +862,8 @@ public void testIds() { ids.put(160, NoSeedNodeLeftException.class); ids.put(161, ReplicationFailedException.class); ids.put(162, PrimaryShardClosedException.class); + ids.put(163, DecommissioningFailedException.class); + ids.put(164, NodeDecommissionedException.class); Map, Integer> reverse = new HashMap<>(); for (Map.Entry> entry : ids.entrySet()) { diff --git a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java index 02e502e762561..28f1a92cc1bdc 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/JoinTaskExecutorTests.java @@ -36,9 +36,14 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskExecutor; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.cluster.decommission.NodeDecommissionedException; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.RerouteService; import org.opensearch.cluster.routing.allocation.AllocationService; @@ -48,7 +53,9 @@ import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; +import java.util.Collections; import java.util.HashSet; +import java.util.Map; import static org.hamcrest.Matchers.is; import static org.opensearch.test.VersionUtils.allVersions; @@ -216,4 +223,67 @@ public void testIsBecomeClusterManagerTask() { JoinTaskExecutor.Task joinTaskOfClusterManager = JoinTaskExecutor.newBecomeClusterManagerTask(); assertThat(joinTaskOfClusterManager.isBecomeClusterManagerTask(), is(true)); } + + public void testJoinClusterWithNoDecommission() { + Settings.builder().build(); + Metadata.Builder metaBuilder = Metadata.builder(); + Metadata metadata = metaBuilder.build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testPreventJoinClusterWithDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom( + DecommissionStatus.INIT, + DecommissionStatus.IN_PROGRESS, + DecommissionStatus.SUCCESSFUL + ); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + expectThrows(NodeDecommissionedException.class, () -> JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata)); + } + + public void testJoinClusterWithDifferentDecommission() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + decommissionStatus + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-2")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + public void testJoinClusterWithDecommissionFailed() { + Settings.builder().build(); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "zone-1"); + DecommissionAttributeMetadata decommissionAttributeMetadata = new DecommissionAttributeMetadata( + decommissionAttribute, + DecommissionStatus.FAILED + ); + Metadata metadata = Metadata.builder().decommissionAttributeMetadata(decommissionAttributeMetadata).build(); + + DiscoveryNode discoveryNode = newDiscoveryNode(Collections.singletonMap("zone", "zone-1")); + JoinTaskExecutor.ensureNodeCommissioned(discoveryNode, metadata); + } + + private DiscoveryNode newDiscoveryNode(Map attributes) { + return new DiscoveryNode( + randomAlphaOfLength(10), + randomAlphaOfLength(10), + buildNewFakeTransportAddress(), + attributes, + Collections.singleton(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE), + Version.CURRENT + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java new file mode 100644 index 0000000000000..8b5343184dabd --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -0,0 +1,365 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.junit.After; +import org.junit.Before; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.action.admin.cluster.configuration.TransportAddVotingConfigExclusionsAction; +import org.opensearch.action.admin.cluster.configuration.TransportClearVotingConfigExclusionsAction; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ClusterStateObserver; +import org.opensearch.cluster.ClusterStateUpdateTask; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.sameInstance; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionControllerTests extends OpenSearchTestCase { + + private static ThreadPool threadPool; + private static ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionController decommissionController; + private ClusterSettings clusterSettings; + + @Before + public void setTransportServiceAndDefaultClusterState() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding five nodes on same zone_1"); + clusterState = addNodes(clusterState, "zone_1", "node1", "node2", "node3", "node4", "node5"); + logger.info("--> adding five nodes on same zone_2"); + clusterState = addNodes(clusterState, "zone_2", "node6", "node7", "node8", "node9", "node10"); + logger.info("--> adding five nodes on same zone_3"); + clusterState = addNodes(clusterState, "zone_3", "node11", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setThreeNodesInVotingConfig(clusterState); + final ClusterState.Builder builder = builder(clusterState); + clusterService = createClusterService(threadPool, clusterState.nodes().get("node1")); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder(); + final Settings nodeSettings = nodeSettingsBuilder.build(); + clusterSettings = new ClusterSettings(nodeSettings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + new TransportAddVotingConfigExclusionsAction( + nodeSettings, + clusterSettings, + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + new TransportClearVotingConfigExclusionsAction( + transportService, + clusterService, + threadPool, + new ActionFilters(emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)) + ); // registers action + + transportService.start(); + transportService.acceptIncomingRequests(); + decommissionController = new DecommissionController(clusterService, transportService, allocationService, threadPool); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + public void testAddNodesToVotingConfigExclusion() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(2); + + ClusterStateObserver clusterStateObserver = new ClusterStateObserver(clusterService, null, logger, threadPool.getThreadContext()); + clusterStateObserver.waitForNextChange(new AdjustConfigurationForExclusions(countDownLatch)); + Set nodesToRemoveFromVotingConfig = Collections.singleton(randomFrom("node1", "node6", "node11")); + decommissionController.excludeDecommissionedNodesFromVotingConfig(nodesToRemoveFromVotingConfig, new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while removing node from voting config " + e); + } + }); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + clusterService.getClusterApplierService().state().getVotingConfigExclusions().forEach(vce -> { + assertTrue(nodesToRemoveFromVotingConfig.contains(vce.getNodeId())); + assertEquals(nodesToRemoveFromVotingConfig.size(), 1); + }); + } + + public void testClearVotingConfigExclusions() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + decommissionController.clearVotingConfigExclusion(new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("unexpected failure occurred while clearing voting config exclusion" + e); + } + }, false); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + assertThat(clusterService.getClusterApplierService().state().getVotingConfigExclusions(), empty()); + } + + public void testNodesRemovedForDecommissionRequestSuccessfulResponse() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test", + TimeValue.timeValueSeconds(30L), + new ActionListener() { + @Override + public void onResponse(Void unused) { + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("there shouldn't have been any failure"); + } + } + ); + + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + // test all 5 nodes removed and cluster has 10 nodes + Set nodes = StreamSupport.stream(clusterService.getClusterApplierService().state().nodes().spliterator(), false) + .collect(Collectors.toSet()); + assertEquals(nodes.size(), 10); + // test no nodes part of zone-3 + for (DiscoveryNode node : nodes) { + assertNotEquals(node.getAttributes().get("zone"), "zone-1"); + } + } + + public void testTimesOut() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + Set nodesToBeRemoved = new HashSet<>(); + nodesToBeRemoved.add(clusterService.state().nodes().get("node11")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node12")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); + nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + decommissionController.removeDecommissionedNodes( + nodesToBeRemoved, + "unit-test-timeout", + TimeValue.timeValueMillis(2), + new ActionListener() { + @Override + public void onResponse(Void unused) { + fail("response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertThat(e, instanceOf(OpenSearchTimeoutException.class)); + assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes")); + countDownLatch.countDown(); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + public void testSuccessfulDecommissionStatusMetadataUpdate() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone-1"), + DecommissionStatus.IN_PROGRESS + ); + ClusterState state = clusterService.state(); + Metadata metadata = state.metadata(); + Metadata.Builder mdBuilder = Metadata.builder(metadata); + mdBuilder.decommissionAttributeMetadata(oldMetadata); + state = ClusterState.builder(state).metadata(mdBuilder).build(); + setState(clusterService, state); + + decommissionController.updateMetadataWithDecommissionStatus( + DecommissionStatus.SUCCESSFUL, + new ActionListener() { + @Override + public void onResponse(DecommissionStatus status) { + assertEquals(DecommissionStatus.SUCCESSFUL, status); + countDownLatch.countDown(); + } + + @Override + public void onFailure(Exception e) { + fail("decommission status update failed"); + } + } + ); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + ClusterState newState = clusterService.getClusterApplierService().state(); + DecommissionAttributeMetadata decommissionAttributeMetadata = newState.metadata().decommissionAttributeMetadata(); + assertEquals(decommissionAttributeMetadata.status(), DecommissionStatus.SUCCESSFUL); + } + + private static class AdjustConfigurationForExclusions implements ClusterStateObserver.Listener { + + final CountDownLatch doneLatch; + + AdjustConfigurationForExclusions(CountDownLatch latch) { + this.doneLatch = latch; + } + + @Override + public void onNewClusterState(ClusterState state) { + clusterService.getClusterManagerService().submitStateUpdateTask("reconfiguration", new ClusterStateUpdateTask() { + @Override + public ClusterState execute(ClusterState currentState) { + assertThat(currentState, sameInstance(state)); + final Set votingNodeIds = new HashSet<>(); + currentState.nodes().forEach(n -> votingNodeIds.add(n.getId())); + currentState.getVotingConfigExclusions().forEach(t -> votingNodeIds.remove(t.getNodeId())); + final CoordinationMetadata.VotingConfiguration votingConfiguration = new CoordinationMetadata.VotingConfiguration( + votingNodeIds + ); + return builder(currentState).metadata( + Metadata.builder(currentState.metadata()) + .coordinationMetadata( + CoordinationMetadata.builder(currentState.coordinationMetadata()) + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ) + ).build(); + } + + @Override + public void onFailure(String source, Exception e) { + throw new AssertionError("unexpected failure", e); + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) { + doneLatch.countDown(); + } + }); + } + + @Override + public void onClusterServiceClose() { + throw new AssertionError("unexpected close"); + } + + @Override + public void onTimeout(TimeValue timeout) { + throw new AssertionError("unexpected timeout"); + } + } + + private ClusterState addNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setThreeNodesInVotingConfig(ClusterState clusterState) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of( + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_DATA_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Arrays.asList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + ); +} diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java new file mode 100644 index 0000000000000..71ee61ffec275 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionServiceTests.java @@ -0,0 +1,262 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.decommission; + +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; +import org.opensearch.Version; +import org.opensearch.action.ActionListener; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.ack.ClusterStateUpdateResponse; +import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.decider.AwarenessAllocationDecider; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.transport.MockTransport; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonMap; +import static org.opensearch.cluster.ClusterState.builder; +import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.opensearch.test.ClusterServiceUtils.setState; + +public class DecommissionServiceTests extends OpenSearchTestCase { + + private ThreadPool threadPool; + private ClusterService clusterService; + private TransportService transportService; + private AllocationService allocationService; + private DecommissionService decommissionService; + private ClusterSettings clusterSettings; + + @Before + public void setUpService() { + threadPool = new TestThreadPool("test", Settings.EMPTY); + clusterService = createClusterService(threadPool); + allocationService = createAllocationService(); + ClusterState clusterState = ClusterState.builder(new ClusterName("test")).build(); + logger.info("--> adding cluster manager node on zone_1"); + clusterState = addClusterManagerNodes(clusterState, "zone_1", "node1"); + logger.info("--> adding cluster manager node on zone_2"); + clusterState = addClusterManagerNodes(clusterState, "zone_2", "node6"); + logger.info("--> adding cluster manager node on zone_3"); + clusterState = addClusterManagerNodes(clusterState, "zone_3", "node11"); + logger.info("--> adding four data nodes on zone_1"); + clusterState = addDataNodes(clusterState, "zone_1", "node2", "node3", "node4", "node5"); + logger.info("--> adding four data nodes on zone_2"); + clusterState = addDataNodes(clusterState, "zone_2", "node7", "node8", "node9", "node10"); + logger.info("--> adding four data nodes on zone_3"); + clusterState = addDataNodes(clusterState, "zone_3", "node12", "node13", "node14", "node15"); + clusterState = setLocalNodeAsClusterManagerNode(clusterState, "node1"); + clusterState = setNodesInVotingConfig( + clusterState, + clusterState.nodes().get("node1"), + clusterState.nodes().get("node6"), + clusterState.nodes().get("node11") + ); + final ClusterState.Builder builder = builder(clusterState); + setState(clusterService, builder); + final MockTransport transport = new MockTransport(); + transportService = transport.createTransportService( + Settings.EMPTY, + threadPool, + TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> clusterService.state().nodes().get("node1"), + null, + emptySet() + ); + + final Settings.Builder nodeSettingsBuilder = Settings.builder() + .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") + .put("cluster.routing.allocation.awareness.force.zone.values", "zone_1,zone_2,zone_3"); + + clusterSettings = new ClusterSettings(nodeSettingsBuilder.build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + transportService.start(); + transportService.acceptIncomingRequests(); + + this.decommissionService = new DecommissionService( + nodeSettingsBuilder.build(), + clusterSettings, + clusterService, + transportService, + threadPool, + allocationService + ); + } + + @After + public void shutdownThreadPoolAndClusterService() { + clusterService.stop(); + threadPool.shutdown(); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeName() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("rack", "rack-a"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat(e.getMessage(), Matchers.endsWith("invalid awareness attribute requested for decommissioning")); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningNotStartedForInvalidAttributeValue() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute("zone", "rack-a"); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + assertThat( + e.getMessage(), + Matchers.endsWith( + "invalid awareness attribute value requested for decommissioning. " + + "Set forced awareness values before to decommission" + ) + ); + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(decommissionAttribute, listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + @SuppressWarnings("unchecked") + public void testDecommissioningFailedWhenAnotherAttributeDecommissioningSuccessful() throws InterruptedException { + final CountDownLatch countDownLatch = new CountDownLatch(1); + DecommissionStatus oldStatus = randomFrom(DecommissionStatus.SUCCESSFUL, DecommissionStatus.IN_PROGRESS, DecommissionStatus.INIT); + DecommissionAttributeMetadata oldMetadata = new DecommissionAttributeMetadata( + new DecommissionAttribute("zone", "zone_1"), + oldStatus + ); + final ClusterState.Builder builder = builder(clusterService.state()); + setState( + clusterService, + builder.metadata(Metadata.builder(clusterService.state().metadata()).decommissionAttributeMetadata(oldMetadata).build()) + ); + ActionListener listener = new ActionListener() { + @Override + public void onResponse(ClusterStateUpdateResponse clusterStateUpdateResponse) { + fail("on response shouldn't have been called"); + } + + @Override + public void onFailure(Exception e) { + assertTrue(e instanceof DecommissioningFailedException); + if (oldStatus.equals(DecommissionStatus.SUCCESSFUL)) { + assertThat( + e.getMessage(), + Matchers.endsWith("already successfully decommissioned, recommission before triggering another decommission") + ); + } else { + assertThat(e.getMessage(), Matchers.endsWith("is in progress, cannot process this request")); + } + countDownLatch.countDown(); + } + }; + decommissionService.startDecommissionAction(new DecommissionAttribute("zone", "zone_2"), listener); + assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); + } + + private ClusterState addDataNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeId -> nodeBuilder.add(newDataNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState addClusterManagerNodes(ClusterState clusterState, String zone, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + org.opensearch.common.collect.List.of(nodeIds) + .forEach(nodeId -> nodeBuilder.add(newClusterManagerNode(nodeId, singletonMap("zone", zone)))); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setLocalNodeAsClusterManagerNode(ClusterState clusterState, String nodeId) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.nodes()); + nodeBuilder.localNodeId(nodeId); + nodeBuilder.clusterManagerNodeId(nodeId); + clusterState = ClusterState.builder(clusterState).nodes(nodeBuilder).build(); + return clusterState; + } + + private ClusterState setNodesInVotingConfig(ClusterState clusterState, DiscoveryNode... nodes) { + final CoordinationMetadata.VotingConfiguration votingConfiguration = CoordinationMetadata.VotingConfiguration.of(nodes); + + Metadata.Builder builder = Metadata.builder() + .coordinationMetadata( + CoordinationMetadata.builder() + .lastAcceptedConfiguration(votingConfiguration) + .lastCommittedConfiguration(votingConfiguration) + .build() + ); + clusterState = ClusterState.builder(clusterState).metadata(builder).build(); + return clusterState; + } + + private static DiscoveryNode newDataNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, DATA_ROLE, Version.CURRENT); + } + + private static DiscoveryNode newClusterManagerNode(String nodeId, Map attributes) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), attributes, CLUSTER_MANAGER_ROLE, Version.CURRENT); + } + + final private static Set CLUSTER_MANAGER_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.CLUSTER_MANAGER_ROLE)) + ); + + final private static Set DATA_ROLE = Collections.unmodifiableSet( + new HashSet<>(Collections.singletonList(DiscoveryNodeRole.DATA_ROLE)) + ); + + private ClusterState removeNodes(ClusterState clusterState, String... nodeIds) { + DiscoveryNodes.Builder nodeBuilder = DiscoveryNodes.builder(clusterState.getNodes()); + org.opensearch.common.collect.List.of(nodeIds).forEach(nodeBuilder::remove); + return allocationService.disassociateDeadNodes(ClusterState.builder(clusterState).nodes(nodeBuilder).build(), false, "test"); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java new file mode 100644 index 0000000000000..60b3a03848830 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataSerializationTests.java @@ -0,0 +1,83 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.ClusterModule; +import org.opensearch.cluster.Diff; +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.common.io.stream.Writeable; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractDiffableSerializationTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataSerializationTests extends AbstractDiffableSerializationTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return DecommissionAttributeMetadata::new; + } + + @Override + protected Metadata.Custom createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected Metadata.Custom mutateInstance(Metadata.Custom instance) { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected Metadata.Custom makeTestChanges(Metadata.Custom testInstance) { + DecommissionAttributeMetadata decommissionAttributeMetadata = (DecommissionAttributeMetadata) testInstance; + DecommissionAttribute decommissionAttribute = decommissionAttributeMetadata.decommissionAttribute(); + String attributeName = decommissionAttribute.attributeName(); + String attributeValue = decommissionAttribute.attributeValue(); + DecommissionStatus decommissionStatus = decommissionAttributeMetadata.status(); + if (randomBoolean()) { + decommissionStatus = randomFrom(DecommissionStatus.values()); + } + if (randomBoolean()) { + attributeName = randomAlphaOfLength(6); + } + if (randomBoolean()) { + attributeValue = randomAlphaOfLength(6); + } + return new DecommissionAttributeMetadata(new DecommissionAttribute(attributeName, attributeValue), decommissionStatus); + } + + @Override + protected Writeable.Reader> diffReader() { + return DecommissionAttributeMetadata::readDiffFrom; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(ClusterModule.getNamedWriteables()); + } + + @Override + protected Metadata.Custom doParseInstance(XContentParser parser) throws IOException { + assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); + DecommissionAttributeMetadata decommissionAttributeMetadata = DecommissionAttributeMetadata.fromXContent(parser); + assertEquals(XContentParser.Token.END_OBJECT, parser.currentToken()); + return new DecommissionAttributeMetadata( + decommissionAttributeMetadata.decommissionAttribute(), + decommissionAttributeMetadata.status() + ); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java new file mode 100644 index 0000000000000..746d4565b0db3 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataTests.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.io.stream.NamedWriteableRegistry; +import org.opensearch.test.AbstractNamedWriteableTestCase; + +import java.io.IOException; +import java.util.Collections; + +public class DecommissionAttributeMetadataTests extends AbstractNamedWriteableTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata mutateInstance(DecommissionAttributeMetadata instance) throws IOException { + return randomValueOtherThan(instance, this::createTestInstance); + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry( + Collections.singletonList( + new NamedWriteableRegistry.Entry( + DecommissionAttributeMetadata.class, + DecommissionAttributeMetadata.TYPE, + DecommissionAttributeMetadata::new + ) + ) + ); + } + + @Override + protected Class categoryClass() { + return DecommissionAttributeMetadata.class; + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java new file mode 100644 index 0000000000000..030946f4510a1 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/DecommissionAttributeMetadataXContentTests.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.cluster.decommission.DecommissionAttribute; +import org.opensearch.cluster.decommission.DecommissionAttributeMetadata; +import org.opensearch.cluster.decommission.DecommissionStatus; +import org.opensearch.common.xcontent.XContentParser; +import org.opensearch.test.AbstractXContentTestCase; + +import java.io.IOException; + +public class DecommissionAttributeMetadataXContentTests extends AbstractXContentTestCase { + @Override + protected DecommissionAttributeMetadata createTestInstance() { + String attributeName = randomAlphaOfLength(6); + String attributeValue = randomAlphaOfLength(6); + DecommissionAttribute decommissionAttribute = new DecommissionAttribute(attributeName, attributeValue); + DecommissionStatus decommissionStatus = randomFrom(DecommissionStatus.values()); + return new DecommissionAttributeMetadata(decommissionAttribute, decommissionStatus); + } + + @Override + protected DecommissionAttributeMetadata doParseInstance(XContentParser parser) throws IOException { + return DecommissionAttributeMetadata.fromXContent(parser); + } + + @Override + protected boolean supportsUnknownFields() { + return false; + } +}