From 35562cc5af4bea6a4d4b8cb93eb2141298955489 Mon Sep 17 00:00:00 2001 From: Dianna Hohensee Date: Wed, 7 Jan 2026 03:48:25 -0800 Subject: [PATCH] Overall Decision for Deciders prioritizes THROTTLE (#140237) Fixing a bug where AllocationDeciders could summarize AllocationDecider responses as NOT_PREFERRED, which allows shard movement, when an AllocationDecider responded THROTTLE. Relates ES-13903 Co-authored-by: Nick Tindall Co-authored-by: Henning Andersen <33268011+henningandersen@users.noreply.github.com> Co-authored-by: Yang Wang --- docs/changelog/140237.yaml | 5 + .../allocator/NonDesiredBalanceIT.java | 150 ++++++++++++++++++ .../allocation/allocator/ReconcilerIT.java | 141 ++++++++++++++++ .../allocator/BalancedShardsAllocator.java | 14 +- .../allocator/DesiredBalanceReconciler.java | 8 + .../decider/AllocationDeciders.java | 20 ++- .../routing/allocation/decider/Decision.java | 62 ++++++-- .../allocation/decider/DecisionTests.java | 26 ++- 8 files changed, 395 insertions(+), 31 deletions(-) create mode 100644 docs/changelog/140237.yaml create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/NonDesiredBalanceIT.java create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/ReconcilerIT.java diff --git a/docs/changelog/140237.yaml b/docs/changelog/140237.yaml new file mode 100644 index 0000000000000..cb1e11be6940d --- /dev/null +++ b/docs/changelog/140237.yaml @@ -0,0 +1,5 @@ +pr: 140237 +summary: Overall Decision for Deciders prioritizes THROTTLE +area: Allocation +type: bug +issues: [] diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/NonDesiredBalanceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/NonDesiredBalanceIT.java new file mode 100644 index 0000000000000..0bf14f07ecd75 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/NonDesiredBalanceIT.java @@ -0,0 +1,150 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.cluster.routing.allocation.allocator; + +import org.apache.logging.log4j.Level; +import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequest; +import org.elasticsearch.action.admin.cluster.reroute.TransportClusterRerouteAction; +import org.elasticsearch.cluster.ClusterModule; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.Decision; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.ClusterPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ClusterServiceUtils; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.MockLog; +import org.elasticsearch.test.junit.annotations.TestLogging; +import org.junit.After; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) +public class NonDesiredBalanceIT extends ESIntegTestCase { + + private static final Set NOT_PREFERRED_AND_THROTTLED_NODES = Collections.newSetFromMap(new ConcurrentHashMap<>()); + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(NotPreferredAndThrottledPlugin.class); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING.getKey(), ClusterModule.BALANCED_ALLOCATOR) + .build(); + } + + @After + public void clearThrottleAndNotPreferredNodes() { + NOT_PREFERRED_AND_THROTTLED_NODES.clear(); + } + + /** + * Tests that the non-desired balance allocator obeys the THROTTLE decision of a node. + */ + @TestLogging( + reason = "watch for can't move message", + value = "org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator:TRACE" + ) + public void testThrottleVsNotPreferredPriorityInDecisions() { + final Settings settings = Settings.builder().build(); + final var sourceNode = internalCluster().startNode(settings); + final var indexName = randomIdentifier(); + createIndex(indexName, 1, 0); + ensureGreen(indexName); + final var targetNode = internalCluster().startNode(settings); + final var sourceNodeID = getNodeId(sourceNode); + final var targetNodeID = getNodeId(targetNode); + + NOT_PREFERRED_AND_THROTTLED_NODES.add(targetNodeID); + + var mapNodeIdsToNames = nodeIdsToNames(); + final var sourceNodeName = mapNodeIdsToNames.get(sourceNodeID); + final var targetNodeName = mapNodeIdsToNames.get(targetNodeID); + assertNotNull(sourceNodeName); + assertNotNull(targetNodeName); + + logger.info("--> Verifying the shard did not move because allocation to the target node (ID: " + targetNodeID + ") is throttled."); + MockLog.awaitLogger(() -> { + logger.info( + "--> Excluding shard assignment to node " + + sourceNodeName + + "(ID: " + + sourceNodeID + + ") to force its shard to move to node " + + targetNodeName + + "(ID: " + + targetNodeID + + ")" + ); + updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", sourceNodeName)); + }, + BalancedShardsAllocator.class, + new MockLog.SeenEventExpectation( + "unable to relocate shard due to throttling", + BalancedShardsAllocator.class.getCanonicalName(), + Level.TRACE, + "[[*]][0] can't move: [MoveDecision{canMoveDecision=throttled, canRemainDecision=NO(), *}]" + ) + ); + + logger.info("--> Clearing THROTTLE decider response and prodding the Reconciler (with a reroute request) to try again"); + clearThrottleAndNotPreferredNodes(); + + safeGet( + client().execute(TransportClusterRerouteAction.TYPE, new ClusterRerouteRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT)) + ); + + safeAwait( + ClusterServiceUtils.addMasterTemporaryStateListener( + state -> state.routingTable(ProjectId.DEFAULT) + .index(indexName) + .allShards() + .flatMap(IndexShardRoutingTable::allShards) + .allMatch(shardRouting -> shardRouting.currentNodeId().equals(targetNodeID) && shardRouting.started()) + ) + ); + } + + public static class NotPreferredAndThrottledPlugin extends Plugin implements ClusterPlugin { + @Override + public Collection createAllocationDeciders(Settings settings, ClusterSettings clusterSettings) { + + return List.of(new AllocationDecider() { + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return NOT_PREFERRED_AND_THROTTLED_NODES.contains(node.nodeId()) ? Decision.NOT_PREFERRED : Decision.YES; + } + }, new AllocationDecider() { + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + // THROTTLING is not returned in simulation, so only THROTTLE for real moves in the Reconciler. + return (NOT_PREFERRED_AND_THROTTLED_NODES.contains(node.nodeId()) && allocation.isSimulating() == false) + ? Decision.THROTTLE + : Decision.YES; + } + }); + } + } +} diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/ReconcilerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/ReconcilerIT.java new file mode 100644 index 0000000000000..e30c509de5958 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/ReconcilerIT.java @@ -0,0 +1,141 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.cluster.routing.allocation.allocator; + +import org.apache.logging.log4j.Level; +import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequest; +import org.elasticsearch.action.admin.cluster.reroute.TransportClusterRerouteAction; +import org.elasticsearch.cluster.metadata.ProjectId; +import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.Decision; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.ClusterPlugin; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ClusterServiceUtils; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.MockLog; +import org.elasticsearch.test.junit.annotations.TestLogging; +import org.junit.After; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) +public class ReconcilerIT extends ESIntegTestCase { + + private static final Set NOT_PREFERRED_AND_THROTTLED_NODES = Collections.newSetFromMap(new ConcurrentHashMap<>()); + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(NotPreferredAndThrottledPlugin.class); + } + + @After + public void clearThrottleAndNotPreferredNodes() { + NOT_PREFERRED_AND_THROTTLED_NODES.clear(); + } + + /** + * Tests that the Reconciler obeys the THROTTLE decision of a node. + */ + @TestLogging( + reason = "track when Reconciler decisions", + value = "org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceReconciler:TRACE" + ) + public void testThrottleVsNotPreferredPriorityInDecisions() { + final Settings settings = Settings.builder().build(); + final var sourceNode = internalCluster().startNode(settings); + final var indexName = randomIdentifier(); + createIndex(indexName, 1, 0); + ensureGreen(indexName); + final var targetNode = internalCluster().startNode(settings); + final var sourceNodeID = getNodeId(sourceNode); + final var targetNodeID = getNodeId(targetNode); + + NOT_PREFERRED_AND_THROTTLED_NODES.add(targetNodeID); + + var mapNodeIdsToNames = nodeIdsToNames(); + final var sourceNodeName = mapNodeIdsToNames.get(sourceNodeID); + final var targetNodeName = mapNodeIdsToNames.get(targetNodeID); + assertNotNull(sourceNodeName); + assertNotNull(targetNodeName); + + logger.info("--> Verifying the shard did not move because allocation to the target node (ID: " + targetNodeID + ") is throttled."); + MockLog.awaitLogger(() -> { + logger.info( + "--> Excluding shard assignment to node " + + sourceNodeName + + "(ID: " + + sourceNodeID + + ") to force its shard to move to node " + + targetNodeName + + "(ID: " + + targetNodeID + + ")" + ); + updateClusterSettings(Settings.builder().put("cluster.routing.allocation.exclude._name", sourceNodeName)); + }, + DesiredBalanceReconciler.class, + new MockLog.SeenEventExpectation( + "unable to relocate shard that can no longer remain", + DesiredBalanceReconciler.class.getCanonicalName(), + Level.TRACE, + "Cannot move shard * and cannot remain because of [NO()]" + ) + ); + + logger.info("--> Clearing THROTTLE decider response and prodding the Reconciler (with a reroute request) to try again"); + clearThrottleAndNotPreferredNodes(); + + safeGet( + client().execute(TransportClusterRerouteAction.TYPE, new ClusterRerouteRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT)) + ); + + safeAwait( + ClusterServiceUtils.addMasterTemporaryStateListener( + state -> state.routingTable(ProjectId.DEFAULT) + .index(indexName) + .allShards() + .flatMap(IndexShardRoutingTable::allShards) + .allMatch(shardRouting -> shardRouting.currentNodeId().equals(targetNodeID) && shardRouting.started()) + ) + ); + } + + public static class NotPreferredAndThrottledPlugin extends Plugin implements ClusterPlugin { + @Override + public Collection createAllocationDeciders(Settings settings, ClusterSettings clusterSettings) { + + return List.of(new AllocationDecider() { + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + return NOT_PREFERRED_AND_THROTTLED_NODES.contains(node.nodeId()) ? Decision.NOT_PREFERRED : Decision.YES; + } + }, new AllocationDecider() { + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + // THROTTLING is not returned in simulation, so only THROTTLE for real moves in the Reconciler. + return (NOT_PREFERRED_AND_THROTTLED_NODES.contains(node.nodeId()) && allocation.isSimulating() == false) + ? Decision.THROTTLE + : Decision.YES; + } + }); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index fe3b7bb958e0c..3faa09ff975e5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -530,12 +530,12 @@ private MoveDecision explainRebalanceDecision(final ProjectIndex index, final Sh // if the simulated weight delta with the shard moved away is better than the weight delta // with the shard remaining on the current node, and we are allowed to allocate to the // node in question, then allow the rebalance - if (rebalanceConditionsMet && canAllocate.type().higherThan(bestRebalanceCanAllocateDecisionType)) { + if (rebalanceConditionsMet && canAllocate.type().compareToBetweenNodes(bestRebalanceCanAllocateDecisionType) > 0) { // Overwrite the best decision since it is better than the last. This means that YES/THROTTLE decisions will replace // NOT_PREFERRED/NO decisions, and a YES decision will replace a THROTTLE decision. NOT_PREFERRED will also replace // NO, even if neither are acted upon for rebalancing, for allocation explain purposes. bestRebalanceCanAllocateDecisionType = canAllocate.type(); - if (canAllocate.type().higherThan(Type.NOT_PREFERRED)) { + if (canAllocate.type().compareToBetweenNodes(Type.NOT_PREFERRED) > 0) { // Movement is only allowed to THROTTLE/YES nodes. NOT_PREFERRED is the same as no for rebalancing, since // rebalancing aims to distribute resource usage and NOT_PREFERRED means the move could cause hot-spots. targetNode = node; @@ -1053,7 +1053,7 @@ private MoveDecision decideMove( // Relocating a shard from one NOT_PREFERRED node to another NOT_PREFERRED node would not improve the situation. continue; } - if (allocationDecision.type().higherThan(bestDecision)) { + if (allocationDecision.type().compareToBetweenNodes(bestDecision) > 0) { bestDecision = allocationDecision.type(); if (bestDecision == Type.YES) { targetNode = target; @@ -1570,12 +1570,16 @@ private boolean tryRelocateShard(ModelNode minNode, ModelNode maxNode, ProjectIn continue; } - final Decision.Type canAllocateOrRebalance = Decision.Type.min(allocationDecision.type(), rebalanceDecision.type()); + assert rebalanceDecision.type() == Type.YES || rebalanceDecision.type() == Type.THROTTLE + : "We should only see YES/THROTTLE decisions here"; + assert allocationDecision.type() == Type.YES || allocationDecision.type() == Type.THROTTLE + : "We should only see YES/THROTTLE decisions here"; + final Decision.Type canAllocateOrRebalance = allocationDecision.type() == Type.THROTTLE + || rebalanceDecision.type() == Type.THROTTLE ? Type.THROTTLE : Type.YES; maxNode.removeShard(projectIndex(shard), shard); long shardSize = allocation.clusterInfo().getShardSize(shard, ShardRouting.UNAVAILABLE_EXPECTED_SHARD_SIZE); - assert canAllocateOrRebalance == Type.YES || canAllocateOrRebalance == Type.THROTTLE : canAllocateOrRebalance; logger.debug( "decision [{}]: relocate [{}] from [{}] to [{}]", canAllocateOrRebalance, diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 38e122af9b236..d37614533dcbf 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -530,6 +530,14 @@ private void moveShards() { iterator.dePrioritizeNode(shardRouting.currentNodeId()); moveOrdering.recordAllocation(shardRouting.currentNodeId()); movedUndesiredShard = true; + } else { + logger.trace( + "Cannot move shard [{}][{}] away from {}, and cannot remain because of [{}]", + shardRouting.index(), + shardRouting.shardId(), + shardRouting.currentNodeId(), + canRemainDecision + ); } } finally { if (movedUndesiredShard) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java index 6a09c894dbc7d..511050b6be3b7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AllocationDeciders.java @@ -208,24 +208,21 @@ private Decision withDeciders( Decision mostNegativeDecision = Decision.YES; for (AllocationDecider decider : deciders) { var decision = deciderAction.apply(decider); - if (mostNegativeDecision.type().higherThan(decision.type())) { + if (mostNegativeDecision.type().compareToBetweenDecisions(decision.type()) > 0) { mostNegativeDecision = decision; if (mostNegativeDecision.type() == Decision.Type.NO) { - if (logger.isTraceEnabled()) { - logger.trace(() -> logMessageCreator.apply(decider.getClass().getSimpleName(), decision)); - } + traceNoDecisions(decider, decision, logMessageCreator); break; } } } + return mostNegativeDecision; } else { final var multiDecision = new Decision.Multi(); for (AllocationDecider decider : deciders) { var decision = deciderAction.apply(decider); - if (logger.isTraceEnabled() && decision.type() == Decision.Type.NO) { - logger.trace(() -> logMessageCreator.apply(decider.getClass().getSimpleName(), decision)); - } + traceNoDecisions(decider, decision, logMessageCreator); if (decision != Decision.ALWAYS && (debugMode == RoutingAllocation.DebugMode.ON || decision.type() != Decision.Type.YES)) { multiDecision.add(decision); } @@ -234,6 +231,15 @@ private Decision withDeciders( } } + /** + * NO decisions have TRACE-level logging. + */ + private void traceNoDecisions(AllocationDecider decider, Decision decision, BiFunction logMessageCreator) { + if (logger.isTraceEnabled() && decision.type() == Decision.Type.NO) { + logger.trace(() -> logMessageCreator.apply(decider.getClass().getSimpleName(), decision)); + } + } + public Optional> getForcedInitialShardAllocationToNodes(ShardRouting shardRouting, RoutingAllocation allocation) { var result = Optional.>empty(); for (AllocationDecider decider : deciders) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java index 406e5b122cf46..16ff7c976d938 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/Decision.java @@ -117,17 +117,23 @@ private static Single readSingleFrom(StreamInput in) throws IOException { * This enumeration defines the possible types of decisions */ enum Type implements Writeable { - // ordered by positiveness; order matters for serialization and comparison - NO, - NOT_PREFERRED, - // Temporarily throttled is a better choice than choosing a not-preferred node, - // but NOT_PREFERRED and THROTTLED are generally not comparable. - THROTTLE, - YES; + // order matters only for serialization, do NOT use for comparison + NO(0, 0), + NOT_PREFERRED(1, 2), + THROTTLE(2, 1), + YES(3, 3); // visible for testing static final TransportVersion ALLOCATION_DECISION_NOT_PREFERRED = TransportVersion.fromName("allocation_decision_not_preferred"); + private final int nodeComparisonOrdinal; + private final int decisionComparisonOrdinal; + + Type(int nodeComparisonOrdinal, int decisionComparisonOrdinal) { + this.nodeComparisonOrdinal = nodeComparisonOrdinal; + this.decisionComparisonOrdinal = decisionComparisonOrdinal; + } + public static Type readFrom(StreamInput in) throws IOException { if (in.getTransportVersion().supports(AllocationDecision.ADD_NOT_PREFERRED_ALLOCATION_DECISION)) { return in.readEnum(Type.class); @@ -152,13 +158,6 @@ public static Type readFrom(StreamInput in) throws IOException { } } - /** - * @return lowest decision by natural order - */ - public static Type min(Type a, Type b) { - return a.compareTo(b) < 0 ? a : b; - } - @Override public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().supports(AllocationDecision.ADD_NOT_PREFERRED_ALLOCATION_DECISION)) { @@ -179,8 +178,30 @@ public void writeTo(StreamOutput out) throws IOException { } } - public boolean higherThan(Type other) { - return this.compareTo(other) > 0; + /** + * Compares this decision against another decision (for choosing a node) + *

+ * This comparison is used at the node level when deciding which node to allocate a shard to. We prefer to wait and allocate a shard + * to a THROTTLE'd node than to move a shard to a NOT_PREFERRED node immediately. + * + * @return 0 when this == other, 1 when this > other, -1 when this < other + */ + public int compareToBetweenNodes(Type other) { + return Integer.compare(nodeComparisonOrdinal, other.nodeComparisonOrdinal); + } + + /** + * Compares this decision against another decision (for decision-aggregation) + *

+ * This comparison is used when aggregating the results from many deciders. If one decider returns THROTTLE and + * another returns NOT_PREFERRED, we want to return THROTTLE to ensure we respect any throttling deciders. + * This can only occur in the reconciler or non-desired balancer, in both cases if we see a THROTTLE we want to + * respect that until it resolves. + * + * @return 0 when this == other, 1 when this > other, -1 when this < other + */ + public int compareToBetweenDecisions(Type other) { + return Integer.compare(decisionComparisonOrdinal, other.decisionComparisonOrdinal); } /** @@ -287,7 +308,14 @@ public Multi add(Decision decision) { @Override public Type type() { // returns most negative decision - return decisions.stream().map(Single::type).reduce(Type.YES, Type::min); + Decision.Type worst = Type.YES; + for (Single decision : decisions) { + final var next = decision.type(); + if (next.compareToBetweenDecisions(worst) < 0) { + worst = next; + } + } + return worst; } @Override diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DecisionTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DecisionTests.java index b7ec9e2b81709..83c0a899d4ed3 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DecisionTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DecisionTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.test.EnumSerializationTestUtils; import java.io.IOException; +import java.util.Arrays; import java.util.List; import static org.elasticsearch.cluster.routing.allocation.decider.Decision.Type.ALLOCATION_DECISION_NOT_PREFERRED; @@ -24,6 +25,7 @@ import static org.elasticsearch.cluster.routing.allocation.decider.Decision.Type.NOT_PREFERRED; import static org.elasticsearch.cluster.routing.allocation.decider.Decision.Type.THROTTLE; import static org.elasticsearch.cluster.routing.allocation.decider.Decision.Type.YES; +import static org.hamcrest.Matchers.equalTo; /** * A class for unit testing the {@link Decision} class. @@ -53,8 +55,15 @@ public void testTypeEnumOrder() { EnumSerializationTestUtils.assertEnumSerialization(Type.class, NO, NOT_PREFERRED, THROTTLE, YES); } - public void testTypeHigherThan() { - assertTrue(YES.higherThan(THROTTLE) && THROTTLE.higherThan(NOT_PREFERRED) && NOT_PREFERRED.higherThan(NO)); + public void testTypeComparisonOrder() { + assertThat( + shuffledList(Arrays.asList(Type.values())).stream().sorted(Type::compareToBetweenDecisions).toList(), + equalTo(List.of(NO, THROTTLE, NOT_PREFERRED, YES)) + ); + assertThat( + shuffledList(Arrays.asList(Type.values())).stream().sorted(Type::compareToBetweenNodes).toList(), + equalTo(List.of(NO, NOT_PREFERRED, THROTTLE, YES)) + ); } public void testTypeAllowed() { @@ -98,6 +107,19 @@ public void testSerializationBackwardCompatibility() throws IOException { testReadWriteEnum(NO, OriginalType.class, OriginalType.NO, TransportVersion.minimumCompatible()); } + public void testMultiPrioritisation() { + assertEffectiveDecision(NO, Decision.NO, Decision.THROTTLE, Decision.NOT_PREFERRED, Decision.YES); + assertEffectiveDecision(THROTTLE, Decision.THROTTLE, Decision.NOT_PREFERRED, Decision.YES); + assertEffectiveDecision(NOT_PREFERRED, Decision.NOT_PREFERRED, Decision.YES); + assertEffectiveDecision(YES, Decision.YES); + assertEffectiveDecision(YES); + } + + private void assertEffectiveDecision(Decision.Type effectiveDecision, Decision.Single... decisions) { + final var multi = new Decision.Multi(shuffledList(List.of(decisions))); + assertEquals(effectiveDecision, multi.type()); + } + /** * Test the reading and writing of an enum to a specific transport version (assuming lossless roundtrip) *