From 6014ddec8cafe1adbc5e58ebf75dcff5d8719abb Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Tue, 14 May 2024 10:49:03 +0200 Subject: [PATCH 01/14] Fail fast after cloud node removal --- .../steps/durable_task/DurableTaskStep.java | 2 +- .../support/pickles/ExecutorPickle.java | 2 +- .../steps/ExecutorStepDynamicContext.java | 2 +- .../support/steps/ExecutorStepExecution.java | 86 ++++++++++++------- .../steps/ExecutorStepDynamicContextTest.java | 44 +++++++++- 5 files changed, 100 insertions(+), 36 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java index 4b56cab0..7e0bd7aa 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java @@ -367,7 +367,7 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab } else { LOGGER.fine(() -> "rediscovering that " + node + " has been removed and timeout has expired"); listener().getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); - throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause()); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); } } removedNodeDiscovered = 0; // something else; reset diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java index bd9756ed..5dc55d10 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java @@ -122,7 +122,7 @@ protected Executor tryResolve() throws Exception { Queue.getInstance().cancel(item); owner.getListener().getLogger().printf("Killed %s after waiting for %s because we assume unknown agent %s is never going to appear%n", item.task.getDisplayName(), Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS), placeholder.getAssignedLabel()); - throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause()); + throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); } } } diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java index 5c0f6c35..80f41eca 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java @@ -107,7 +107,7 @@ void resume(StepContext context) throws Exception { exec = item.getFuture().getStartCondition().get(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } catch (TimeoutException x) { listener.getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); - throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause()); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); } catch (CancellationException x) { LOGGER.log(Level.FINE, "ceased to wait for " + node, x); throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.QueueTaskCancelled()); diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index 07418832..a1cab99e 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -26,6 +26,7 @@ import hudson.model.ResourceList; import hudson.model.Result; import hudson.model.Run; +import hudson.model.Slave; import hudson.model.TaskListener; import hudson.model.TopLevelItem; import hudson.model.User; @@ -38,6 +39,7 @@ import hudson.security.ACLContext; import hudson.security.AccessControlled; import hudson.security.Permission; +import hudson.slaves.AbstractCloudSlave; import hudson.slaves.OfflineCause; import hudson.slaves.WorkspaceList; import java.io.IOException; @@ -74,6 +76,7 @@ import org.acegisecurity.Authentication; import org.jenkinsci.plugins.durabletask.executors.ContinuableExecutable; import org.jenkinsci.plugins.durabletask.executors.ContinuedTask; +import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; import org.jenkinsci.plugins.workflow.actions.LabelAction; import org.jenkinsci.plugins.workflow.actions.QueueItemAction; import org.jenkinsci.plugins.workflow.actions.ThreadNameAction; @@ -346,40 +349,53 @@ public static final class QueueTaskCancelled extends CauseOfInterruption { return; } LOGGER.fine(() -> "received node deletion event on " + node.getNodeName()); - Timer.get().schedule(() -> { - Computer c = node.toComputer(); - if (c == null || c.isOnline()) { - LOGGER.fine(() -> "computer for " + node.getNodeName() + " was missing or online, skipping"); - return; - } - LOGGER.fine(() -> "processing node deletion event on " + node.getNodeName()); - for (Executor e : c.getExecutors()) { - Queue.Executable exec = e.getCurrentExecutable(); - if (exec instanceof PlaceholderTask.PlaceholderExecutable) { - PlaceholderTask task = ((PlaceholderTask.PlaceholderExecutable) exec).getParent(); - TaskListener listener; - try { - listener = task.context.get(TaskListener.class); - } catch (Exception x) { - LOGGER.log(Level.WARNING, null, x); - continue; - } - task.withExecution(execution -> { - BodyExecution body = execution.body; - if (body == null) { - listener.getLogger().println("Agent " + node.getNodeName() + " was deleted, but do not have a node body to cancel"); - return; - } - listener.getLogger().println("Agent " + node.getNodeName() + " was deleted; cancelling node body"); - if (Util.isOverridden(BodyExecution.class, body.getClass(), "cancel", Throwable.class)) { - body.cancel(new FlowInterruptedException(Result.ABORTED, false, new RemovedNodeCause())); - } else { // TODO remove once https://github.com/jenkinsci/workflow-cps-plugin/pull/570 is widely deployed - body.cancel(new RemovedNodeCause()); - } - }); + if (isOneShotAgent(node)) { + LOGGER.finest(() -> "Cancelling owner run for one-shot agent " + node.getNodeName() + " immediately"); + cancelOwnerExecution(node, new RemovedNodeCause()); + } else { + LOGGER.finest(() -> "Will cancel owner run for agent " + node.getNodeName() + " after waiting for " + TIMEOUT_WAITING_FOR_NODE_MILLIS + "ms"); + Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeCause(), new RemovedNodeTimeoutCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + } + } + + private static boolean isOneShotAgent(Node node) { + return node instanceof AbstractCloudSlave || + (node instanceof Slave && ((Slave) node).getRetentionStrategy() instanceof OnceRetentionStrategy); + } + + private static void cancelOwnerExecution(Node node, CauseOfInterruption... causes) { + Computer c = node.toComputer(); + if (c == null || c.isOnline()) { + LOGGER.fine(() -> "computer for " + node.getNodeName() + " was missing or online, skipping"); + return; + } + LOGGER.fine(() -> "processing node deletion event on " + node.getNodeName()); + for (Executor e : c.getExecutors()) { + Queue.Executable exec = e.getCurrentExecutable(); + if (exec instanceof PlaceholderTask.PlaceholderExecutable) { + PlaceholderTask task = ((PlaceholderTask.PlaceholderExecutable) exec).getParent(); + TaskListener listener; + try { + listener = task.context.get(TaskListener.class); + } catch (Exception x) { + LOGGER.log(Level.WARNING, null, x); + continue; } + task.withExecution(execution -> { + BodyExecution body = execution.body; + if (body == null) { + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted, but do not have a node body to cancel"); + return; + } + listener.getLogger().println("Agent " + node.getNodeName() + " was deleted; cancelling node body"); + if (Util.isOverridden(BodyExecution.class, body.getClass(), "cancel", Throwable.class)) { + body.cancel(new FlowInterruptedException(Result.ABORTED, false, causes)); + } else { // TODO remove once https://github.com/jenkinsci/workflow-cps-plugin/pull/570 is widely deployed + body.cancel(causes); + } + }); } - }, TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + } } } @@ -391,6 +407,12 @@ public static final class RemovedNodeCause extends CauseOfInterruption { } } + public static final class RemovedNodeTimeoutCause extends CauseOfInterruption { + @Override public String getShortDescription() { + return "Timeout waiting for agent to come back"; + } + } + /** Transient handle of a running executor task. */ private static final class RunningTask { /** null until placeholder executable runs */ diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 6db3d29d..076fda90 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -35,15 +35,20 @@ import java.util.logging.Level; import jenkins.model.InterruptedBuildAction; import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.isA; import static org.hamcrest.Matchers.anyOf; + +import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; import org.jenkinsci.plugins.workflow.job.WorkflowJob; import org.jenkinsci.plugins.workflow.job.WorkflowRun; import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; + +import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -121,7 +126,11 @@ public class ExecutorStepDynamicContextTest { assertThat(j.jenkins.getQueue().getItems(), emptyArray()); InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); assertNotNull(iba); - assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeCause.class))); + assertThat(iba.getCauses(), contains( + isA(ExecutorStepExecution.RemovedNodeCause.class), + isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class + )) + ); }); } @@ -207,4 +216,37 @@ public class ExecutorStepDynamicContextTest { }); } + @Test public void onceRetentionStrategyNodeDisappearance() throws Throwable { + logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + sessions.then(j -> { + // Start up a build that needs executor and then reboot and take the node offline + // Starting job first ensures we don't immediately fail if Node comes from a Cloud + // and takes a min to provision + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("node('ghost') {if (isUnix()) {sh 'sleep infinity'} else {bat 'echo + sleep infinity && ping -n 999999 localhost'}}", true)); + + DumbSlave s = j.createSlave(Label.get("ghost")); + s.setRetentionStrategy(new OnceRetentionStrategy(0)); + j.waitForMessage("+ sleep infinity", p.scheduleBuild2(0).waitForStart()); + j.jenkins.removeNode(s); + }); + + sessions.then(j -> { + // Start up a build and then reboot and take the node offline + assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted + assertNull(j.jenkins.getNode("ghost")); // Make sure test impl is correctly deleted + WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); + j.assertLogNotContains("slave0 has been removed for ", run); + assertThat(j.jenkins.getQueue().getItems(), emptyArray()); + InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); + assertNotNull(iba); + assertThat(iba.getCauses(), + allOf( + contains(isA(ExecutorStepExecution.RemovedNodeCause.class)), + not(contains(isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class))) + ) + ); + }); + } } From a68e629a9d4aa35184077d549c52b819ce4add23 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Tue, 14 May 2024 15:16:35 +0200 Subject: [PATCH 02/14] Remove useless assertion The agent is never named `ghost`, rather `slave0` --- .../workflow/support/steps/ExecutorStepDynamicContextTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 076fda90..c48ab76d 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -119,7 +119,6 @@ public class ExecutorStepDynamicContextTest { sessions.then(j -> { // Start up a build and then reboot and take the node offline assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted - assertNull(j.jenkins.getNode("ghost")); // Make sure test impl is correctly deleted WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); j.assertLogContains("slave0 has been removed for ", run); @@ -234,7 +233,6 @@ public class ExecutorStepDynamicContextTest { sessions.then(j -> { // Start up a build and then reboot and take the node offline assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted - assertNull(j.jenkins.getNode("ghost")); // Make sure test impl is correctly deleted WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); j.assertLogNotContains("slave0 has been removed for ", run); From 17b1e376b83e3c8dfc70a3f78e64f323baca265b Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Tue, 14 May 2024 17:00:37 +0200 Subject: [PATCH 03/14] Suppress the restart, as the node deletion is processed immediately, this is causing some havoc. --- .../support/steps/ExecutorStepDynamicContextTest.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index c48ab76d..9ad9a02b 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -226,14 +226,9 @@ public class ExecutorStepDynamicContextTest { DumbSlave s = j.createSlave(Label.get("ghost")); s.setRetentionStrategy(new OnceRetentionStrategy(0)); - j.waitForMessage("+ sleep infinity", p.scheduleBuild2(0).waitForStart()); + var run = p.scheduleBuild2(0).waitForStart(); + j.waitForMessage("+ sleep infinity", run); j.jenkins.removeNode(s); - }); - - sessions.then(j -> { - // Start up a build and then reboot and take the node offline - assertEquals(0, j.jenkins.getLabel("ghost").getNodes().size()); // Make sure test impl is correctly deleted - WorkflowRun run = j.jenkins.getItemByFullName("p", WorkflowJob.class).getLastBuild(); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); j.assertLogNotContains("slave0 has been removed for ", run); assertThat(j.jenkins.getQueue().getItems(), emptyArray()); From f23bf967f5b1c97e8f86acae4b4e8b917add7993 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 09:24:37 +0200 Subject: [PATCH 04/14] Simplify execution * Use only one cause * When build is cancelled immediately, use RemovedNodeCause * When build is cancelled after observing timeout, use RemovedNodeTimeoutCause * Introduce a marker interface to simplify matching in AgentErrorCondition --- .../steps/durable_task/DurableTaskStep.java | 2 +- .../support/pickles/ExecutorPickle.java | 2 +- .../support/steps/AgentErrorCondition.java | 3 +-- .../steps/ExecutorStepDynamicContext.java | 2 +- .../support/steps/ExecutorStepExecution.java | 18 ++++++++++++++---- .../steps/durable_task/ShellStepTest.java | 2 +- .../steps/ExecutorStepDynamicContextTest.java | 19 +++---------------- 7 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java index 7e0bd7aa..1a92ea37 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/steps/durable_task/DurableTaskStep.java @@ -367,7 +367,7 @@ static final class Execution extends AbstractStepExecutionImpl implements Runnab } else { LOGGER.fine(() -> "rediscovering that " + node + " has been removed and timeout has expired"); listener().getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); - throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeTimeoutCause()); } } removedNodeDiscovered = 0; // something else; reset diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java index 5dc55d10..b1c98490 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/pickles/ExecutorPickle.java @@ -122,7 +122,7 @@ protected Executor tryResolve() throws Exception { Queue.getInstance().cancel(item); owner.getListener().getLogger().printf("Killed %s after waiting for %s because we assume unknown agent %s is never going to appear%n", item.task.getDisplayName(), Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS), placeholder.getAssignedLabel()); - throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); + throw new FlowInterruptedException(Result.ABORTED, new ExecutorStepExecution.RemovedNodeTimeoutCause()); } } } diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java index 129e7b09..95525fbe 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java @@ -57,8 +57,7 @@ public final class AgentErrorCondition extends ErrorCondition { if (t instanceof AgentOfflineException) { return true; } - if (t instanceof FlowInterruptedException && ((FlowInterruptedException) t).getCauses().stream().anyMatch( - c -> c instanceof ExecutorStepExecution.RemovedNodeCause || c instanceof ExecutorStepExecution.QueueTaskCancelled)) { + if (t instanceof FlowInterruptedException && ((FlowInterruptedException) t).getCauses().stream().anyMatch(ExecutorStepExecution.Retryable.class::isInstance)) { return true; } if (isClosedChannelException(t)) { diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java index 80f41eca..a2757c82 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContext.java @@ -107,7 +107,7 @@ void resume(StepContext context) throws Exception { exec = item.getFuture().getStartCondition().get(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } catch (TimeoutException x) { listener.getLogger().println(node + " has been removed for " + Util.getTimeSpanString(ExecutorStepExecution.TIMEOUT_WAITING_FOR_NODE_MILLIS) + ", assuming it is not coming back"); - throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeCause(), new ExecutorStepExecution.RemovedNodeTimeoutCause()); + throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.RemovedNodeTimeoutCause()); } catch (CancellationException x) { LOGGER.log(Level.FINE, "ceased to wait for " + node, x); throw new FlowInterruptedException(Result.ABORTED, /* TODO false probably more appropriate */true, new ExecutorStepExecution.QueueTaskCancelled()); diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index a1cab99e..15bfc44f 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -337,12 +337,17 @@ public void stop(@NonNull Throwable cause) throws Exception { } - public static final class QueueTaskCancelled extends CauseOfInterruption { + public static final class QueueTaskCancelled extends RetryableCauseOfInterruption { @Override public String getShortDescription() { return Messages.ExecutorStepExecution_queue_task_cancelled(); } } + /** + * A marker interface for {@link CauseOfInterruption} instances that can be retried through {@link AgentErrorCondition}. + */ + public interface Retryable {} + @Extension public static final class RemovedNodeListener extends NodeListener { @Override protected void onDeleted(@NonNull Node node) { if (!RemovedNodeCause.ENABLED) { @@ -354,7 +359,7 @@ public static final class QueueTaskCancelled extends CauseOfInterruption { cancelOwnerExecution(node, new RemovedNodeCause()); } else { LOGGER.finest(() -> "Will cancel owner run for agent " + node.getNodeName() + " after waiting for " + TIMEOUT_WAITING_FOR_NODE_MILLIS + "ms"); - Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeCause(), new RemovedNodeTimeoutCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeTimeoutCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } } @@ -399,7 +404,7 @@ private static void cancelOwnerExecution(Node node, CauseOfInterruption... cause } } - public static final class RemovedNodeCause extends CauseOfInterruption { + public static final class RemovedNodeCause extends RetryableCauseOfInterruption { @SuppressFBWarnings(value = "MS_SHOULD_BE_FINAL", justification = "deliberately mutable") public static boolean ENABLED = Boolean.parseBoolean(System.getProperty(ExecutorStepExecution.class.getName() + ".REMOVED_NODE_DETECTION", "true")); @Override public String getShortDescription() { @@ -407,12 +412,17 @@ public static final class RemovedNodeCause extends CauseOfInterruption { } } - public static final class RemovedNodeTimeoutCause extends CauseOfInterruption { + public static final class RemovedNodeTimeoutCause extends RetryableCauseOfInterruption { @Override public String getShortDescription() { return "Timeout waiting for agent to come back"; } } + /** + * Base class for a cause of interruption that can be retried via {@link AgentErrorCondition}. + */ + private abstract static class RetryableCauseOfInterruption extends CauseOfInterruption implements Retryable {} + /** Transient handle of a running executor task. */ private static final class RunningTask { /** null until placeholder executable runs */ diff --git a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java index b8c7ac12..0f29c793 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java @@ -680,7 +680,7 @@ private static final class HelloNote extends ConsoleNote> { j.waitForMessage(Functions.isWindows() ? ">ping" : "+ sleep", b); j.jenkins.removeNode(s); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); - j.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b); + j.waitForMessage(new ExecutorStepExecution.RemovedNodeTimeoutCause().getShortDescription(), b); } @Issue("JENKINS-44521") diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 9ad9a02b..25a3fed3 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -35,7 +35,6 @@ import java.util.logging.Level; import jenkins.model.InterruptedBuildAction; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.*; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.isA; @@ -48,10 +47,8 @@ import org.jenkinsci.plugins.workflow.job.WorkflowRun; import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; -import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; @@ -91,7 +88,7 @@ public class ExecutorStepDynamicContextTest { assertNotNull(iba); assertThat(iba.getCauses(), contains(anyOf( isA(ExecutorStepExecution.QueueTaskCancelled.class), // normal - isA(ExecutorStepExecution.RemovedNodeCause.class)))); // observed on occasion + isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class)))); // observed on occasion }); } @@ -125,11 +122,7 @@ public class ExecutorStepDynamicContextTest { assertThat(j.jenkins.getQueue().getItems(), emptyArray()); InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); assertNotNull(iba); - assertThat(iba.getCauses(), contains( - isA(ExecutorStepExecution.RemovedNodeCause.class), - isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class - )) - ); + assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class))); }); } @@ -230,16 +223,10 @@ public class ExecutorStepDynamicContextTest { j.waitForMessage("+ sleep infinity", run); j.jenkins.removeNode(s); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); - j.assertLogNotContains("slave0 has been removed for ", run); assertThat(j.jenkins.getQueue().getItems(), emptyArray()); InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); assertNotNull(iba); - assertThat(iba.getCauses(), - allOf( - contains(isA(ExecutorStepExecution.RemovedNodeCause.class)), - not(contains(isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class))) - ) - ); + assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeCause.class))); }); } } From 2e02220e4ba1c5992bbbe0c4548a289708ddca35 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 09:27:09 +0200 Subject: [PATCH 05/14] Simplify test --- .../support/steps/ExecutorStepDynamicContextTest.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 25a3fed3..d5a96744 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -211,14 +211,10 @@ public class ExecutorStepDynamicContextTest { @Test public void onceRetentionStrategyNodeDisappearance() throws Throwable { logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); sessions.then(j -> { - // Start up a build that needs executor and then reboot and take the node offline - // Starting job first ensures we don't immediately fail if Node comes from a Cloud - // and takes a min to provision - WorkflowJob p = j.createProject(WorkflowJob.class, "p"); - p.setDefinition(new CpsFlowDefinition("node('ghost') {if (isUnix()) {sh 'sleep infinity'} else {bat 'echo + sleep infinity && ping -n 999999 localhost'}}", true)); - DumbSlave s = j.createSlave(Label.get("ghost")); s.setRetentionStrategy(new OnceRetentionStrategy(0)); + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("node('ghost') {if (isUnix()) {sh 'sleep infinity'} else {bat 'echo + sleep infinity && ping -n 999999 localhost'}}", true)); var run = p.scheduleBuild2(0).waitForStart(); j.waitForMessage("+ sleep infinity", run); j.jenkins.removeNode(s); From 118d49e4fb5cb96ae26ccab192b27550c43c4bc4 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 09:27:37 +0200 Subject: [PATCH 06/14] Review: adjust loging levels --- .../plugins/workflow/support/steps/ExecutorStepExecution.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index 15bfc44f..caa1400b 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -355,10 +355,10 @@ public interface Retryable {} } LOGGER.fine(() -> "received node deletion event on " + node.getNodeName()); if (isOneShotAgent(node)) { - LOGGER.finest(() -> "Cancelling owner run for one-shot agent " + node.getNodeName() + " immediately"); + LOGGER.fine(() -> "Cancelling owner run for one-shot agent " + node.getNodeName() + " immediately"); cancelOwnerExecution(node, new RemovedNodeCause()); } else { - LOGGER.finest(() -> "Will cancel owner run for agent " + node.getNodeName() + " after waiting for " + TIMEOUT_WAITING_FOR_NODE_MILLIS + "ms"); + LOGGER.fine(() -> "Will cancel owner run for agent " + node.getNodeName() + " after waiting for " + TIMEOUT_WAITING_FOR_NODE_MILLIS + "ms"); Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeTimeoutCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } } From 5753b5d5cbd0ba583200f8aaa18565e3a88117c1 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 09:34:28 +0200 Subject: [PATCH 07/14] Add a test covering cloud agents --- pom.xml | 6 +++ .../steps/ExecutorStepDynamicContextTest.java | 51 ++++++++++++++----- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/pom.xml b/pom.xml index eabd17e1..3e1a17e4 100644 --- a/pom.xml +++ b/pom.xml @@ -161,6 +161,12 @@ 1.2 test + + org.jenkins-ci.plugins + mock-slave + 153.v9768799a_2294 + test + org.awaitility awaitility diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index d5a96744..4f02bdf0 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -24,6 +24,15 @@ package org.jenkinsci.plugins.workflow.support.steps; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.emptyArray; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.isA; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + import hudson.model.Label; import hudson.model.Queue; import hudson.model.Result; @@ -34,21 +43,13 @@ import java.util.List; import java.util.logging.Level; import jenkins.model.InterruptedBuildAction; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.contains; -import static org.hamcrest.Matchers.emptyArray; -import static org.hamcrest.Matchers.isA; -import static org.hamcrest.Matchers.anyOf; - +import org.jenkinci.plugins.mock_slave.MockCloud; import org.jenkinsci.plugins.durabletask.executors.OnceRetentionStrategy; import org.jenkinsci.plugins.workflow.cps.CpsFlowDefinition; import org.jenkinsci.plugins.workflow.flow.FlowExecutionList; import org.jenkinsci.plugins.workflow.job.WorkflowJob; import org.jenkinsci.plugins.workflow.job.WorkflowRun; import org.jenkinsci.plugins.workflow.test.steps.SemaphoreStep; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; @@ -65,6 +66,10 @@ public class ExecutorStepDynamicContextTest { @Rule public TemporaryFolder tmp = new TemporaryFolder(); @Rule public LoggerRule logging = new LoggerRule(); + private void commonSetup() { + logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + } + @Test public void canceledQueueItem() throws Throwable { sessions.then(j -> { DumbSlave s = j.createSlave(Label.get("remote")); @@ -100,7 +105,7 @@ public class ExecutorStepDynamicContextTest { */ @Issue("JENKINS-36013") @Test public void normalNodeDisappearance() throws Throwable { - logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + commonSetup(); sessions.then(j -> { // Start up a build that needs executor and then reboot and take the node offline // Starting job first ensures we don't immediately fail if Node comes from a Cloud @@ -128,7 +133,7 @@ public class ExecutorStepDynamicContextTest { @Issue("JENKINS-36013") @Test public void parallelNodeDisappearance() throws Throwable { - logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + commonSetup(); sessions.then(j -> { WorkflowJob p = j.createProject(WorkflowJob.class, "p"); p.setDefinition(new CpsFlowDefinition("def bs = [:]; for (int _i = 0; _i < 5; _i++) {def i = _i; bs[/b$i/] = {node('remote') {semaphore(/s$i/)}}}; parallel bs", true)); @@ -209,7 +214,7 @@ public class ExecutorStepDynamicContextTest { } @Test public void onceRetentionStrategyNodeDisappearance() throws Throwable { - logging.recordPackage(ExecutorStepExecution.class, Level.FINE).record(FlowExecutionList.class, Level.FINE); + commonSetup(); sessions.then(j -> { DumbSlave s = j.createSlave(Label.get("ghost")); s.setRetentionStrategy(new OnceRetentionStrategy(0)); @@ -225,4 +230,26 @@ public class ExecutorStepDynamicContextTest { assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeCause.class))); }); } + + @Test public void cloudNodeDisappearance() throws Throwable { + commonSetup(); + sessions.then(j -> { + var mockCloud = new MockCloud("mock"); + mockCloud.setLabels("mock"); + j.jenkins.clouds.add(mockCloud); + WorkflowJob p = j.createProject(WorkflowJob.class, "p"); + p.setDefinition(new CpsFlowDefinition("node('mock') {if (isUnix()) {sh 'sleep infinity'} else {bat 'echo + sleep infinity && ping -n 999999 localhost'}}", true)); + WorkflowRun run = p.scheduleBuild2(0).waitForStart(); + j.waitForMessage("+ sleep infinity", run); + var mockNodes = j.jenkins.getLabel("mock").getNodes(); + assertThat(mockNodes, hasSize(1)); + var mockNode = mockNodes.iterator().next(); + j.jenkins.removeNode(mockNode); + j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(run)); + assertThat(j.jenkins.getQueue().getItems(), emptyArray()); + InterruptedBuildAction iba = run.getAction(InterruptedBuildAction.class); + assertNotNull(iba); + assertThat(iba.getCauses(), contains(isA(ExecutorStepExecution.RemovedNodeCause.class))); + }); + } } From c4e7a10e49434ee8b2d3ef36841f2a6982af28d9 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 09:36:16 +0200 Subject: [PATCH 08/14] Remove active wait and remove unused tmp folder while I'm here --- .../support/steps/ExecutorStepDynamicContextTest.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 4f02bdf0..9123b183 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -24,6 +24,7 @@ package org.jenkinsci.plugins.workflow.support.steps; +import static org.awaitility.Awaitility.await; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.contains; @@ -53,7 +54,6 @@ import org.junit.ClassRule; import org.junit.Rule; import org.junit.Test; -import org.junit.rules.TemporaryFolder; import org.jvnet.hudson.test.BuildWatcher; import org.jvnet.hudson.test.Issue; import org.jvnet.hudson.test.JenkinsSessionRule; @@ -63,7 +63,6 @@ public class ExecutorStepDynamicContextTest { @ClassRule public static BuildWatcher buildWatcher = new BuildWatcher(); @Rule public JenkinsSessionRule sessions = new JenkinsSessionRule(); - @Rule public TemporaryFolder tmp = new TemporaryFolder(); @Rule public LoggerRule logging = new LoggerRule(); private void commonSetup() { @@ -82,9 +81,7 @@ private void commonSetup() { sessions.then(j -> { SemaphoreStep.success("wait/1", null); WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1); - while (Queue.getInstance().getItems().length == 0) { - Thread.sleep(100); - } + await().until(() -> j.jenkins.getQueue().getItems(), emptyArray()); Queue.Item[] items = Queue.getInstance().getItems(); assertEquals(1, items.length); Queue.getInstance().cancel(items[0]); From 04e044ec595a7c24bc5f4685d0b378528b2dda5b Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 10:53:30 +0200 Subject: [PATCH 09/14] Give it a longer timeout --- .../workflow/support/steps/ExecutorStepDynamicContextTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 9123b183..0c04d546 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -40,6 +40,7 @@ import hudson.slaves.DumbSlave; import hudson.slaves.RetentionStrategy; import java.io.File; +import java.time.Duration; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; @@ -81,7 +82,7 @@ private void commonSetup() { sessions.then(j -> { SemaphoreStep.success("wait/1", null); WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1); - await().until(() -> j.jenkins.getQueue().getItems(), emptyArray()); + await().timeout(Duration.ofMinutes(1)).until(() -> j.jenkins.getQueue().getItems(), emptyArray()); Queue.Item[] items = Queue.getInstance().getItems(); assertEquals(1, items.length); Queue.getInstance().cancel(items[0]); From b62ed6f71bb82b6cd006afed25db9b55697dcb2e Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 12:33:39 +0200 Subject: [PATCH 10/14] Fix invalid expectation --- .../support/steps/ExecutorStepDynamicContextTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 0c04d546..31b7bfd1 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -27,10 +27,12 @@ import static org.awaitility.Awaitility.await; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.arrayWithSize; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.isA; +import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -82,9 +84,7 @@ private void commonSetup() { sessions.then(j -> { SemaphoreStep.success("wait/1", null); WorkflowRun b = j.jenkins.getItemByFullName("p", WorkflowJob.class).getBuildByNumber(1); - await().timeout(Duration.ofMinutes(1)).until(() -> j.jenkins.getQueue().getItems(), emptyArray()); - Queue.Item[] items = Queue.getInstance().getItems(); - assertEquals(1, items.length); + var items = await().timeout(Duration.ofMinutes(1)).until(() -> j.jenkins.getQueue().getItems(), arrayWithSize(1)); Queue.getInstance().cancel(items[0]); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); InterruptedBuildAction iba = b.getAction(InterruptedBuildAction.class); From 5a8617e3d6d976d548a1573243a150456fbc99a1 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 15:45:21 +0200 Subject: [PATCH 11/14] Remove Retryable under AgentErrorCondition --- .../workflow/support/steps/AgentErrorCondition.java | 8 +++++++- .../workflow/support/steps/ExecutorStepExecution.java | 7 +------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java index 95525fbe..d858ecfe 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/AgentErrorCondition.java @@ -36,6 +36,7 @@ import java.io.IOException; import java.nio.channels.ClosedChannelException; import java.util.stream.Stream; +import jenkins.model.CauseOfInterruption; import org.jenkinsci.Symbol; import org.jenkinsci.plugins.workflow.flow.ErrorCondition; import org.jenkinsci.plugins.workflow.steps.FlowInterruptedException; @@ -57,7 +58,7 @@ public final class AgentErrorCondition extends ErrorCondition { if (t instanceof AgentOfflineException) { return true; } - if (t instanceof FlowInterruptedException && ((FlowInterruptedException) t).getCauses().stream().anyMatch(ExecutorStepExecution.Retryable.class::isInstance)) { + if (t instanceof FlowInterruptedException && ((FlowInterruptedException) t).getCauses().stream().anyMatch(Retryable.class::isInstance)) { return true; } if (isClosedChannelException(t)) { @@ -89,6 +90,11 @@ private static boolean isClosedChannelException(Throwable t) { } } + /** + * A marker interface for {@link CauseOfInterruption} instances that can be retried through {@link AgentErrorCondition}. + */ + public interface Retryable {} + @Symbol("agent") @Extension public static final class DescriptorImpl extends ErrorConditionDescriptor { diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index caa1400b..a12c904c 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -343,11 +343,6 @@ public static final class QueueTaskCancelled extends RetryableCauseOfInterruptio } } - /** - * A marker interface for {@link CauseOfInterruption} instances that can be retried through {@link AgentErrorCondition}. - */ - public interface Retryable {} - @Extension public static final class RemovedNodeListener extends NodeListener { @Override protected void onDeleted(@NonNull Node node) { if (!RemovedNodeCause.ENABLED) { @@ -421,7 +416,7 @@ public static final class RemovedNodeTimeoutCause extends RetryableCauseOfInterr /** * Base class for a cause of interruption that can be retried via {@link AgentErrorCondition}. */ - private abstract static class RetryableCauseOfInterruption extends CauseOfInterruption implements Retryable {} + private abstract static class RetryableCauseOfInterruption extends CauseOfInterruption implements AgentErrorCondition.Retryable {} /** Transient handle of a running executor task. */ private static final class RunningTask { From 797c108c63a131b6f2ac90298a18bb378335c1b5 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 15:47:50 +0200 Subject: [PATCH 12/14] Use RemovedNodeCause even for static agents removal. --- .../plugins/workflow/support/steps/ExecutorStepExecution.java | 2 +- .../workflow/support/steps/ExecutorStepDynamicContextTest.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java index a12c904c..cd1203cb 100644 --- a/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java +++ b/src/main/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepExecution.java @@ -354,7 +354,7 @@ public static final class QueueTaskCancelled extends RetryableCauseOfInterruptio cancelOwnerExecution(node, new RemovedNodeCause()); } else { LOGGER.fine(() -> "Will cancel owner run for agent " + node.getNodeName() + " after waiting for " + TIMEOUT_WAITING_FOR_NODE_MILLIS + "ms"); - Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeTimeoutCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); + Timer.get().schedule(() -> cancelOwnerExecution(node, new RemovedNodeCause()), TIMEOUT_WAITING_FOR_NODE_MILLIS, TimeUnit.MILLISECONDS); } } diff --git a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java index 31b7bfd1..0b05323b 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/support/steps/ExecutorStepDynamicContextTest.java @@ -32,7 +32,6 @@ import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.isA; -import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -91,7 +90,7 @@ private void commonSetup() { assertNotNull(iba); assertThat(iba.getCauses(), contains(anyOf( isA(ExecutorStepExecution.QueueTaskCancelled.class), // normal - isA(ExecutorStepExecution.RemovedNodeTimeoutCause.class)))); // observed on occasion + isA(ExecutorStepExecution.RemovedNodeCause.class)))); // observed on occasion }); } From c81e29084255d7816c4f5b9874c0773ad56561b9 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 16:13:50 +0200 Subject: [PATCH 13/14] Fix back expected message --- .../workflow/steps/durable_task/ShellStepTest.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java index 0f29c793..54b42ec6 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java @@ -1,7 +1,11 @@ package org.jenkinsci.plugins.workflow.steps.durable_task; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.*; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.hasItemInArray; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.not; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; @@ -680,7 +684,7 @@ private static final class HelloNote extends ConsoleNote> { j.waitForMessage(Functions.isWindows() ? ">ping" : "+ sleep", b); j.jenkins.removeNode(s); j.assertBuildStatus(Result.ABORTED, j.waitForCompletion(b)); - j.waitForMessage(new ExecutorStepExecution.RemovedNodeTimeoutCause().getShortDescription(), b); + j.waitForMessage(new ExecutorStepExecution.RemovedNodeCause().getShortDescription(), b); } @Issue("JENKINS-44521") From 04e4192dadec01ff5b041e961136aee5c74a75f4 Mon Sep 17 00:00:00 2001 From: Vincent Latombe Date: Wed, 15 May 2024 16:38:01 +0200 Subject: [PATCH 14/14] Reverting imports --- .../plugins/workflow/steps/durable_task/ShellStepTest.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java index 54b42ec6..b8c7ac12 100644 --- a/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java +++ b/src/test/java/org/jenkinsci/plugins/workflow/steps/durable_task/ShellStepTest.java @@ -1,11 +1,7 @@ package org.jenkinsci.plugins.workflow.steps.durable_task; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.containsInAnyOrder; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.hasItemInArray; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.*; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull;