From a17aea599e56c18c07767bf50d5b9603ccf2e315 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 18:32:16 -0400 Subject: [PATCH] print reason why parent task was cancelled (#14604) (#14710) (cherry picked from commit dfb8449ed8f85e0c1e8601c86d17b833e1641e0f) Signed-off-by: kkewwei Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- CHANGELOG.md | 1 + .../cluster/node/tasks/CancellableTasksIT.java | 4 ++-- .../tasks/TaskCancellationService.java | 2 +- .../java/org/opensearch/tasks/TaskManager.java | 16 ++++++++++++---- .../node/tasks/CancellableTasksTests.java | 2 +- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad9585fc2f0a8..27784a5f546ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix race condition while parsing derived fields from search definition ([14445](https://github.com/opensearch-project/OpenSearch/pull/14445)) - Add allowlist setting for ingest-common and search-pipeline-common processors ([#14439](https://github.com/opensearch-project/OpenSearch/issues/14439)) - Create SystemIndexRegistry with helper method matchesSystemIndex ([#14415](https://github.com/opensearch-project/OpenSearch/pull/14415)) +- Print reason why parent task was cancelled ([#14604](https://github.com/opensearch-project/OpenSearch/issues/14604)) ### Dependencies - Update to Apache Lucene 9.11.1 ([#14042](https://github.com/opensearch-project/OpenSearch/pull/14042), [#14576](https://github.com/opensearch-project/OpenSearch/pull/14576)) diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksIT.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksIT.java index bdb36b62ada21..d8a4bed4740bf 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksIT.java @@ -327,7 +327,7 @@ public void testFailedToStartChildTaskAfterCancelled() throws Exception { mainAction.startSubTask(taskId, subRequest, future); TransportException te = expectThrows(TransportException.class, future::actionGet); assertThat(te.getCause(), instanceOf(TaskCancelledException.class)); - assertThat(te.getCause().getMessage(), equalTo("The parent task was cancelled, shouldn't start any child tasks")); + assertThat(te.getCause().getMessage(), equalTo("The parent task was cancelled, shouldn't start any child tasks, by user request")); allowEntireRequest(rootRequest); waitForRootTask(rootTaskFuture); ensureAllBansRemoved(); @@ -386,7 +386,7 @@ static void waitForRootTask(ActionFuture rootTask) { assertThat( cause.getMessage(), anyOf( - equalTo("The parent task was cancelled, shouldn't start any child tasks"), + equalTo("The parent task was cancelled, shouldn't start any child tasks, by user request"), containsString("Task cancelled before it started:"), equalTo("Task was cancelled while executing") ) diff --git a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java index 4a97513d8f81a..f2dd15b32a7b2 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskCancellationService.java @@ -93,7 +93,7 @@ void cancelTaskAndDescendants(CancellableTask task, String reason, boolean waitF Collection childrenNodes = taskManager.startBanOnChildrenNodes(task.getId(), () -> { logger.trace("child tasks of parent [{}] are completed", taskId); groupedListener.onResponse(null); - }); + }, reason); taskManager.cancel(task, reason, () -> { logger.trace("task [{}] is cancelled", taskId); groupedListener.onResponse(null); diff --git a/server/src/main/java/org/opensearch/tasks/TaskManager.java b/server/src/main/java/org/opensearch/tasks/TaskManager.java index a49968ab85e89..6ad06da9d2fa2 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskManager.java +++ b/server/src/main/java/org/opensearch/tasks/TaskManager.java @@ -510,17 +510,22 @@ public Set getBannedTaskIds() { return Collections.unmodifiableSet(banedParents.keySet()); } + public Collection startBanOnChildrenNodes(long taskId, Runnable onChildTasksCompleted) { + return startBanOnChildrenNodes(taskId, onChildTasksCompleted, "unknown"); + } + /** * Start rejecting new child requests as the parent task was cancelled. * * @param taskId the parent task id * @param onChildTasksCompleted called when all child tasks are completed or failed + * @param reason the ban reason * @return the set of current nodes that have outstanding child tasks */ - public Collection startBanOnChildrenNodes(long taskId, Runnable onChildTasksCompleted) { + public Collection startBanOnChildrenNodes(long taskId, Runnable onChildTasksCompleted, String reason) { final CancellableTaskHolder holder = cancellableTasks.get(taskId); if (holder != null) { - return holder.startBan(onChildTasksCompleted); + return holder.startBan(onChildTasksCompleted, reason); } else { onChildTasksCompleted.run(); return Collections.emptySet(); @@ -585,6 +590,7 @@ private static class CancellableTaskHolder { private List cancellationListeners = null; private Map childTasksPerNode = null; private boolean banChildren = false; + private String banReason; private List childTaskCompletedListeners = null; CancellableTaskHolder(CancellableTask task) { @@ -662,7 +668,7 @@ public CancellableTask getTask() { synchronized void registerChildNode(DiscoveryNode node) { if (banChildren) { - throw new TaskCancelledException("The parent task was cancelled, shouldn't start any child tasks"); + throw new TaskCancelledException("The parent task was cancelled, shouldn't start any child tasks, " + banReason); } if (childTasksPerNode == null) { childTasksPerNode = new HashMap<>(); @@ -686,11 +692,13 @@ void unregisterChildNode(DiscoveryNode node) { notifyListeners(listeners); } - Set startBan(Runnable onChildTasksCompleted) { + Set startBan(Runnable onChildTasksCompleted, String reason) { final Set pendingChildNodes; final Runnable toRun; synchronized (this) { banChildren = true; + assert reason != null; + banReason = reason; if (childTasksPerNode == null) { pendingChildNodes = Collections.emptySet(); } else { diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksTests.java index 6bedcc5ec6feb..1f222f7d0627e 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/tasks/CancellableTasksTests.java @@ -428,7 +428,7 @@ public void testRegisterAndExecuteChildTaskWhileParentTaskIsBeingCanceled() thro ); assertThat(cancelledException.getMessage(), startsWith("Task cancelled before it started:")); CountDownLatch latch = new CountDownLatch(1); - taskManager.startBanOnChildrenNodes(parentTaskId.getId(), latch::countDown); + taskManager.startBanOnChildrenNodes(parentTaskId.getId(), latch::countDown, cancelledException.getMessage()); assertTrue("onChildTasksCompleted() is not invoked", latch.await(1, TimeUnit.SECONDS)); }