diff --git a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java index 1791792dfca6..302c0c4223a6 100644 --- a/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java +++ b/core/trino-main/src/main/java/io/trino/execution/SqlTaskManager.java @@ -36,6 +36,7 @@ import io.trino.execution.buffer.BufferResult; import io.trino.execution.buffer.OutputBuffers; import io.trino.execution.buffer.PipelinedOutputBuffers; +import io.trino.execution.executor.PrioritizedSplitRunner; import io.trino.execution.executor.TaskExecutor; import io.trino.execution.executor.TaskExecutor.RunningSplitInfo; import io.trino.memory.LocalMemoryManager; @@ -705,6 +706,31 @@ private Optional createStuckSplitTasksInterrupter( taskExecutor)); } + /** + * The class detects and interrupts runaway splits. It interrupts threads via failing the task that is holding the split + * and relying on {@link PrioritizedSplitRunner#destroy()} method to actually interrupt the responsible thread. + * The detection is invoked periodically with the frequency of {@link StuckSplitTasksInterrupter#stuckSplitsDetectionInterval}. + * A thread gets interrupted once the split processing continues beyond {@link StuckSplitTasksInterrupter#interruptStuckSplitTasksTimeout} and + * the split threaddump matches with {@link StuckSplitTasksInterrupter#stuckSplitStackTracePredicate}.

+ * + * There is a potential race condition for this {@link StuckSplitTasksInterrupter} class. The problematic flow is that we may + * kill a task that is long-running, but not really stuck on the code that matches {@link StuckSplitTasksInterrupter#stuckSplitStackTracePredicate} (e.g. JONI code). + * Consider the following example: + *

    + *
  1. We find long-running splits; we get A, B, C.
  2. + *
  3. None of those is actually running JONI code.
  4. + *
  5. just before when we investigate stack trace for A, the underlying thread already switched to some other unrelated split D; and D is actually running JONI
  6. + * we get the stacktrace for what we believe is A, but it is for D, and we decide we should kill the task that A belongs to + *
  7. (clash!!!) wrong decision is made
  8. + *
+ * A proposed fix and more details of this issue are at: pull/13272. + * We decided not to fix the race condition due to + *
    + *
  1. its extremely low chance of occurring
  2. + *
  3. potential low impact if it indeed happened
  4. + *
  5. extra synchronization complexity the patch would add
  6. + *
+ */ private class StuckSplitTasksInterrupter { private final Duration interruptStuckSplitTasksTimeout; diff --git a/core/trino-main/src/main/java/io/trino/execution/executor/TaskExecutor.java b/core/trino-main/src/main/java/io/trino/execution/executor/TaskExecutor.java index 3bf50f16a3ef..c9e2a716ab7c 100644 --- a/core/trino-main/src/main/java/io/trino/execution/executor/TaskExecutor.java +++ b/core/trino-main/src/main/java/io/trino/execution/executor/TaskExecutor.java @@ -882,6 +882,12 @@ public Set getStuckSplitTaskIds(Duration processingDurationThreshold, Pr .filter(filter).map(RunningSplitInfo::getTaskId).collect(toImmutableSet()); } + /** + * A class representing a split that is running on the TaskRunner. + * It has a Thread object that gets assigned while assigning the split + * to the taskRunner. However, when the TaskRunner moves to a different split, + * the thread stored here will not remain assigned to this split anymore. + */ public static class RunningSplitInfo implements Comparable {