Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Comment thread
losipiuk marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.execution.scheduler.faulttolerant;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
Expand Down Expand Up @@ -206,6 +207,9 @@
public class EventDrivenFaultTolerantQueryScheduler
implements QueryScheduler
{
@VisibleForTesting
public static final Duration NO_FINAL_TASK_INFO_CHECK_INTERVAL = new Duration(1, MINUTES);

private static final Logger log = Logger.get(EventDrivenFaultTolerantQueryScheduler.class);

private final QueryStateMachine queryStateMachine;
Expand Down Expand Up @@ -1676,17 +1680,18 @@ public Void onSinkInstanceHandleAcquired(SinkInstanceHandleAcquiredEvent sinkIns
log.error("Did not receive final task info for task %s after it FINISHED; internal inconsistency; failing query", task.getTaskId());
queryStateMachine.transitionToFailed(new TrinoException(GENERIC_INTERNAL_ERROR, "Did not receive final task info for task after it finished; failing query"));
}
}, 1, MINUTES);
}, NO_FINAL_TASK_INFO_CHECK_INTERVAL.toMillis(), MILLISECONDS);
case CANCELED, ABORTED, FAILED -> scheduledExecutorService.schedule(() -> {
if (!finalTaskInfoReceived.get()) {
log.error("Did not receive final task info for task %s after it %s; internal inconsistency; marking task failed in scheduler to unblock query progression", taskStatus.getState(), task.getTaskId());
eventQueue.add(new RemoteTaskCompletedEvent(taskStatus));
}
}, 1, MINUTES);
}, NO_FINAL_TASK_INFO_CHECK_INTERVAL.toMillis(), MILLISECONDS);
default -> throw new IllegalStateException("Unexpected task state: " + taskStatus.getState());
}
});

task.addFinalTaskInfoListener(_ -> finalTaskInfoReceived.set(true));
task.addFinalTaskInfoListener(taskExecutionStats::update);
task.addFinalTaskInfoListener(taskInfo -> eventQueue.add(new RemoteTaskCompletedEvent(taskInfo.taskStatus())));
nodeLease.attachTaskId(task.getTaskId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@
import org.junit.jupiter.api.Test;

import static io.airlift.testing.Closeables.closeAllSuppress;
import static io.trino.execution.scheduler.faulttolerant.EventDrivenFaultTolerantQueryScheduler.NO_FINAL_TASK_INFO_CHECK_INTERVAL;
import static io.trino.testing.TestingNames.randomNameSuffix;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.assertj.core.api.Assertions.assertThat;

public class TestDistributedFaultTolerantEngineOnlyQueries
extends AbstractDistributedEngineOnlyQueries
Expand Down Expand Up @@ -101,4 +104,27 @@ t2 AS (

assertUpdate("DROP TABLE " + tableName);
}

@Test
public void testIssue25080()
{
    // Regression test verifying that the logic for catching queries with tasks missing final task info works correctly.
    // https://github.com/trinodb/trino/pull/25080
    //
    // The "delay" table takes longer to produce its single page than NO_FINAL_TASK_INFO_CHECK_INTERVAL,
    // so the query is still running when that interval elapses after the "fast" side has completed.
    // The query is expected to succeed regardless.
    int pageProcessingDelaySeconds = ((int) NO_FINAL_TASK_INFO_CHECK_INTERVAL.getValue(SECONDS)) + 5;
    try {
        assertUpdate("""
                CREATE TABLE blackhole.default.fast (dummy BIGINT)
                WITH (split_count = 1,
                pages_per_split = 1,
                rows_per_page = 1)
                """);
        assertUpdate("""
                CREATE TABLE blackhole.default.delay (dummy BIGINT)
                WITH (split_count = 1,
                pages_per_split = 1,
                rows_per_page = 1,
                page_processing_delay = '%ss')
                """.formatted(pageProcessingDelaySeconds));
        assertThat(query("SELECT * FROM blackhole.default.delay UNION ALL SELECT * FROM blackhole.default.fast"))
                .succeeds()
                .matches("VALUES BIGINT '0', BIGINT '0'");
    }
    finally {
        // Drop the tables so they do not leak into other tests or make a re-run fail on CREATE TABLE.
        // IF EXISTS keeps cleanup from masking the original failure when a table was never created.
        assertUpdate("DROP TABLE IF EXISTS blackhole.default.delay");
        assertUpdate("DROP TABLE IF EXISTS blackhole.default.fast");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import static java.util.concurrent.Executors.newCachedThreadPool;
import static org.assertj.core.api.Assertions.assertThat;

public class TestMetadataOnlyQueries
public class TestFaultTolerantMetadataOnlyQueries
extends AbstractTestQueryFramework
{
@Override
Expand Down