Skip to content

Commit d5340cc

Browse files
authored
[ML] do not start stopping tasks on reassignment (#55315)
When anomaly jobs, datafeeds, and analytics tasks are stopped, they enter an ephemeral state called `STOPPING` (`CLOSING` for anomaly jobs). If the node executing the task fails while this is occurring, the task could be stuck in that limbo state. It is best to mark such tasks as completed if they get reassigned to a node while still in this state.
1 parent 803cedb commit d5340cc

File tree

3 files changed

+25
-4
lines changed

3 files changed

+25
-4
lines changed

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportOpenJobAction.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,18 @@ protected void nodeOperation(AllocatedPersistentTask task, OpenJobAction.JobPara
439439
JobTask jobTask = (JobTask) task;
440440
jobTask.autodetectProcessManager = autodetectProcessManager;
441441
JobTaskState jobTaskState = (JobTaskState) state;
442+
JobState jobState = jobTaskState == null ? null : jobTaskState.getState();
443+
// If the job is closing, simply stop and return
444+
if (JobState.CLOSING.equals(jobState)) {
445+
// Mark as completed instead of using `stop` as stop assumes native processes have started
446+
logger.info("[{}] job got reassigned while stopping. Marking as completed", params.getJobId());
447+
jobTask.markAsCompleted();
448+
return;
449+
}
442450
// If the job is failed then the Persistent Task Service will
443451
// try to restart it on a node restart. Exiting here leaves the
444452
// job in the failed state and it must be force closed.
445-
if (jobTaskState != null && jobTaskState.getState().isAnyOf(JobState.FAILED, JobState.CLOSING)) {
453+
if (JobState.FAILED.equals(jobState)) {
446454
return;
447455
}
448456

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDataFrameAnalyticsAction.java

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -645,11 +645,16 @@ protected void nodeOperation(AllocatedPersistentTask task, StartDataFrameAnalyti
645645
PersistentTaskState state) {
646646
logger.info("[{}] Starting data frame analytics", params.getId());
647647
DataFrameAnalyticsTaskState analyticsTaskState = (DataFrameAnalyticsTaskState) state;
648+
DataFrameAnalyticsState analyticsState = analyticsTaskState == null ? null : analyticsTaskState.getState();
648649

649-
// If we are "stopping" there is nothing to do
650+
// If we are "stopping" there is nothing to do and we should stop
651+
if (DataFrameAnalyticsState.STOPPING.equals(analyticsState)) {
652+
logger.info("[{}] data frame analytics got reassigned while stopping. Marking as completed", params.getId());
653+
task.markAsCompleted();
654+
return;
655+
}
650656
// If we are "failed" then we should leave the task as is; for recovery it must be force stopped.
651-
if (analyticsTaskState != null && analyticsTaskState.getState().isAnyOf(
652-
DataFrameAnalyticsState.STOPPING, DataFrameAnalyticsState.FAILED)) {
657+
if (DataFrameAnalyticsState.FAILED.equals(analyticsState)) {
653658
return;
654659
}
655660

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartDatafeedAction.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,14 @@ protected void nodeOperation(final AllocatedPersistentTask allocatedPersistentTa
398398
final StartDatafeedAction.DatafeedParams params,
399399
final PersistentTaskState state) {
400400
DatafeedTask datafeedTask = (DatafeedTask) allocatedPersistentTask;
401+
DatafeedState datafeedState = (DatafeedState) state;
402+
403+
// If we are "stopping" there is nothing to do
404+
if (DatafeedState.STOPPING.equals(datafeedState)) {
405+
logger.info("[{}] datafeed got reassigned while stopping. Marking as completed", params.getDatafeedId());
406+
datafeedTask.markAsCompleted();
407+
return;
408+
}
401409
datafeedTask.datafeedManager = datafeedManager;
402410
datafeedManager.run(datafeedTask,
403411
(error) -> {

0 commit comments

Comments
 (0)