Skip to content

Commit f048c52

Browse files
authored
[ML] JIndex: Restore finalize job action (#35939)
1 parent d72ad3b commit f048c52

File tree

9 files changed

+180
-116
lines changed

9 files changed

+180
-116
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/messages/Messages.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,9 @@ public final class Messages {
4848
"Datafeed frequency [{0}] must be a multiple of the aggregation interval [{1}]";
4949
public static final String DATAFEED_ID_ALREADY_TAKEN = "A datafeed with id [{0}] already exists";
5050

51-
public static final String FILTER_NOT_FOUND = "No filter with id [{0}] exists";
51+
public static final String FILTER_CANNOT_DELETE = "Cannot delete filter [{0}] currently used by jobs {1}";
5252
public static final String FILTER_CONTAINS_TOO_MANY_ITEMS = "Filter [{0}] contains too many items; up to [{1}] items are allowed";
53+
public static final String FILTER_NOT_FOUND = "No filter with id [{0}] exists";
5354

5455
public static final String INCONSISTENT_ID =
5556
"Inconsistent {0}; ''{1}'' specified in the body differs from ''{2}'' specified as a URL argument";

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportCloseJobAction.java

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.transport.TransportService;
2929
import org.elasticsearch.xpack.core.ml.MlTasks;
3030
import org.elasticsearch.xpack.core.ml.action.CloseJobAction;
31+
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
3132
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedState;
3233
import org.elasticsearch.xpack.core.ml.job.config.JobState;
3334
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
@@ -47,6 +48,9 @@
4748
import java.util.concurrent.atomic.AtomicInteger;
4849
import java.util.stream.Collectors;
4950

51+
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
52+
import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin;
53+
5054
public class TransportCloseJobAction extends TransportTasksAction<TransportOpenJobAction.JobTask, CloseJobAction.Request,
5155
CloseJobAction.Response, CloseJobAction.Response> {
5256

@@ -422,7 +426,10 @@ void waitForJobClosed(CloseJobAction.Request request, WaitForCloseRequest waitFo
422426
}, request.getCloseTimeout(), new ActionListener<Boolean>() {
423427
@Override
424428
public void onResponse(Boolean result) {
425-
listener.onResponse(response);
429+
FinalizeJobExecutionAction.Request finalizeRequest = new FinalizeJobExecutionAction.Request(
430+
waitForCloseRequest.jobsToFinalize.toArray(new String[0]));
431+
executeAsyncWithOrigin(client, ML_ORIGIN, FinalizeJobExecutionAction.INSTANCE, finalizeRequest,
432+
ActionListener.wrap(r -> listener.onResponse(response), listener::onFailure));
426433
}
427434

428435
@Override

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportDeleteFilterAction.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.xpack.core.ml.job.config.Detector;
2626
import org.elasticsearch.xpack.core.ml.job.config.Job;
2727
import org.elasticsearch.xpack.core.ml.job.config.MlFilter;
28+
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
2829
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
2930
import org.elasticsearch.xpack.ml.job.persistence.JobConfigProvider;
3031

@@ -58,7 +59,7 @@ protected void doExecute(Task task, DeleteFilterAction.Request request, ActionLi
5859
List<String> currentlyUsedBy = findJobsUsingFilter(jobs, filterId);
5960
if (!currentlyUsedBy.isEmpty()) {
6061
listener.onFailure(ExceptionsHelper.conflictStatusException(
61-
"Cannot delete filter, currently used by jobs: " + currentlyUsedBy));
62+
Messages.getMessage(Messages.FILTER_CANNOT_DELETE, filterId, currentlyUsedBy)));
6263
} else {
6364
deleteFilter(filterId, listener);
6465
}

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportFinalizeJobExecutionAction.java

Lines changed: 51 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,12 @@
77

88
import org.elasticsearch.action.ActionListener;
99
import org.elasticsearch.action.support.ActionFilters;
10+
import org.elasticsearch.action.support.WriteRequest;
1011
import org.elasticsearch.action.support.master.AcknowledgedResponse;
1112
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
13+
import org.elasticsearch.action.update.UpdateAction;
14+
import org.elasticsearch.action.update.UpdateRequest;
15+
import org.elasticsearch.client.Client;
1216
import org.elasticsearch.cluster.ClusterState;
1317
import org.elasticsearch.cluster.block.ClusterBlockException;
1418
import org.elasticsearch.cluster.block.ClusterBlockLevel;
@@ -18,15 +22,31 @@
1822
import org.elasticsearch.threadpool.ThreadPool;
1923
import org.elasticsearch.transport.TransportService;
2024
import org.elasticsearch.xpack.core.ml.action.FinalizeJobExecutionAction;
25+
import org.elasticsearch.xpack.core.ml.job.config.Job;
26+
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
27+
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
28+
import org.elasticsearch.xpack.ml.MachineLearning;
29+
import org.elasticsearch.xpack.ml.utils.ChainTaskExecutor;
30+
31+
import java.util.Collections;
32+
import java.util.Date;
33+
import java.util.Map;
34+
35+
import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
36+
import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin;
2137

2238
public class TransportFinalizeJobExecutionAction extends TransportMasterNodeAction<FinalizeJobExecutionAction.Request,
2339
AcknowledgedResponse> {
2440

41+
private final Client client;
42+
2543
@Inject
2644
public TransportFinalizeJobExecutionAction(TransportService transportService, ClusterService clusterService, ThreadPool threadPool,
27-
ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver) {
45+
ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver,
46+
Client client) {
2847
super(FinalizeJobExecutionAction.NAME, transportService, clusterService, threadPool, actionFilters,
2948
indexNameExpressionResolver, FinalizeJobExecutionAction.Request::new);
49+
this.client = client;
3050
}
3151

3252
@Override
@@ -42,9 +62,36 @@ protected AcknowledgedResponse newResponse() {
4262
@Override
4363
protected void masterOperation(FinalizeJobExecutionAction.Request request, ClusterState state,
4464
ActionListener<AcknowledgedResponse> listener) {
45-
// This action is no longer required but needs to be preserved
46-
// in case it is called by an old node in a mixed cluster
47-
listener.onResponse(new AcknowledgedResponse(true));
65+
String jobIdString = String.join(",", request.getJobIds());
66+
logger.debug("finalizing jobs [{}]", jobIdString);
67+
68+
ChainTaskExecutor chainTaskExecutor = new ChainTaskExecutor(threadPool.executor(
69+
MachineLearning.UTILITY_THREAD_POOL_NAME), true);
70+
71+
Map<String, Object> update = Collections.singletonMap(Job.FINISHED_TIME.getPreferredName(), new Date());
72+
73+
for (String jobId: request.getJobIds()) {
74+
UpdateRequest updateRequest = new UpdateRequest(AnomalyDetectorsIndex.configIndexName(),
75+
ElasticsearchMappings.DOC_TYPE, Job.documentId(jobId));
76+
updateRequest.retryOnConflict(3);
77+
updateRequest.doc(update);
78+
updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
79+
80+
chainTaskExecutor.add(chainedListener -> {
81+
executeAsyncWithOrigin(client, ML_ORIGIN, UpdateAction.INSTANCE, updateRequest, ActionListener.wrap(
82+
updateResponse -> chainedListener.onResponse(null),
83+
chainedListener::onFailure
84+
));
85+
});
86+
}
87+
88+
chainTaskExecutor.execute(ActionListener.wrap(
89+
aVoid -> {
90+
logger.debug("finalized job [{}]", jobIdString);
91+
listener.onResponse(new AcknowledgedResponse(true));
92+
},
93+
listener::onFailure
94+
));
4895
}
4996

5097
@Override

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportGetJobsStatsAction.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ public TransportGetJobsStatsAction(TransportService transportService,
6969

7070
@Override
7171
protected void doExecute(Task task, GetJobsStatsAction.Request request, ActionListener<GetJobsStatsAction.Response> finalListener) {
72+
logger.debug("Get stats for job [{}]", request.getJobId());
7273

7374
jobConfigProvider.expandJobsIds(request.getJobId(), request.allowNoJobs(), true, ActionListener.wrap(
7475
expandedIds -> {
@@ -105,7 +106,6 @@ protected QueryPage<GetJobsStatsAction.Response.JobStats> readTaskResponse(Strea
105106
protected void taskOperation(GetJobsStatsAction.Request request, TransportOpenJobAction.JobTask task,
106107
ActionListener<QueryPage<GetJobsStatsAction.Response.JobStats>> listener) {
107108
String jobId = task.getJobId();
108-
logger.debug("Get stats for job [{}]", jobId);
109109
ClusterState state = clusterService.state();
110110
PersistentTasksCustomMetaData tasks = state.getMetaData().custom(PersistentTasksCustomMetaData.TYPE);
111111
Optional<Tuple<DataCounts, ModelSizeStats>> stats = processManager.getStatistics(task);

x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/autodetect/output/AutoDetectResultProcessor.java

Lines changed: 20 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -11,20 +11,16 @@
1111
import org.elasticsearch.action.ActionListener;
1212
import org.elasticsearch.action.DocWriteResponse;
1313
import org.elasticsearch.action.index.IndexResponse;
14-
import org.elasticsearch.action.support.ThreadedActionListener;
1514
import org.elasticsearch.action.support.WriteRequest;
16-
import org.elasticsearch.action.update.UpdateAction;
17-
import org.elasticsearch.action.update.UpdateRequest;
18-
import org.elasticsearch.action.update.UpdateResponse;
1915
import org.elasticsearch.client.Client;
2016
import org.elasticsearch.common.Nullable;
2117
import org.elasticsearch.common.unit.ByteSizeUnit;
2218
import org.elasticsearch.common.unit.ByteSizeValue;
2319
import org.elasticsearch.xpack.core.ml.MachineLearningField;
24-
import org.elasticsearch.xpack.core.ml.job.config.Job;
20+
import org.elasticsearch.xpack.core.ml.action.PutJobAction;
21+
import org.elasticsearch.xpack.core.ml.action.UpdateJobAction;
22+
import org.elasticsearch.xpack.core.ml.job.config.JobUpdate;
2523
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
26-
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
27-
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
2824
import org.elasticsearch.xpack.core.ml.job.process.autodetect.output.FlushAcknowledgement;
2925
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
3026
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
@@ -36,20 +32,15 @@
3632
import org.elasticsearch.xpack.core.ml.job.results.ForecastRequestStats;
3733
import org.elasticsearch.xpack.core.ml.job.results.Influencer;
3834
import org.elasticsearch.xpack.core.ml.job.results.ModelPlot;
39-
import org.elasticsearch.xpack.ml.MachineLearning;
4035
import org.elasticsearch.xpack.ml.job.persistence.JobResultsPersister;
4136
import org.elasticsearch.xpack.ml.job.process.autodetect.AutodetectProcess;
4237
import org.elasticsearch.xpack.ml.job.process.normalizer.Renormalizer;
4338
import org.elasticsearch.xpack.ml.job.results.AutodetectResult;
4439
import org.elasticsearch.xpack.ml.notifications.Auditor;
4540

4641
import java.time.Duration;
47-
import java.util.Collections;
48-
import java.util.Date;
49-
import java.util.HashMap;
5042
import java.util.Iterator;
5143
import java.util.List;
52-
import java.util.Map;
5344
import java.util.Objects;
5445
import java.util.concurrent.CountDownLatch;
5546
import java.util.concurrent.Semaphore;
@@ -88,7 +79,6 @@ public class AutoDetectResultProcessor {
8879

8980
final CountDownLatch completionLatch = new CountDownLatch(1);
9081
final Semaphore updateModelSnapshotSemaphore = new Semaphore(1);
91-
volatile CountDownLatch onCloseActionsLatch;
9282
private final FlushListener flushListener;
9383
private volatile boolean processKilled;
9484
private volatile boolean failed;
@@ -149,18 +139,8 @@ public void process(AutodetectProcess process) {
149139
} catch (Exception e) {
150140
LOGGER.warn(new ParameterizedMessage("[{}] Error persisting autodetect results", jobId), e);
151141
}
152-
if (processKilled == false) {
153-
try {
154-
onAutodetectClose();
155-
} catch (Exception e) {
156-
if (onCloseActionsLatch != null) {
157-
onCloseActionsLatch.countDown();
158-
}
159-
throw e;
160-
}
161-
}
162-
163142
LOGGER.info("[{}] {} buckets parsed from autodetect output", jobId, bucketCount);
143+
164144
} catch (Exception e) {
165145
failed = true;
166146

@@ -313,6 +293,9 @@ private void notifyModelMemoryStatusChange(Context context, ModelSizeStats model
313293
}
314294

315295
protected void updateModelSnapshotOnJob(ModelSnapshot modelSnapshot) {
296+
JobUpdate update = new JobUpdate.Builder(jobId).setModelSnapshotId(modelSnapshot.getSnapshotId()).build();
297+
UpdateJobAction.Request updateRequest = UpdateJobAction.Request.internal(jobId, update);
298+
316299
try {
317300
// This blocks the main processing thread in the unlikely event
318301
// there are 2 model snapshots queued up. But it also has the
@@ -324,52 +307,20 @@ protected void updateModelSnapshotOnJob(ModelSnapshot modelSnapshot) {
324307
return;
325308
}
326309

327-
Map<String, Object> update = new HashMap<>();
328-
update.put(Job.MODEL_SNAPSHOT_ID.getPreferredName(), modelSnapshot.getSnapshotId());
329-
update.put(Job.MODEL_SNAPSHOT_MIN_VERSION.getPreferredName(), modelSnapshot.getMinVersion().toString());
330-
331-
updateJob(jobId, update, new ActionListener<UpdateResponse>() {
332-
@Override
333-
public void onResponse(UpdateResponse updateResponse) {
334-
updateModelSnapshotSemaphore.release();
335-
LOGGER.debug("[{}] Updated job with model snapshot id [{}]", jobId, modelSnapshot.getSnapshotId());
336-
}
337-
338-
@Override
339-
public void onFailure(Exception e) {
340-
updateModelSnapshotSemaphore.release();
341-
LOGGER.error("[" + jobId + "] Failed to update job with new model snapshot id [" +
342-
modelSnapshot.getSnapshotId() + "]", e);
343-
}
344-
});
345-
}
346-
347-
private void onAutodetectClose() {
348-
onCloseActionsLatch = new CountDownLatch(1);
349-
350-
ActionListener<UpdateResponse> updateListener = ActionListener.wrap(
351-
updateResponse -> {
352-
onCloseActionsLatch.countDown();
353-
},
354-
e -> {
355-
LOGGER.error("[" + jobId + "] Failed to finalize job on autodetect close", e);
356-
onCloseActionsLatch.countDown();
357-
}
358-
);
359-
360-
updateJob(jobId, Collections.singletonMap(Job.FINISHED_TIME.getPreferredName(), new Date()),
361-
new ThreadedActionListener<>(LOGGER, client.threadPool(),
362-
MachineLearning.UTILITY_THREAD_POOL_NAME, updateListener, false)
363-
);
364-
}
310+
executeAsyncWithOrigin(client, ML_ORIGIN, UpdateJobAction.INSTANCE, updateRequest, new ActionListener<PutJobAction.Response>() {
311+
@Override
312+
public void onResponse(PutJobAction.Response response) {
313+
updateModelSnapshotSemaphore.release();
314+
LOGGER.debug("[{}] Updated job with model snapshot id [{}]", jobId, modelSnapshot.getSnapshotId());
315+
}
365316

366-
private void updateJob(String jobId, Map<String, Object> update, ActionListener<UpdateResponse> listener) {
367-
UpdateRequest updateRequest = new UpdateRequest(AnomalyDetectorsIndex.configIndexName(),
368-
ElasticsearchMappings.DOC_TYPE, Job.documentId(jobId));
369-
updateRequest.retryOnConflict(3);
370-
updateRequest.doc(update);
371-
updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
372-
executeAsyncWithOrigin(client, ML_ORIGIN, UpdateAction.INSTANCE, updateRequest, listener);
317+
@Override
318+
public void onFailure(Exception e) {
319+
updateModelSnapshotSemaphore.release();
320+
LOGGER.error("[" + jobId + "] Failed to update job with new model snapshot id [" +
321+
modelSnapshot.getSnapshotId() + "]", e);
322+
}
323+
});
373324
}
374325

375326
public void awaitCompletion() throws TimeoutException {
@@ -381,13 +332,6 @@ public void awaitCompletion() throws TimeoutException {
381332
throw new TimeoutException("Timed out waiting for results processor to complete for job " + jobId);
382333
}
383334

384-
// Once completionLatch has passed then onCloseActionsLatch must either
385-
// be set or null, it will not be set later.
386-
if (onCloseActionsLatch != null && onCloseActionsLatch.await(
387-
MachineLearningField.STATE_PERSIST_RESTORE_TIMEOUT.getMinutes(), TimeUnit.MINUTES) == false) {
388-
throw new TimeoutException("Timed out waiting for results processor run post close actions " + jobId);
389-
}
390-
391335
// Input stream has been completely processed at this point.
392336
// Wait for any updateModelSnapshotOnJob calls to complete.
393337
updateModelSnapshotSemaphore.acquire();

0 commit comments

Comments
 (0)