Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,11 @@ public MemoryUsage getMemoryUsage() {
return memoryUsage;
}

@Nullable
public AnalysisStats getAnalysisStats() {
return analysisStats;
}

public DiscoveryNode getNode() {
return node;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ private static ConstructingObjectParser<DataCounts, Void> createParser(boolean i
private final long testDocsCount;
private final long skippedDocsCount;

public DataCounts(String jobId) {
this(jobId, 0, 0, 0);
}

public DataCounts(String jobId, long trainingDocsCount, long testDocsCount, long skippedDocsCount) {
this.jobId = Objects.requireNonNull(jobId);
this.trainingDocsCount = trainingDocsCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsTaskState;
import org.elasticsearch.xpack.core.ml.dataframe.stats.AnalysisStats;
import org.elasticsearch.xpack.core.ml.dataframe.stats.common.DataCounts;
import org.elasticsearch.xpack.core.ml.dataframe.stats.Fields;
import org.elasticsearch.xpack.core.ml.dataframe.stats.MemoryUsage;
import org.elasticsearch.xpack.core.ml.dataframe.stats.classification.ClassificationStats;
import org.elasticsearch.xpack.core.ml.dataframe.stats.common.DataCounts;
import org.elasticsearch.xpack.core.ml.dataframe.stats.outlierdetection.OutlierDetectionStats;
import org.elasticsearch.xpack.core.ml.dataframe.stats.regression.RegressionStats;
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
Expand All @@ -60,7 +60,9 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -140,9 +142,15 @@ protected void doExecute(Task task, GetDataFrameAnalyticsStatsAction.Request req
runningTasksStatsResponse -> gatherStatsForStoppedTasks(request.getExpandedIds(), runningTasksStatsResponse,
ActionListener.wrap(
finalResponse -> {

// We need to have access to the config itself to fill in missing defaults which
// is why we do it here
List<Stats> statsWithDefaults = buildStatsWithDefaults(getResponse.getResources().results(),
finalResponse.getResponse().results());

// While finalResponse has all the stats objects we need, we should report the count
// from the get response
QueryPage<Stats> finalStats = new QueryPage<>(finalResponse.getResponse().results(),
QueryPage<Stats> finalStats = new QueryPage<>(statsWithDefaults,
getResponse.getResources().count(), GetDataFrameAnalyticsAction.Response.RESULTS_FIELD);
listener.onResponse(new GetDataFrameAnalyticsStatsAction.Response(finalStats));
},
Expand Down Expand Up @@ -310,6 +318,31 @@ private GetDataFrameAnalyticsStatsAction.Response.Stats buildStats(String concre
);
}

private List<Stats> buildStatsWithDefaults(List<DataFrameAnalyticsConfig> configs, List<Stats> stats) {
Map<String, DataFrameAnalyticsConfig> configById = new HashMap<>();
for (DataFrameAnalyticsConfig config : configs) {
configById.put(config.getId(), config);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Map<String, DataFrameAnalyticsConfig> configById = new HashMap<>();
for (DataFrameAnalyticsConfig config : configs) {
configById.put(config.getId(), config);
}
Map<String, DataFrameAnalyticsConfig> configById = configs.stream().collect(Collectors.toMap(DataFrameAnalyticsConfig::getId, Function.identity()));


List<Stats> statsWithDefaults = new ArrayList<>(stats.size());
for (Stats statsItem : stats) {
DataFrameAnalyticsConfig config = configById.get(statsItem.getId());
statsWithDefaults.add(new Stats(
statsItem.getId(),
statsItem.getState(),
statsItem.getFailureReason(),
statsItem.getProgress(),
statsItem.getDataCounts() == null ? new DataCounts(config.getId()) : statsItem.getDataCounts(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't this and the memory usage be initialized in the stats ctor?

this.memoryUsage = memoryUsage == null ? new MemoryUsage(config.getId(), Instant.now(), 0) : memoryUsage;
this.dataCounts = dataCounts == null ? new DataCounts(config.getId()) : dataCounts;

Why do we need to set memory_usage create time to the config's create time? Is that adding value for the added complexity?

Or maybe I am misunderstanding the Stats#getId() value...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that's too much complexity for the gain. I'll make MemoryUsage.timestamp nullable.

statsItem.getMemoryUsage() == null ?
new MemoryUsage(config.getId(), config.getCreateTime(), 0) : statsItem.getMemoryUsage(),
statsItem.getAnalysisStats(),
statsItem.getNode(),
statsItem.getAssignmentExplanation()
));
}
return statsWithDefaults;
}

private static class RetrievedStatsHolder {

private volatile StoredProgress progress = new StoredProgress(new ProgressTracker().report());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,36 @@ setup:
- match: { data_frame_analytics.0.id: "foo-1" }
- match: { data_frame_analytics.0.state: "stopped" }

---
"Test get stats on newly created congig":

- do:
ml.put_data_frame_analytics:
id: "foo-1"
body: >
{
"source": {
"index": "index-source"
},
"dest": {
"index": "index-foo-1_dest"
},
"analysis": {"outlier_detection":{}}
}
- match: { id: "foo-1" }

- do:
ml.get_data_frame_analytics_stats:
id: "foo-1"
- match: { count: 1 }
- length: { data_frame_analytics: 1 }
- match: { data_frame_analytics.0.id: "foo-1" }
- match: { data_frame_analytics.0.state: "stopped" }
- match: { data_frame_analytics.0.data_counts.training_docs_count: 0 }
- match: { data_frame_analytics.0.data_counts.test_docs_count: 0 }
- match: { data_frame_analytics.0.data_counts.skipped_docs_count: 0 }
- match: { data_frame_analytics.0.memory_usage.peak_usage_bytes: 0 }

---
"Test delete given stopped config":

Expand Down