Skip to content

Commit

Permalink
[ML-Dataframe] Feature/fib multi aggs and sources (#34525)
Browse files Browse the repository at this point in the history
implement support for multiple sources and aggregations
  • Loading branch information
Hendrik Muhs authored Oct 19, 2018
1 parent 97782f5 commit ad8cc92
Show file tree
Hide file tree
Showing 3 changed files with 374 additions and 15 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/

package org.elasticsearch.xpack.ml.featureindexbuilder.job;

import org.apache.log4j.Logger;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation.SingleValue;

import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

/**
 * Static helpers that turn the result of a composite aggregation into generic
 * {@code Map<String, Object>} documents, one per composite bucket.
 */
final class AggregationResultUtils {
    private static final Logger logger = Logger.getLogger(AggregationResultUtils.class.getName());

    // Static utility holder, never instantiated.
    private AggregationResultUtils() {
    }

    /**
     * Extracts the results of a composite aggregation and converts every bucket into a map.
     *
     * For each bucket the resulting map contains one entry per source (source name mapped to
     * the value stored under that name in the bucket key) and one entry per aggregation whose
     * result is a {@link NumericMetricsAggregation.SingleValue} (aggregation name mapped to its
     * value). Results of any other kind are logged as an internal error and left out of the map.
     *
     * @param agg The aggregation result
     * @param sources The original sources used for querying
     * @param aggregationBuilders The aggregations used for querying
     * @return a lazily evaluated stream with one map per bucket of the aggregation result
     */
    public static Stream<Map<String, Object>> extractCompositeAggregationResults(CompositeAggregation agg,
            List<CompositeValuesSourceBuilder<?>> sources, Collection<AggregationBuilder> aggregationBuilders) {
        return agg.getBuckets().stream().map(bucket -> {
            Map<String, Object> document = new HashMap<>();

            // the bucket key is looked up by the name of each source
            for (CompositeValuesSourceBuilder<?> source : sources) {
                String destinationFieldName = source.name();
                document.put(destinationFieldName, bucket.getKey().get(destinationFieldName));
            }

            for (AggregationBuilder aggregationBuilder : aggregationBuilders) {
                String aggName = aggregationBuilder.getName();

                // TODO: support other aggregation types
                Aggregation aggResult = bucket.getAggregations().get(aggName);

                if (aggResult instanceof NumericMetricsAggregation.SingleValue) {
                    NumericMetricsAggregation.SingleValue aggResultSingleValue = (SingleValue) aggResult;
                    document.put(aggName, aggResultSingleValue.value());
                } else {
                    // Execution should never reach this point!
                    // Creating jobs with unsupported aggregations shall not be possible
                    //
                    // aggResult is null here when the bucket has no result for aggName, so the
                    // message is built from aggName (the name the result was looked up by)
                    // rather than by dereferencing aggResult.
                    logger.error("Dataframe Internal Error: unsupported aggregation [" + aggName + "], ignoring");
                    assert false;
                }
            }
            return document;
        });
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,23 @@
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
import org.elasticsearch.search.aggregations.metrics.NumericMetricsAggregation;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.xpack.core.indexing.AsyncTwoPhaseIndexer;
import org.elasticsearch.xpack.core.indexing.IndexerState;
import org.elasticsearch.xpack.core.indexing.IterationResult;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE;

public abstract class FeatureIndexBuilderIndexer extends AsyncTwoPhaseIndexer<Map<String, Object>, FeatureIndexBuilderJobStats> {

Expand All @@ -58,36 +59,37 @@ protected void onStartJob(long now) {
@Override
protected IterationResult<Map<String, Object>> doProcess(SearchResponse searchResponse) {
final CompositeAggregation agg = searchResponse.getAggregations().get("feature");
return new IterationResult<>(processBuckets(agg), agg.afterKey(), agg.getBuckets().isEmpty());
return new IterationResult<>(processBucketsToIndexRequests(agg).collect(Collectors.toList()), agg.afterKey(),
agg.getBuckets().isEmpty());
}

/*
* Mocked demo case
* Parses the result and creates a stream of indexable documents
*
* TODO: replace with proper implementation
* Implementation decisions:
*
* Extraction uses generic maps as intermediate exchange format in order to hook in ingest pipelines/processors
* in later versions, see {@link IngestDocument}.
*/
private List<IndexRequest> processBuckets(CompositeAggregation agg) {
// for now only 1 source supported
String destinationFieldName = job.getConfig().getSourceConfig().getSources().get(0).name();
String aggName = job.getConfig().getAggregationConfig().getAggregatorFactories().iterator().next().getName();
private Stream<IndexRequest> processBucketsToIndexRequests(CompositeAggregation agg) {
String indexName = job.getConfig().getDestinationIndex();
List<CompositeValuesSourceBuilder<?>> sources = job.getConfig().getSourceConfig().getSources();
Collection<AggregationBuilder> aggregationBuilders = job.getConfig().getAggregationConfig().getAggregatorFactories();

return agg.getBuckets().stream().map(b -> {
NumericMetricsAggregation.SingleValue aggResult = b.getAggregations().get(aggName);
return AggregationResultUtils.extractCompositeAggregationResults(agg, sources, aggregationBuilders).map(document -> {
XContentBuilder builder;
try {
builder = jsonBuilder();
builder.startObject();
builder.field(destinationFieldName, b.getKey().get(destinationFieldName));
builder.field(aggName, aggResult.value());
builder.map(document);
builder.endObject();
} catch (IOException e) {
throw new UncheckedIOException(e);
}

String indexName = job.getConfig().getDestinationIndex();
IndexRequest request = new IndexRequest(indexName, DOC_TYPE).source(builder);
return request;
}).collect(Collectors.toList());
});
}

@Override
Expand Down
Loading

0 comments on commit ad8cc92

Please sign in to comment.