-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Save a little space in agg tree #53730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
8552939
4c9db02
5b36280
bd890bc
aa33bbe
b9f4cda
1562e76
cff7a67
f335863
feb194c
12dae79
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,6 +115,39 @@ | |
| - match: { aggregations.cluster.buckets.0.key: "local_cluster" } | ||
| - match: { aggregations.cluster.buckets.0.doc_count: 5 } | ||
|
|
||
| # once more, this time with a pipeline agg | ||
| - do: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we have a similar test for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It'd be great to have a "mixed cluster CCS" test. I talked that one through with @javanna and we don't have one now and probably don't want to build one just for this. |
||
| search: | ||
| rest_total_hits_as_int: true | ||
| index: test_index,my_remote_cluster:test_index | ||
| body: | ||
| seq_no_primary_term: true | ||
| aggs: | ||
| cluster: | ||
| terms: | ||
| field: f1.keyword | ||
| aggs: | ||
| s: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add a non-top-level pipeline agg just to confirm they aren't affected? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I figured I'd get it in my next PR about non-top-level pipeline aggs, but I'm happy to do it now! |
||
| sum: | ||
| field: filter_field | ||
| average_sum: | ||
| avg_bucket: | ||
| buckets_path: cluster.s | ||
|
|
||
| - match: { num_reduce_phases: 3 } | ||
| - match: {_clusters.total: 2} | ||
| - match: {_clusters.successful: 2} | ||
| - match: {_clusters.skipped: 0} | ||
| - match: { _shards.total: 5 } | ||
| - match: { hits.total: 11 } | ||
| - length: { aggregations.cluster.buckets: 2 } | ||
| - match: { aggregations.cluster.buckets.0.key: "remote_cluster" } | ||
| - match: { aggregations.cluster.buckets.0.doc_count: 6 } | ||
| - match: { aggregations.cluster.buckets.0.s.value: 2 } | ||
| - match: { aggregations.cluster.buckets.1.key: "local_cluster" } | ||
| - match: { aggregations.cluster.buckets.0.s.value: 2 } | ||
| - match: { aggregations.average_sum.value: 2 } | ||
|
|
||
| --- | ||
| "Add transient remote cluster based on the preset cluster": | ||
| - do: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| */ | ||
| package org.elasticsearch.search.aggregations; | ||
|
|
||
| import org.elasticsearch.Version; | ||
| import org.elasticsearch.common.io.stream.StreamInput; | ||
| import org.elasticsearch.common.io.stream.StreamOutput; | ||
| import org.elasticsearch.common.io.stream.Writeable; | ||
|
|
@@ -34,9 +35,13 @@ | |
| import java.util.Iterator; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.function.Function; | ||
| import java.util.function.Supplier; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| import static java.util.Collections.emptyList; | ||
| import static java.util.stream.Collectors.toList; | ||
|
|
||
| /** | ||
| * An internal implementation of {@link Aggregations}. | ||
| */ | ||
|
|
@@ -54,35 +59,56 @@ public final class InternalAggregations extends Aggregations implements Writeabl | |
| } | ||
| }; | ||
|
|
||
| private final List<SiblingPipelineAggregator> topLevelPipelineAggregators; | ||
| /** | ||
| * The way to build a tree of pipeline aggregators. Used only for | ||
| * serialization backwards compatibility. | ||
| */ | ||
| private final Supplier<PipelineAggregator.PipelineTree> pipelineTreeForBwcSerialization; | ||
|
|
||
| /** | ||
| * Constructs a new aggregation. | ||
| */ | ||
| public InternalAggregations(List<InternalAggregation> aggregations) { | ||
| super(aggregations); | ||
| this.topLevelPipelineAggregators = Collections.emptyList(); | ||
| this.pipelineTreeForBwcSerialization = null; | ||
| } | ||
|
|
||
| /** | ||
| * Constructs a new aggregation providing its {@link InternalAggregation}s and {@link SiblingPipelineAggregator}s | ||
| * Constructs a node in the aggregation tree. | ||
| * @param pipelineTreeSource must be null inside the tree or after final reduction. Should reference the | ||
| * search request otherwise so we can properly serialize the response to | ||
| * versions of Elasticsearch that require the pipelines to be serialized. | ||
| */ | ||
| public InternalAggregations(List<InternalAggregation> aggregations, List<SiblingPipelineAggregator> topLevelPipelineAggregators) { | ||
| public InternalAggregations(List<InternalAggregation> aggregations, Supplier<PipelineAggregator.PipelineTree> pipelineTreeSource) { | ||
| super(aggregations); | ||
| this.topLevelPipelineAggregators = Objects.requireNonNull(topLevelPipelineAggregators); | ||
| this.pipelineTreeForBwcSerialization = pipelineTreeSource; | ||
| } | ||
|
|
||
| public InternalAggregations(StreamInput in) throws IOException { | ||
| super(in.readList(stream -> in.readNamedWriteable(InternalAggregation.class))); | ||
| this.topLevelPipelineAggregators = in.readList( | ||
| stream -> (SiblingPipelineAggregator)in.readNamedWriteable(PipelineAggregator.class)); | ||
| if (in.getVersion().before(Version.V_8_0_0)) { // TODO switch to 7.7.0 before merging | ||
| in.readNamedWriteableList(PipelineAggregator.class); | ||
| } | ||
| /* | ||
| * Setting the pipeline tree source to null is here is correct but | ||
| * only because we don't immediately pass the InternalAggregations | ||
| * off to another node. Instead, we always reduce together with | ||
| * many aggregations and that always adds the | ||
|
||
| */ | ||
| pipelineTreeForBwcSerialization = null; | ||
| } | ||
|
|
||
| @Override | ||
| @SuppressWarnings("unchecked") | ||
| public void writeTo(StreamOutput out) throws IOException { | ||
| out.writeNamedWriteableList((List<InternalAggregation>)aggregations); | ||
| out.writeNamedWriteableList(topLevelPipelineAggregators); | ||
| if (out.getVersion().before(Version.V_8_0_0)) { // TODO switch to 7.7.0 before merging | ||
| if (pipelineTreeForBwcSerialization == null) { | ||
| out.writeNamedWriteableList(emptyList()); | ||
| } else { | ||
| out.writeNamedWriteableList(pipelineTreeForBwcSerialization.get().aggregators()); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -95,12 +121,17 @@ public List<InternalAggregation> copyResults() { | |
| } | ||
|
|
||
| /** | ||
| * Returns the top-level pipeline aggregators. | ||
| * Note that top-level pipeline aggregators become normal aggregation once the final reduction has been performed, after which they | ||
| * become part of the list of {@link InternalAggregation}s. | ||
| * Get the top level pipeline aggregators. | ||
| * @deprecated these only exist for BWC serialization | ||
| */ | ||
| @Deprecated | ||
| public List<SiblingPipelineAggregator> getTopLevelPipelineAggregators() { | ||
| return topLevelPipelineAggregators; | ||
| if (pipelineTreeForBwcSerialization == null) { | ||
| return emptyList(); | ||
| } | ||
| return pipelineTreeForBwcSerialization.get().aggregators().stream() | ||
| .map(p -> (SiblingPipelineAggregator) p) | ||
| .collect(toList()); | ||
| } | ||
|
|
||
| @SuppressWarnings("unchecked") | ||
|
|
@@ -131,7 +162,8 @@ public double sortValue(AggregationPath.PathElement head, Iterator<AggregationPa | |
| * aggregations (both embedded parent/sibling as well as top-level sibling pipelines) | ||
| */ | ||
| public static InternalAggregations topLevelReduce(List<InternalAggregations> aggregationsList, ReduceContext context) { | ||
| InternalAggregations reduced = reduce(aggregationsList, context); | ||
| InternalAggregations reduced = reduce(aggregationsList, context, | ||
| reducedAggregations -> new InternalAggregations(reducedAggregations, context.pipelineTreeForBwcSerialization())); | ||
| if (reduced == null) { | ||
| return null; | ||
| } | ||
|
|
@@ -157,12 +189,16 @@ public static InternalAggregations topLevelReduce(List<InternalAggregations> agg | |
| * {@link InternalAggregations} object found in the list. | ||
| * Note that pipeline aggregations _are not_ reduced by this method. Pipelines are handled | ||
| * separately by {@link InternalAggregations#topLevelReduce(List, ReduceContext)} | ||
| * @param ctor used to build the {@link InternalAggregations}. The top level reduce specifies a constructor | ||
| * that adds pipeline aggregation information that is used to send pipeline aggregations to | ||
| * older versions of Elasticsearch that require the pipeline aggregations to be returned | ||
| * as part of the aggregation tree | ||
| */ | ||
| public static InternalAggregations reduce(List<InternalAggregations> aggregationsList, ReduceContext context) { | ||
| public static InternalAggregations reduce(List<InternalAggregations> aggregationsList, ReduceContext context, | ||
| Function<List<InternalAggregation>, InternalAggregations> ctor) { | ||
| if (aggregationsList.isEmpty()) { | ||
| return null; | ||
| } | ||
| List<SiblingPipelineAggregator> topLevelPipelineAggregators = aggregationsList.get(0).getTopLevelPipelineAggregators(); | ||
|
|
||
| // first we collect all aggregations of the same type and list them together | ||
| Map<String, List<InternalAggregation>> aggByName = new HashMap<>(); | ||
|
|
@@ -185,6 +221,14 @@ public static InternalAggregations reduce(List<InternalAggregations> aggregation | |
| reducedAggregations.add(first.reduce(aggregations, context)); | ||
| } | ||
|
|
||
| return new InternalAggregations(reducedAggregations, topLevelPipelineAggregators); | ||
| return ctor.apply(reducedAggregations); | ||
| } | ||
|
|
||
| /** | ||
| * Version of {@link #reduce(List, ReduceContext, Function)} for nodes inside the aggregation tree. | ||
| */ | ||
| public static InternalAggregations reduce(List<InternalAggregations> aggregationsList, ReduceContext context) { | ||
| return reduce(aggregationsList, context, InternalAggregations::new); | ||
| } | ||
|
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, should we maybe split this out into it's own test? Just thinking that long multi-step yaml tests can be tricky to debug sometimes.
Haven't looked at the rest of the tests in this yaml though, so it might not be easy to move the indexing (or whatever else) steps up to the setup though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll move it, sure! They can get hard to debug.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok - this is a test that doesn't clear indices after it runs. So moving things around is a bit more complex than we'd like to be honest. I can do it, but I think it should wait for a followup.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah ok, no worries then. not worth re-arranging everything for 👍