Skip to content

Commit 4263c25

Browse files
authored
Save memory when histogram agg is not on top (backport of #57277) (#57377)
This saves some memory when the `histogram` aggregation is not a top-level aggregation by dropping `asMultiBucketAggregator` in favor of natively implementing multi-bucket storage in the aggregator. For the most part this just uses the `LongKeyedBucketOrds` that we built the first time we did this.
1 parent b15a304 commit 4263c25

File tree

11 files changed

+465
-288
lines changed

11 files changed

+465
-288
lines changed

rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,65 @@ setup:
492492
- match: { aggregations.histo.buckets.0.doc_count: 1 }
493493

494494
---
495-
"profiler":
495+
"histogram profiler":
496+
- skip:
497+
version: " - 7.8.99"
498+
reason: debug info added in 7.9.0
499+
500+
- do:
501+
indices.create:
502+
index: test_2
503+
body:
504+
settings:
505+
number_of_replicas: 0
506+
number_of_shards: 1
507+
mappings:
508+
properties:
509+
n:
510+
type: long
511+
512+
- do:
513+
bulk:
514+
index: test_2
515+
refresh: true
516+
body:
517+
- '{"index": {}}'
518+
- '{"n": "1"}'
519+
- '{"index": {}}'
520+
- '{"n": "2"}'
521+
- '{"index": {}}'
522+
- '{"n": "10"}'
523+
- '{"index": {}}'
524+
- '{"n": "17"}'
525+
526+
- do:
527+
search:
528+
index: test_2
529+
body:
530+
size: 0
531+
profile: true
532+
aggs:
533+
histo:
534+
histogram:
535+
field: n
536+
interval: 5
537+
- match: { hits.total.value: 4 }
538+
- length: { aggregations.histo.buckets: 4 }
539+
- match: { aggregations.histo.buckets.0.key: 0 }
540+
- match: { aggregations.histo.buckets.0.doc_count: 2 }
541+
- match: { aggregations.histo.buckets.1.key: 5 }
542+
- match: { aggregations.histo.buckets.1.doc_count: 0 }
543+
- match: { aggregations.histo.buckets.2.key: 10 }
544+
- match: { aggregations.histo.buckets.2.doc_count: 1 }
545+
- match: { aggregations.histo.buckets.3.key: 15 }
546+
- match: { aggregations.histo.buckets.3.doc_count: 1 }
547+
- match: { profile.shards.0.aggregations.0.type: NumericHistogramAggregator }
548+
- match: { profile.shards.0.aggregations.0.description: histo }
549+
- match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 }
550+
- match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 }
551+
552+
---
553+
"date_histogram profiler":
496554
- skip:
497555
version: " - 7.8.99"
498556
reason: debug info added in 7.9.0
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.search.aggregations.bucket.histogram;
21+
22+
import org.apache.lucene.util.CollectionUtil;
23+
import org.elasticsearch.common.lease.Releasables;
24+
import org.elasticsearch.search.DocValueFormat;
25+
import org.elasticsearch.search.aggregations.Aggregator;
26+
import org.elasticsearch.search.aggregations.AggregatorFactories;
27+
import org.elasticsearch.search.aggregations.BucketOrder;
28+
import org.elasticsearch.search.aggregations.InternalAggregation;
29+
import org.elasticsearch.search.aggregations.bucket.BucketsAggregator;
30+
import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram.EmptyBucketInfo;
31+
import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;
32+
import org.elasticsearch.search.internal.SearchContext;
33+
34+
import java.io.IOException;
35+
import java.util.Collections;
36+
import java.util.Map;
37+
import java.util.function.BiConsumer;
38+
39+
/**
40+
* Base class for functionality shared between aggregators for this
41+
* {@code histogram} aggregation.
42+
*/
43+
public abstract class AbstractHistogramAggregator extends BucketsAggregator {
44+
protected final DocValueFormat formatter;
45+
protected final double interval;
46+
protected final double offset;
47+
protected final BucketOrder order;
48+
protected final boolean keyed;
49+
protected final long minDocCount;
50+
protected final double minBound;
51+
protected final double maxBound;
52+
protected final LongKeyedBucketOrds bucketOrds;
53+
54+
public AbstractHistogramAggregator(
55+
String name,
56+
AggregatorFactories factories,
57+
double interval,
58+
double offset,
59+
BucketOrder order,
60+
boolean keyed,
61+
long minDocCount,
62+
double minBound,
63+
double maxBound,
64+
DocValueFormat formatter,
65+
SearchContext context,
66+
Aggregator parent,
67+
boolean collectsFromSingleBucket,
68+
Map<String, Object> metadata
69+
) throws IOException {
70+
super(name, factories, context, parent, metadata);
71+
if (interval <= 0) {
72+
throw new IllegalArgumentException("interval must be positive, got: " + interval);
73+
}
74+
this.interval = interval;
75+
this.offset = offset;
76+
this.order = order;
77+
order.validate(this);
78+
this.keyed = keyed;
79+
this.minDocCount = minDocCount;
80+
this.minBound = minBound;
81+
this.maxBound = maxBound;
82+
this.formatter = formatter;
83+
bucketOrds = LongKeyedBucketOrds.build(context.bigArrays(), collectsFromSingleBucket);
84+
}
85+
86+
@Override
87+
public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException {
88+
return buildAggregationsForVariableBuckets(owningBucketOrds, bucketOrds,
89+
(bucketValue, docCount, subAggregationResults) -> {
90+
double roundKey = Double.longBitsToDouble(bucketValue);
91+
double key = roundKey * interval + offset;
92+
return new InternalHistogram.Bucket(key, docCount, keyed, formatter, subAggregationResults);
93+
}, buckets -> {
94+
// the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order
95+
CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator());
96+
97+
EmptyBucketInfo emptyBucketInfo = null;
98+
if (minDocCount == 0) {
99+
emptyBucketInfo = new EmptyBucketInfo(interval, offset, minBound, maxBound, buildEmptySubAggregations());
100+
}
101+
return new InternalHistogram(name, buckets, order, minDocCount, emptyBucketInfo, formatter, keyed, metadata());
102+
});
103+
}
104+
105+
@Override
106+
public InternalAggregation buildEmptyAggregation() {
107+
InternalHistogram.EmptyBucketInfo emptyBucketInfo = null;
108+
if (minDocCount == 0) {
109+
emptyBucketInfo = new InternalHistogram.EmptyBucketInfo(interval, offset, minBound, maxBound, buildEmptySubAggregations());
110+
}
111+
return new InternalHistogram(name, Collections.emptyList(), order, minDocCount, emptyBucketInfo, formatter, keyed, metadata());
112+
}
113+
114+
@Override
115+
public void doClose() {
116+
Releasables.close(bucketOrds);
117+
}
118+
119+
@Override
120+
public void collectDebugInfo(BiConsumer<String, Object> add) {
121+
add.accept("total_buckets", bucketOrds.size());
122+
super.collectDebugInfo(add);
123+
}
124+
}

server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java

Lines changed: 6 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919

2020
package org.elasticsearch.search.aggregations.bucket.histogram;
2121

22+
import org.elasticsearch.common.collect.List;
2223
import org.elasticsearch.index.query.QueryShardContext;
23-
import org.elasticsearch.search.DocValueFormat;
2424
import org.elasticsearch.search.aggregations.AggregationExecutionException;
2525
import org.elasticsearch.search.aggregations.Aggregator;
2626
import org.elasticsearch.search.aggregations.AggregatorFactories;
@@ -36,8 +36,6 @@
3636
import org.elasticsearch.search.internal.SearchContext;
3737

3838
import java.io.IOException;
39-
import java.util.Arrays;
40-
import java.util.Collections;
4139
import java.util.Map;
4240

4341
/**
@@ -54,40 +52,11 @@ public final class HistogramAggregatorFactory extends ValuesSourceAggregatorFact
5452

5553
static void registerAggregators(ValuesSourceRegistry.Builder builder) {
5654
builder.register(HistogramAggregationBuilder.NAME, CoreValuesSourceType.RANGE,
57-
new HistogramAggregatorSupplier() {
58-
@Override
59-
public Aggregator build(String name, AggregatorFactories factories, double interval, double offset,
60-
BucketOrder order, boolean keyed, long minDocCount, double minBound, double maxBound,
61-
ValuesSource valuesSource, DocValueFormat formatter, SearchContext context,
62-
Aggregator parent,
63-
Map<String, Object> metadata) throws IOException {
64-
ValuesSource.Range rangeValueSource = (ValuesSource.Range) valuesSource;
65-
if (rangeValueSource.rangeType().isNumeric() == false) {
66-
throw new IllegalArgumentException("Expected numeric range type but found non-numeric range ["
67-
+ rangeValueSource.rangeType().name + "]");
68-
}
69-
return new RangeHistogramAggregator(name, factories, interval, offset, order, keyed, minDocCount, minBound,
70-
maxBound, rangeValueSource, formatter, context, parent, metadata);
71-
}
72-
}
73-
);
55+
(HistogramAggregatorSupplier) RangeHistogramAggregator::new);
7456

7557
builder.register(HistogramAggregationBuilder.NAME,
76-
Collections.unmodifiableList(Arrays.asList(CoreValuesSourceType.NUMERIC,
77-
CoreValuesSourceType.DATE,
78-
CoreValuesSourceType.BOOLEAN)),
79-
new HistogramAggregatorSupplier() {
80-
@Override
81-
public Aggregator build(String name, AggregatorFactories factories, double interval, double offset,
82-
BucketOrder order, boolean keyed, long minDocCount, double minBound, double maxBound,
83-
ValuesSource valuesSource, DocValueFormat formatter, SearchContext context,
84-
Aggregator parent,
85-
Map<String, Object> metadata) throws IOException {
86-
return new NumericHistogramAggregator(name, factories, interval, offset, order, keyed, minDocCount, minBound,
87-
maxBound, (ValuesSource.Numeric) valuesSource, formatter, context, parent, metadata);
88-
}
89-
}
90-
);
58+
List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN),
59+
(HistogramAggregatorSupplier) NumericHistogramAggregator::new);
9160
}
9261

9362
public HistogramAggregatorFactory(String name,
@@ -123,10 +92,6 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource,
12392
Aggregator parent,
12493
boolean collectsFromSingleBucket,
12594
Map<String, Object> metadata) throws IOException {
126-
if (collectsFromSingleBucket == false) {
127-
return asMultiBucketAggregator(this, searchContext, parent);
128-
}
129-
13095
AggregatorSupplier aggregatorSupplier = queryShardContext.getValuesSourceRegistry().getAggregator(config.valueSourceType(),
13196
HistogramAggregationBuilder.NAME);
13297
if (aggregatorSupplier instanceof HistogramAggregatorSupplier == false) {
@@ -135,14 +100,14 @@ protected Aggregator doCreateInternal(ValuesSource valuesSource,
135100
}
136101
HistogramAggregatorSupplier histogramAggregatorSupplier = (HistogramAggregatorSupplier) aggregatorSupplier;
137102
return histogramAggregatorSupplier.build(name, factories, interval, offset, order, keyed, minDocCount, minBound, maxBound,
138-
valuesSource, config.format(), searchContext, parent, metadata);
103+
valuesSource, config.format(), searchContext, parent, collectsFromSingleBucket, metadata);
139104
}
140105

141106
@Override
142107
protected Aggregator createUnmapped(SearchContext searchContext,
143108
Aggregator parent,
144109
Map<String, Object> metadata) throws IOException {
145110
return new NumericHistogramAggregator(name, factories, interval, offset, order, keyed, minDocCount, minBound, maxBound,
146-
null, config.format(), searchContext, parent, metadata);
111+
null, config.format(), searchContext, parent, false, metadata);
147112
}
148113
}

0 commit comments

Comments
 (0)