From 3eb94bcf3a9285cade4bd8a423e7bafb76328377 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 8 Oct 2020 09:33:15 -0400 Subject: [PATCH 01/48] Execute date_histo agg as date_range agg WIP --- .../org/elasticsearch/common/Rounding.java | 18 +++ .../aggregations/AdaptingAggregator.java | 82 ++++++++++ .../search/aggregations/AggregatorBase.java | 2 +- ...stogramAdaptedFromDateRangeAggregator.java | 152 ++++++++++++++++++ .../DateHistogramAggregatorFactory.java | 56 ++++++- .../bucket/range/InternalDateRange.java | 5 +- .../bucket/range/InternalRange.java | 2 +- .../bucket/range/RangeAggregatorSupplier.java | 2 +- .../elasticsearch/common/RoundingTests.java | 31 ++++ 9 files changed, 344 insertions(+), 6 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java index 4341d9ca65dd6..9a1d82662dff3 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -291,6 +291,8 @@ public interface Prepared { * next rounded value in specified units if possible. */ double roundingSize(long utcMillis, DateTimeUnit timeUnit); + + long[] fixedRoundingPoints(); } /** * Prepare to round many times. 
@@ -435,6 +437,11 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) } return new ArrayRounding(values, i, this); } + + @Override + public long[] fixedRoundingPoints() { + return null; + } } static class TimeUnitRounding extends Rounding { @@ -1253,6 +1260,12 @@ public long nextRoundingValue(long utcMillis) { public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { return delegatePrepared.roundingSize(utcMillis, timeUnit); } + + @Override + public long[] fixedRoundingPoints() { + // NOCOMMIT we can pick real rounding points + return null; + } }; } @@ -1335,5 +1348,10 @@ public long nextRoundingValue(long utcMillis) { public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { return delegate.roundingSize(utcMillis, timeUnit); } + + @Override + public long[] fixedRoundingPoints() { + return Arrays.copyOf(values, max); + } } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java new file mode 100644 index 0000000000000..d62953e6d8d0f --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -0,0 +1,82 @@ +package org.elasticsearch.search.aggregations; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; + +public abstract class AdaptingAggregator extends Aggregator { + private final Aggregator delegate; + + public AdaptingAggregator(Aggregator delegate) { + this.delegate = delegate; + } + + @Override + public void close() { + delegate.close(); + } + + @Override + public ScoreMode scoreMode() { + return delegate.scoreMode(); + } + + @Override + public String name() { + return delegate.name(); + } + + @Override + public SearchContext context() { 
+ return delegate.context(); + } + + @Override + public Aggregator parent() { + return delegate.parent(); + } + + @Override + public Aggregator subAggregator(String name) { + return delegate.subAggregator(name); + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return delegate.getLeafCollector(ctx); + } + + @Override + public void preCollection() throws IOException { + delegate.preCollection(); + } + + @Override + public void postCollection() throws IOException { + delegate.postCollection(); + } + + @Override + public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + InternalAggregation[] delegateResults = delegate.buildAggregations(owningBucketOrds); + InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; + for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + result[ordIdx] = adapt(delegateResults[ordIdx]); + } + return result; + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return adapt(delegate.buildEmptyAggregation()); + } + + protected abstract InternalAggregation adapt(InternalAggregation delegateResult); + + public final InternalAggregations buildEmptySubAggregations() { + return ((BucketsAggregator) delegate).buildEmptySubAggregations(); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java index ff9785dde47b1..1345fae87b061 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java @@ -277,7 +277,7 @@ protected void doClose() {} protected void doPostCollection() throws IOException { } - protected final InternalAggregations buildEmptySubAggregations() { + public final InternalAggregations buildEmptySubAggregations() { List aggs = new ArrayList<>(); for 
(Aggregator aggregator : subAggregators) { aggs.add(aggregator.buildEmptyAggregation()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java new file mode 100644 index 0000000000000..12dc00b4c096b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java @@ -0,0 +1,152 @@ +package org.elasticsearch.search.aggregations.bucket.histogram; + +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.Rounding; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.AdaptingAggregator; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.CardinalityUpperBound; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregatorSupplier; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +/** + * Adapts a {@link DateHistogramAggregator} results into {@link InternalDateHistogram}s. 
+ */ +public class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { + static DateHistogramAdaptedFromDateRangeAggregator buildOptimizedOrNull( + String name, + AggregatorFactories factories, + Rounding rounding, + Rounding.Prepared preparedRounding, + BucketOrder order, + boolean keyed, + long minDocCount, + @Nullable LongBounds extendedBounds, + @Nullable LongBounds hardBounds, + ValuesSourceConfig valuesSourceConfig, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (hardBounds != null) { + return null; + } + if (valuesSourceConfig.hasValues() == false) { + return null; + } + long[] points = preparedRounding.fixedRoundingPoints(); + if (points == null) { + return null; + } + RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() + .getValuesSourceRegistry() + .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); + if (rangeSupplier == null) { + return null; + } + RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; + for (int i = 0; i < points.length - 1; i++) { + ranges[i] = new RangeAggregator.Range(null, (double) points[i], (double) points[i + 1]); + } + ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) points[points.length - 1], null); + RangeAggregator delegate = rangeSupplier.build( + name, + factories, + (ValuesSource.Numeric) valuesSourceConfig.getValuesSource(), + valuesSourceConfig.format(), + InternalDateRange.FACTORY, + ranges, + false, + context, + parent, + cardinality, + metadata + ); + return new DateHistogramAdaptedFromDateRangeAggregator( + delegate, + valuesSourceConfig.format(), + rounding, + order, + minDocCount, + extendedBounds, + keyed + ); + } + + private final DocValueFormat format; + private final Rounding rounding; + private final BucketOrder order; + private final long minDocCount; + private final LongBounds extendedBounds; + private final boolean keyed; + + 
public DateHistogramAdaptedFromDateRangeAggregator( + RangeAggregator delegate, + DocValueFormat format, + Rounding rounding, + BucketOrder order, + long minDocCount, + LongBounds extendedBounds, + boolean keyed + ) { + super(delegate); + this.format = format; + this.rounding = rounding; + this.order = order; + this.minDocCount = minDocCount; + this.extendedBounds = extendedBounds; + this.keyed = keyed; + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + InternalDateRange range = (InternalDateRange) delegateResult; + List buckets = new ArrayList<>(range.getBuckets().size()); + for (InternalDateRange.Bucket rangeBucket : range.getBuckets()) { + if (rangeBucket.getDocCount() > 0) { + buckets.add( + new InternalDateHistogram.Bucket( + rangeBucket.getFrom().toInstant().toEpochMilli(), + rangeBucket.getDocCount(), + keyed, + format, + rangeBucket.getAggregations() + ) + ); + } + } + CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); + + // value source will be null for unmapped fields + // Important: use `rounding` here, not `shardRounding` + InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 + ? 
new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) + : null; + return new InternalDateHistogram( + range.getName(), + buckets, + order, + minDocCount, + rounding.offset(), + emptyBucketInfo, + format, + keyed, + range.getMetadata() + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index a5518c8a3eb4a..5f33f7768d4c0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.histogram; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -42,12 +43,65 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( DateHistogramAggregationBuilder.REGISTRY_KEY, List.of(CoreValuesSourceType.DATE, CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BOOLEAN), - DateHistogramAggregator::new, + DateHistogramAggregatorFactory::build, true); builder.register(DateHistogramAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.RANGE, DateRangeHistogramAggregator::new, true); } + private static Aggregator build( + String name, + AggregatorFactories factories, + Rounding rounding, + Rounding.Prepared preparedRounding, + BucketOrder order, + boolean keyed, + long minDocCount, + @Nullable LongBounds extendedBounds, + @Nullable LongBounds hardBounds, + ValuesSourceConfig valuesSourceConfig, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata 
+ ) throws IOException { + Aggregator optimized = DateHistogramAdaptedFromDateRangeAggregator.buildOptimizedOrNull( + name, + factories, + rounding, + preparedRounding, + order, + keyed, + minDocCount, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata + ); + if (optimized != null) { + return optimized; + } + return new DateHistogramAggregator( + name, + factories, + rounding, + preparedRounding, + order, + keyed, + minDocCount, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata + ); + }; + private final BucketOrder order; private final boolean keyed; private final long minDocCount; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java index 2c937ab104c54..fd68e3e1ac4b5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalDateRange.java @@ -27,6 +27,7 @@ import java.io.IOException; import java.time.Instant; import java.time.ZoneOffset; +import java.time.ZonedDateTime; import java.util.List; import java.util.Map; @@ -46,13 +47,13 @@ public Bucket(String key, double from, double to, long docCount, InternalAggrega } @Override - public Object getFrom() { + public ZonedDateTime getFrom() { return Double.isInfinite(((Number) from).doubleValue()) ? null : Instant.ofEpochMilli(((Number) from).longValue()).atZone(ZoneOffset.UTC); } @Override - public Object getTo() { + public ZonedDateTime getTo() { return Double.isInfinite(((Number) to).doubleValue()) ? 
null : Instant.ofEpochMilli(((Number) to).longValue()).atZone(ZoneOffset.UTC); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java index c750fdc2d062c..6d4bd035a1d52 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java @@ -113,7 +113,7 @@ public long getDocCount() { } @Override - public Aggregations getAggregations() { + public InternalAggregations getAggregations() { return aggregations; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java index 4bbfd3050106d..5cf8be6a0407f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java @@ -29,7 +29,7 @@ import java.util.Map; public interface RangeAggregatorSupplier { - Aggregator build(String name, + RangeAggregator build(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, diff --git a/server/src/test/java/org/elasticsearch/common/RoundingTests.java b/server/src/test/java/org/elasticsearch/common/RoundingTests.java index fa94cacbbe77c..5e7392caf9fdc 100644 --- a/server/src/test/java/org/elasticsearch/common/RoundingTests.java +++ b/server/src/test/java/org/elasticsearch/common/RoundingTests.java @@ -39,9 +39,11 @@ import java.time.zone.ZoneOffsetTransitionRule; import java.time.zone.ZoneRules; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.TimeUnit; +import static java.util.stream.Collectors.toList; 
import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -1017,6 +1019,35 @@ public void testNonMillisecondsBasedUnitCalendarRoundingSize() { assertThat(prepared.roundingSize(thirdQuarter, Rounding.DateTimeUnit.HOUR_OF_DAY), closeTo(2208.0, 0.000001)); } + public void testFixedRoundingPoints() { + Rounding rounding = Rounding.builder(Rounding.DateTimeUnit.QUARTER_OF_YEAR).build(); + assertFixedRoundingPoints( + rounding.prepare(time("2020-01-01T00:00:00"), time("2021-01-01T00:00:00")), + "2020-01-01T00:00:00", + "2020-04-01T00:00:00", + "2020-07-01T00:00:00", + "2020-10-01T00:00:00", + "2021-01-01T00:00:00" + ); + rounding = Rounding.builder(Rounding.DateTimeUnit.DAY_OF_MONTH).build(); + assertFixedRoundingPoints( + rounding.prepare(time("2020-01-01T00:00:00"), time("2020-01-06T00:00:00")), + "2020-01-01T00:00:00", + "2020-01-02T00:00:00", + "2020-01-03T00:00:00", + "2020-01-04T00:00:00", + "2020-01-05T00:00:00", + "2020-01-06T00:00:00" + ); + } + + private void assertFixedRoundingPoints(Rounding.Prepared prepared, String... 
expected) { + assertThat( + Arrays.stream(prepared.fixedRoundingPoints()).mapToObj(Instant::ofEpochMilli).collect(toList()), + equalTo(Arrays.stream(expected).map(RoundingTests::time).map(Instant::ofEpochMilli).collect(toList())) + ); + } + private void assertInterval(long rounded, long nextRoundingValue, Rounding rounding, int minutes, ZoneId tz) { assertInterval(rounded, dateBetween(rounded, nextRoundingValue), nextRoundingValue, rounding, tz); From 3f99f0431ed868959b4941f64189c4d87f7a8ef1 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 14:05:32 -0400 Subject: [PATCH 02/48] factor out collector --- .../bucket/range/RangeAggregator.java | 132 ++++++++++-------- 1 file changed, 71 insertions(+), 61 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 23319116d1bda..b14a527688450 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -215,13 +215,12 @@ public boolean equals(Object obj) { } } - final ValuesSource.Numeric valuesSource; - final DocValueFormat format; - final Range[] ranges; - final boolean keyed; - final InternalRange.Factory rangeFactory; - - final double[] maxTo; + private final ValuesSource.Numeric valuesSource; + private final DocValueFormat format; + private final Range[] ranges; + private final boolean keyed; + private final InternalRange.Factory rangeFactory; + private final Collector collector; public RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, InternalRange.Factory rangeFactory, Range[] ranges, boolean keyed, SearchContext context, @@ -236,12 +235,12 @@ public RangeAggregator(String name, AggregatorFactories factories, ValuesSource. 
this.ranges = ranges; - maxTo = new double[this.ranges.length]; + double[] maxTo = new double[this.ranges.length]; maxTo[0] = this.ranges[0].to; for (int i = 1; i < this.ranges.length; ++i) { maxTo[i] = Math.max(this.ranges[i].to,maxTo[i-1]); } - + collector = new OverlapCollector(maxTo); } @Override @@ -253,8 +252,7 @@ public ScoreMode scoreMode() { } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); return new LeafBucketCollectorBase(sub, values) { @Override @@ -263,59 +261,10 @@ public void collect(int doc, long bucket) throws IOException { final int valuesCount = values.docValueCount(); for (int i = 0, lo = 0; i < valuesCount; ++i) { final double value = values.nextValue(); - lo = collect(doc, value, bucket, lo); + lo = collector.collect(sub, doc, value, bucket, lo); } } } - - private int collect(int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { - int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes - int mid = (lo + hi) >>> 1; - while (lo <= hi) { - if (value < ranges[mid].from) { - hi = mid - 1; - } else if (value >= maxTo[mid]) { - lo = mid + 1; - } else { - break; - } - mid = (lo + hi) >>> 1; - } - if (lo > hi) return lo; // no potential candidate - - // binary search the lower bound - int startLo = lo, startHi = mid; - while (startLo <= startHi) { - final int startMid = (startLo + startHi) >>> 1; - if (value >= maxTo[startMid]) { - startLo = startMid + 1; - } else { - startHi = startMid - 1; - } - } - - // binary search the upper bound - int endLo = mid, endHi = hi; - while (endLo <= endHi) { - final int endMid = (endLo + endHi) >>> 1; - if (value < ranges[endMid].from) { - endHi = endMid - 1; - } else { - endLo = endMid + 
1; - } - } - - assert startLo == lowBound || value >= maxTo[startLo - 1]; - assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from; - - for (int i = startLo; i <= endHi; ++i) { - if (ranges[i].matches(value)) { - collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); - } - } - - return endHi + 1; - } }; } @@ -375,4 +324,65 @@ public InternalAggregation buildEmptyAggregation() { } } + interface Collector { + int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; + } + + private class OverlapCollector implements Collector { + private final double[] maxTo; + + public OverlapCollector(double[] maxTo) { + this.maxTo = maxTo; + } + + @Override + public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes + int mid = (lo + hi) >>> 1; + while (lo <= hi) { + if (value < ranges[mid].from) { + hi = mid - 1; + } else if (value >= maxTo[mid]) { + lo = mid + 1; + } else { + break; + } + mid = (lo + hi) >>> 1; + } + if (lo > hi) return lo; // no potential candidate + + // binary search the lower bound + int startLo = lo, startHi = mid; + while (startLo <= startHi) { + final int startMid = (startLo + startHi) >>> 1; + if (value >= maxTo[startMid]) { + startLo = startMid + 1; + } else { + startHi = startMid - 1; + } + } + + // binary search the upper bound + int endLo = mid, endHi = hi; + while (endLo <= endHi) { + final int endMid = (endLo + endHi) >>> 1; + if (value < ranges[endMid].from) { + endHi = endMid - 1; + } else { + endLo = endMid + 1; + } + } + + assert startLo == lowBound || value >= maxTo[startLo - 1]; + assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from; + + for (int i = startLo; i <= endHi; ++i) { + if (ranges[i].matches(value)) { + collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); + } + } 
+ + return endHi + 1; + } + } } From 6c8cb0bcbd9742d7aa3c2423f71ccde190069478 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 14:39:20 -0400 Subject: [PATCH 03/48] ordered --- .../bucket/range/RangeAggregator.java | 41 +++++++++++++++---- .../bucket/range/RangeAggregatorTests.java | 29 +++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index b14a527688450..50f94eaf3398c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -232,15 +232,19 @@ public RangeAggregator(String name, AggregatorFactories factories, ValuesSource. this.format = format; this.keyed = keyed; this.rangeFactory = rangeFactory; - this.ranges = ranges; + collector = hasOverlap() ? 
new OverlapCollector() : new NoOverlapCollector(); + } - double[] maxTo = new double[this.ranges.length]; - maxTo[0] = this.ranges[0].to; + private boolean hasOverlap() { + double lastEnd = ranges[0].to; for (int i = 1; i < this.ranges.length; ++i) { - maxTo[i] = Math.max(this.ranges[i].to,maxTo[i-1]); + if (ranges[i].from < lastEnd) { + return true; + } + lastEnd = ranges[i].to; } - collector = new OverlapCollector(maxTo); + return false; } @Override @@ -328,11 +332,34 @@ interface Collector { int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; } + private class NoOverlapCollector implements Collector { + @Override + public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + int lo = lowBound, hi = ranges.length - 1; + while (lo <= hi) { + final int mid = (lo + hi) >>> 1; + if (value < ranges[mid].from) { + hi = mid - 1; + } else if (value >= ranges[mid].to) { + lo = mid + 1; + } else { + collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, mid)); + return mid; + } + } + return lo; + } + } + private class OverlapCollector implements Collector { private final double[] maxTo; - public OverlapCollector(double[] maxTo) { - this.maxTo = maxTo; + public OverlapCollector() { + maxTo = new double[ranges.length]; + maxTo[0] = ranges[0].to; + for (int i = 1; i < ranges.length; ++i) { + maxTo[i] = Math.max(ranges[i].to, maxTo[i - 1]); + } } @Override diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index dda30646a6ca5..6e3db461e5cf7 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -48,6 +48,7 @@ import 
static java.util.Collections.singleton; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class RangeAggregatorTests extends AggregatorTestCase { @@ -295,6 +296,34 @@ public void testSubAggCollectsFromManyBucketsIfManyRanges() throws IOException { }); } + public void testOverlappingRanges() throws IOException { + RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("test_range_agg"); + aggregationBuilder.field(NUMBER_FIELD_NAME); + aggregationBuilder.addRange(0d, 5d); + aggregationBuilder.addRange(10d, 20d); + aggregationBuilder.addRange(0d, 20d); + testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 11))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 2))); + iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 3))); + }, result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(3)); + assertThat(ranges.get(0).getFrom(), equalTo(0d)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(1).getFrom(), equalTo(00d)); + assertThat(ranges.get(1).getTo(), equalTo(20d)); + assertThat(ranges.get(1).getDocCount(), equalTo(4L)); + assertThat(ranges.get(2).getFrom(), equalTo(10d)); + assertThat(ranges.get(2).getTo(), equalTo(20d)); + assertThat(ranges.get(2).getDocCount(), equalTo(1L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER)); + } + private void testCase(Query query, CheckedConsumer buildIndex, Consumer> verify) throws IOException { From 995ca2428be12b7c3d6bb9b6570a4ccb9ed97d08 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 
15:05:44 -0400 Subject: [PATCH 04/48] refactor --- .../range/AbstractRangeAggregatorFactory.java | 2 +- .../GeoDistanceRangeAggregatorFactory.java | 2 +- .../bucket/range/RangeAggregator.java | 125 ++++++++++++++---- 3 files changed, 100 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java index 0ed96c207ec69..cf0c8188f1590 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java @@ -51,7 +51,7 @@ public static void registerAggregators( builder.register( registryKey, List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), - RangeAggregator::new, + RangeAggregator::build, true); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java index 83aa8cc1407c7..7e4863c1b733f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java @@ -66,7 +66,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { cardinality, metadata) -> { DistanceSource distanceSource = new DistanceSource((ValuesSource.GeoPoint) valuesSource, distanceType, origin, units); - return new RangeAggregator( + return RangeAggregator.build( name, factories, distanceSource, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 50f94eaf3398c..cca096a7fcd97 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -41,7 +41,9 @@ import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.range.InternalRange.Factory; import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -52,7 +54,7 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; -public class RangeAggregator extends BucketsAggregator { +public abstract class RangeAggregator extends BucketsAggregator { public static final ParseField RANGES_FIELD = new ParseField("ranges"); public static final ParseField KEYED_FIELD = new ParseField("keyed"); @@ -215,12 +217,54 @@ public boolean equals(Object obj) { } } + public static RangeAggregator build( + String name, + AggregatorFactories factories, + ValuesSource.Numeric valuesSource, + DocValueFormat format, + InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (hasOverlap(ranges)) { + return new OverlapRangeAggregator( + name, + factories, + valuesSource, + format, + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + } + return new NoOverlapCollector( + name, + factories, + valuesSource, + format, + rangeFactory, + ranges, + keyed, + context, + 
parent, + cardinality, + metadata + ); + } + private final ValuesSource.Numeric valuesSource; private final DocValueFormat format; - private final Range[] ranges; + protected final Range[] ranges; private final boolean keyed; private final InternalRange.Factory rangeFactory; - private final Collector collector; public RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, InternalRange.Factory rangeFactory, Range[] ranges, boolean keyed, SearchContext context, @@ -233,18 +277,6 @@ public RangeAggregator(String name, AggregatorFactories factories, ValuesSource. this.keyed = keyed; this.rangeFactory = rangeFactory; this.ranges = ranges; - collector = hasOverlap() ? new OverlapCollector() : new NoOverlapCollector(); - } - - private boolean hasOverlap() { - double lastEnd = ranges[0].to; - for (int i = 1; i < this.ranges.length; ++i) { - if (ranges[i].from < lastEnd) { - return true; - } - lastEnd = ranges[i].to; - } - return false; } @Override @@ -265,14 +297,14 @@ public void collect(int doc, long bucket) throws IOException { final int valuesCount = values.docValueCount(); for (int i = 0, lo = 0; i < valuesCount; ++i) { final double value = values.nextValue(); - lo = collector.collect(sub, doc, value, bucket, lo); + lo = RangeAggregator.this.collect(sub, doc, value, bucket, lo); } } } }; } - private long subBucketOrdinal(long owningBucketOrdinal, int rangeOrd) { + protected long subBucketOrdinal(long owningBucketOrdinal, int rangeOrd) { return owningBucketOrdinal * ranges.length + rangeOrd; } @@ -328,13 +360,28 @@ public InternalAggregation buildEmptyAggregation() { } } - interface Collector { - int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; - } + protected abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) + throws IOException; + + private static class NoOverlapCollector 
extends RangeAggregator { + public NoOverlapCollector( + String name, + AggregatorFactories factories, + Numeric valuesSource, + DocValueFormat format, + Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, valuesSource, format, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); + } - private class NoOverlapCollector implements Collector { @Override - public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + protected int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { int lo = lowBound, hi = ranges.length - 1; while (lo <= hi) { final int mid = (lo + hi) >>> 1; @@ -351,10 +398,21 @@ public int collect(LeafBucketCollector sub, int doc, double value, long owningBu } } - private class OverlapCollector implements Collector { - private final double[] maxTo; - - public OverlapCollector() { + private static class OverlapRangeAggregator extends RangeAggregator { + public OverlapRangeAggregator( + String name, + AggregatorFactories factories, + Numeric valuesSource, + DocValueFormat format, + Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, valuesSource, format, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); maxTo = new double[ranges.length]; maxTo[0] = ranges[0].to; for (int i = 1; i < ranges.length; ++i) { @@ -362,8 +420,10 @@ public OverlapCollector() { } } + private final double[] maxTo; + @Override - public int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + protected int collect(LeafBucketCollector sub, int doc, 
double value, long owningBucketOrdinal, int lowBound) throws IOException { int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes int mid = (lo + hi) >>> 1; while (lo <= hi) { @@ -412,4 +472,15 @@ public int collect(LeafBucketCollector sub, int doc, double value, long owningBu return endHi + 1; } } + + private static boolean hasOverlap(Range[] ranges) { + double lastEnd = ranges[0].to; + for (int i = 1; i < ranges.length; ++i) { + if (ranges[i].from < lastEnd) { + return true; + } + lastEnd = ranges[i].to; + } + return false; + } } From d7becd802ac13a937b90d87489001c4b20bfecd8 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 15:44:36 -0400 Subject: [PATCH 05/48] Fixup --- server/src/main/java/org/elasticsearch/common/Rounding.java | 2 +- .../bucket/histogram/DateHistogramAggregator.java | 3 ++- .../bucket/histogram/DateHistogramAggregatorFactory.java | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java index 9a1d82662dff3..e53dbefe5bd07 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -1263,7 +1263,7 @@ public double roundingSize(long utcMillis, DateTimeUnit timeUnit) { @Override public long[] fixedRoundingPoints() { - // NOCOMMIT we can pick real rounding points + // TODO we can likely translate here return null; } }; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index c5f0151f0e756..6a465ea60c38d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -72,6 +72,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg String name, AggregatorFactories factories, Rounding rounding, + Rounding.Prepared preparedRounding, BucketOrder order, boolean keyed, long minDocCount, @@ -86,7 +87,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg super(name, factories, aggregationContext, parent, CardinalityUpperBound.MANY, metadata); this.rounding = rounding; - this.preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); + this.preparedRounding = preparedRounding; this.order = order; order.validate(this); this.keyed = keyed; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 5f33f7768d4c0..9b91daeb7f95d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -53,7 +53,6 @@ private static Aggregator build( String name, AggregatorFactories factories, Rounding rounding, - Rounding.Prepared preparedRounding, BucketOrder order, boolean keyed, long minDocCount, @@ -65,6 +64,7 @@ private static Aggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { + Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); Aggregator optimized = DateHistogramAdaptedFromDateRangeAggregator.buildOptimizedOrNull( name, factories, @@ -165,7 +165,7 @@ protected Aggregator doCreateInternal( protected Aggregator createUnmapped(SearchContext searchContext, Aggregator parent, Map metadata) throws IOException { - return new 
DateHistogramAggregator(name, factories, rounding, order, keyed, minDocCount, extendedBounds, hardBounds, + return new DateHistogramAggregator(name, factories, rounding, null, order, keyed, minDocCount, extendedBounds, hardBounds, config, searchContext, parent, CardinalityUpperBound.NONE, metadata); } } From 0a1987d99b5ff07cdbbe5100cf291cb45cfefd08 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 16:17:42 -0400 Subject: [PATCH 06/48] Better name --- .../DateHistogramAdaptedFromDateRangeAggregator.java | 10 +++++++++- .../histogram/DateHistogramAggregatorFactory.java | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java index 12dc00b4c096b..1d79d4b0d5e8f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java @@ -27,7 +27,7 @@ * Adapts a {@link DateHistogramAggregator} results into {@link InternalDateHistogram}s. */ public class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { - static DateHistogramAdaptedFromDateRangeAggregator buildOptimizedOrNull( + static DateHistogramAdaptedFromDateRangeAggregator buildOrNull( String name, AggregatorFactories factories, Rounding rounding, @@ -53,6 +53,14 @@ static DateHistogramAdaptedFromDateRangeAggregator buildOptimizedOrNull( if (points == null) { return null; } + // Range aggs use a double to aggregate and we don't want to lose precision. 
+ long max = points[points.length - 1]; + if ((double) max != max) { + return null; + } + if ((double) points[0] != points[0]) { + return null; + } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() .getValuesSourceRegistry() .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 9b91daeb7f95d..807182807d802 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -65,7 +65,7 @@ private static Aggregator build( Map metadata ) throws IOException { Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); - Aggregator optimized = DateHistogramAdaptedFromDateRangeAggregator.buildOptimizedOrNull( + Aggregator optimized = DateHistogramAdaptedFromDateRangeAggregator.buildOrNull( name, factories, rounding, From 1258ad4d88b441a88f315fdcf5984b9349473e38 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 16:32:44 -0400 Subject: [PATCH 07/48] Experiment --- .../bucket/filter/FiltersAggregator.java | 33 ++++++++++++++++--- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 4687ccd323d0b..deb51be68ab2a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -20,6 +20,8 @@ package 
org.elasticsearch.search.aggregations.bucket.filter; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -116,19 +118,20 @@ public boolean equals(Object obj) { } private final String[] keys; - private Supplier filters; + private final Supplier filtersSupplier; private final boolean keyed; private final boolean showOtherBucket; private final String otherBucketKey; private final int totalNumKeys; + private Weight[] filters; - public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier filters, boolean keyed, + public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier filtersSupplier, boolean keyed, String otherBucketKey, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { super(name, factories, context, parent, cardinality.multiply(keys.length + (otherBucketKey == null ? 
0 : 1)), metadata); this.keyed = keyed; this.keys = keys; - this.filters = filters; + this.filtersSupplier = filtersSupplier; this.showOtherBucket = otherBucketKey != null; this.otherBucketKey = otherBucketKey; if (showOtherBucket) { @@ -141,8 +144,28 @@ public FiltersAggregator(String name, AggregatorFactories factories, String[] ke @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - // no need to provide deleted docs to the filter - Weight[] filters = this.filters.get(); + if (filters == null) { + filters = this.filtersSupplier.get(); + } + + if (parent == null) { + Bits live = ctx.reader().getLiveDocs(); + for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { + DocIdSetIterator itr = filters[filterOrd].scorer(ctx).iterator(); + if (live == null) { + while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + collectBucket(sub, itr.docID(), filterOrd); + } + } else { + while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (live.get(itr.docID())) { + collectBucket(sub, itr.docID(), filterOrd); + } + } + } + } + throw new CollectionTerminatedException(); + } final Bits[] bits = new Bits[filters.length]; for (int i = 0; i < filters.length; ++i) { bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx)); From da02047b005365a94f687994e8426cd921a8c521 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 12 Oct 2020 18:01:26 -0400 Subject: [PATCH 08/48] Rework --- .../bucket/filter/FiltersAggregator.java | 228 +++++++++++++----- .../filter/FiltersAggregatorFactory.java | 2 +- .../bucket/range/RangeAggregator.java | 2 +- 3 files changed, 176 insertions(+), 56 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index deb51be68ab2a..d6ee00edbc143 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -49,7 +49,7 @@ import java.util.Objects; import java.util.function.Supplier; -public class FiltersAggregator extends BucketsAggregator { +public abstract class FiltersAggregator extends BucketsAggregator { public static final ParseField FILTERS_FIELD = new ParseField("filters"); public static final ParseField OTHER_BUCKET_FIELD = new ParseField("other_bucket"); @@ -117,12 +117,88 @@ public boolean equals(Object obj) { } } + public static FiltersAggregator build( + String name, + AggregatorFactories factories, + String[] keys, + Supplier filtersSupplier, + boolean keyed, + String otherBucketKey, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + FiltersAggregator filterOrder = filterOrderAggregatorOrNull( + name, + factories, + keys, + filtersSupplier, + keyed, + otherBucketKey, + context, + parent, + cardinality, + metadata + ); + if (filterOrder != null) { + return filterOrder; + } + return new StandardOrderAggregator( + name, + factories, + keys, + filtersSupplier, + keyed, + otherBucketKey, + context, + parent, + cardinality, + metadata + ); + } + + private static FiltersAggregator filterOrderAggregatorOrNull( + String name, + AggregatorFactories factories, + String[] keys, + Supplier filtersSupplier, + boolean keyed, + String otherBucketKey, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (parent != null) { + return null; + } + if (factories.countAggregators() != 0) { + return null; + } + if (otherBucketKey != null) { + return null; + } + // NOCOMMIT this is only ok if there isn't a query or if the filter is "inside" the query already + return new FilterOrderAggregator( + name, + factories, + keys, + 
filtersSupplier, + keyed, + otherBucketKey, + context, + parent, + cardinality, + metadata + ); + } + + private final String[] keys; private final Supplier filtersSupplier; private final boolean keyed; - private final boolean showOtherBucket; - private final String otherBucketKey; - private final int totalNumKeys; + protected final String otherBucketKey; private Weight[] filters; public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier filtersSupplier, boolean keyed, @@ -132,64 +208,23 @@ public FiltersAggregator(String name, AggregatorFactories factories, String[] ke this.keyed = keyed; this.keys = keys; this.filtersSupplier = filtersSupplier; - this.showOtherBucket = otherBucketKey != null; this.otherBucketKey = otherBucketKey; - if (showOtherBucket) { - this.totalNumKeys = keys.length + 1; - } else { - this.totalNumKeys = keys.length; - } } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, - final LeafBucketCollector sub) throws IOException { + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { if (filters == null) { filters = this.filtersSupplier.get(); } - - if (parent == null) { - Bits live = ctx.reader().getLiveDocs(); - for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { - DocIdSetIterator itr = filters[filterOrd].scorer(ctx).iterator(); - if (live == null) { - while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - collectBucket(sub, itr.docID(), filterOrd); - } - } else { - while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (live.get(itr.docID())) { - collectBucket(sub, itr.docID(), filterOrd); - } - } - } - } - throw new CollectionTerminatedException(); - } - final Bits[] bits = new Bits[filters.length]; - for (int i = 0; i < filters.length; ++i) { - bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx)); - } - return new LeafBucketCollectorBase(sub, null) { - 
@Override - public void collect(int doc, long bucket) throws IOException { - boolean matched = false; - for (int i = 0; i < bits.length; i++) { - if (bits[i].get(doc)) { - collectBucket(sub, doc, bucketOrd(bucket, i)); - matched = true; - } - } - if (showOtherBucket && !matched) { - collectBucket(sub, doc, bucketOrd(bucket, bits.length)); - } - } - }; + return getLeafCollector(ctx, sub, filters); } + protected abstract LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) + throws IOException; + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - return buildAggregationsForFixedBucketCount(owningBucketOrds, keys.length + (showOtherBucket ? 1 : 0), + return buildAggregationsForFixedBucketCount(owningBucketOrds, keys.length + (otherBucketKey == null ? 0 : 1), (offsetInOwningOrd, docCount, subAggregationResults) -> { if (offsetInOwningOrd < keys.length) { return new InternalFilters.InternalBucket(keys[offsetInOwningOrd], docCount, @@ -208,7 +243,7 @@ public InternalAggregation buildEmptyAggregation() { buckets.add(bucket); } - if (showOtherBucket) { + if (otherBucketKey != null) { InternalFilters.InternalBucket bucket = new InternalFilters.InternalBucket(otherBucketKey, 0, subAggs, keyed); buckets.add(bucket); } @@ -216,8 +251,93 @@ public InternalAggregation buildEmptyAggregation() { return new InternalFilters(name, buckets, keyed, metadata()); } - final long bucketOrd(long owningBucketOrdinal, int filterOrd) { - return owningBucketOrdinal * totalNumKeys + filterOrd; + private static class FilterOrderAggregator extends FiltersAggregator { + public FilterOrderAggregator( + String name, + AggregatorFactories factories, + String[] keys, + Supplier filtersSupplier, + boolean keyed, + String otherBucketKey, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, keys, 
filtersSupplier, keyed, otherBucketKey, context, parent, cardinality, metadata); + } + + @Override + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) + throws IOException { + Bits live = ctx.reader().getLiveDocs(); + for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { + DocIdSetIterator itr = filters[filterOrd].scorer(ctx).iterator(); + if (live == null) { + while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + collectBucket(sub, itr.docID(), filterOrd); + } + } else { + while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (live.get(itr.docID())) { + collectBucket(sub, itr.docID(), filterOrd); + } + } + } + } + throw new CollectionTerminatedException(); + } } + private static class StandardOrderAggregator extends FiltersAggregator { + private final int totalNumKeys; + + public StandardOrderAggregator( + String name, + AggregatorFactories factories, + String[] keys, + Supplier filtersSupplier, + boolean keyed, + String otherBucketKey, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + super(name, factories, keys, filtersSupplier, keyed, otherBucketKey, context, parent, cardinality, metadata); + if (otherBucketKey == null) { + this.totalNumKeys = keys.length; + } else { + this.totalNumKeys = keys.length + 1; + } + } + + @Override + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) + throws IOException { + final Bits[] bits = new Bits[filters.length]; + for (int i = 0; i < filters.length; ++i) { + bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx)); + } + return new LeafBucketCollectorBase(sub, null) { + @Override + public void collect(int doc, long bucket) throws IOException { + boolean matched = false; + for (int i = 0; i < bits.length; i++) { + if (bits[i].get(doc)) { + collectBucket(sub, doc, 
bucketOrd(bucket, i)); + matched = true; + } + } + if (otherBucketKey != null && false == matched) { + collectBucket(sub, doc, bucketOrd(bucket, bits.length)); + } + } + }; + } + + final long bucketOrd(long owningBucketOrdinal, int filterOrd) { + return owningBucketOrdinal * totalNumKeys + filterOrd; + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index 83d49dd12d626..d0e460c014a61 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -90,7 +90,7 @@ public Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { - return new FiltersAggregator(name, factories, keys, () -> getWeights(searchContext), keyed, + return FiltersAggregator.build(name, factories, keys, () -> getWeights(searchContext), keyed, otherBucket ? 
otherBucketKey : null, searchContext, parent, cardinality, metadata); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index cca096a7fcd97..6b283b5e4f953 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -266,7 +266,7 @@ public static RangeAggregator build( private final boolean keyed; private final InternalRange.Factory rangeFactory; - public RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, + private RangeAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, InternalRange.Factory rangeFactory, Range[] ranges, boolean keyed, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { From 9157f00d32ed4293b6fd9325675d62d72af23c82 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 08:21:57 -0400 Subject: [PATCH 09/48] Use query --- .../bucket/filter/FiltersAggregator.java | 12 ++++++----- .../filter/FiltersAggregatorFactory.java | 20 ++++++++++++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index d6ee00edbc143..271ba5bb5630c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -22,6 +22,8 @@ import org.apache.lucene.index.LeafReaderContext; import 
org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -47,6 +49,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Function; import java.util.function.Supplier; public abstract class FiltersAggregator extends BucketsAggregator { @@ -121,7 +124,7 @@ public static FiltersAggregator build( String name, AggregatorFactories factories, String[] keys, - Supplier filtersSupplier, + Function filtersSupplier, boolean keyed, String otherBucketKey, SearchContext context, @@ -148,7 +151,7 @@ public static FiltersAggregator build( name, factories, keys, - filtersSupplier, + () -> filtersSupplier.apply(new MatchAllDocsQuery()), keyed, otherBucketKey, context, @@ -162,7 +165,7 @@ private static FiltersAggregator filterOrderAggregatorOrNull( String name, AggregatorFactories factories, String[] keys, - Supplier filtersSupplier, + Function filtersSupplier, boolean keyed, String otherBucketKey, SearchContext context, @@ -179,12 +182,11 @@ private static FiltersAggregator filterOrderAggregatorOrNull( if (otherBucketKey != null) { return null; } - // NOCOMMIT this is only ok if there isn't a query or if the filter is "inside" the query already return new FilterOrderAggregator( name, factories, keys, - filtersSupplier, + () -> filtersSupplier.apply(context.query()), keyed, otherBucketKey, context, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index d0e460c014a61..dbdfcf11ce0c7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -19,7 +19,10 @@ package org.elasticsearch.search.aggregations.bucket.filter; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; @@ -66,17 +69,24 @@ public FiltersAggregatorFactory(String name, List filters, boolean * necessary. This is done lazily so that the {@link Weight}s are only * created if the aggregation collects documents reducing the overhead of * the aggregation in the case where no documents are collected. - * - * Note that as aggregations are initialsed and executed in a serial manner, + *

+ * Note that as aggregations are initialized and executed in a serial manner, * no concurrency considerations are necessary here. */ - public Weight[] getWeights(SearchContext searchContext) { + public Weight[] getWeights(Query query, SearchContext searchContext) { if (weights == null) { try { IndexSearcher contextSearcher = searchContext.searcher(); weights = new Weight[filters.length]; for (int i = 0; i < filters.length; ++i) { - this.weights[i] = contextSearcher.createWeight(contextSearcher.rewrite(filters[i]), ScoreMode.COMPLETE_NO_SCORES, 1); + Query filter = filters[i]; + if (false == query instanceof MatchAllDocsQuery) { + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(query, BooleanClause.Occur.MUST); + builder.add(filter, BooleanClause.Occur.MUST); + filter = builder.build(); + } + this.weights[i] = contextSearcher.createWeight(contextSearcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); } } catch (IOException e) { throw new AggregationInitializationException("Failed to initialse filters for aggregation [" + name() + "]", e); @@ -90,7 +100,7 @@ public Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { - return FiltersAggregator.build(name, factories, keys, () -> getWeights(searchContext), keyed, + return FiltersAggregator.build(name, factories, keys, query -> getWeights(query, searchContext), keyed, otherBucket ? 
otherBucketKey : null, searchContext, parent, cardinality, metadata); } From 20047dc92fc4bf190ede12ecf9759c6cfe0210f8 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 09:18:20 -0400 Subject: [PATCH 10/48] Super hack --- .../filter/FiltersAggregatorFactory.java | 117 ++++++++++++++++-- 1 file changed, 110 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index dbdfcf11ce0c7..dc61280cff9f0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -19,10 +19,15 @@ package org.elasticsearch.search.aggregations.bucket.filter; +import org.apache.logging.log4j.LogManager; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; @@ -39,6 +44,8 @@ import java.util.List; import java.util.Map; +import static java.util.Arrays.compareUnsigned; + public class FiltersAggregatorFactory extends AggregatorFactory { private final String[] keys; @@ -79,13 +86,7 @@ public Weight[] getWeights(Query query, SearchContext searchContext) { IndexSearcher contextSearcher = searchContext.searcher(); weights = new Weight[filters.length]; for (int i = 0; i < filters.length; ++i) { - Query filter = filters[i]; - if (false == query instanceof 
MatchAllDocsQuery) { - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(query, BooleanClause.Occur.MUST); - builder.add(filter, BooleanClause.Occur.MUST); - filter = builder.build(); - } + Query filter = filterMatchingBoth(query, filters[i]); this.weights[i] = contextSearcher.createWeight(contextSearcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); } } catch (IOException e) { @@ -95,6 +96,108 @@ public Weight[] getWeights(Query query, SearchContext searchContext) { return weights; } + private Query filterMatchingBoth(Query lhs, Query rhs) { + if (lhs instanceof MatchAllDocsQuery) { + return rhs; + } + if (rhs instanceof MatchAllDocsQuery) { + return lhs; + } + Query unwrappedLhs = unwrap(lhs); + Query unwrappedRhs = unwrap(rhs); + LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs, unwrappedRhs); + LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs instanceof PointRangeQuery, unwrappedRhs instanceof PointRangeQuery); + if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { + PointRangeQuery merged = mergePointRangeQueries((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); + LogManager.getLogger().error("ADSFDSAF {}", merged); + if (merged != null) { + // TODO rewrap? 
+ return merged; + } + } + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(lhs, BooleanClause.Occur.MUST); + builder.add(rhs, BooleanClause.Occur.MUST); + return builder.build(); + } + + private Query unwrap(Query query) { + if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { + query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + } + if (query instanceof IndexOrDocValuesQuery) { + query = ((IndexOrDocValuesQuery) query).getIndexQuery(); + } + return query; + } + + private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQuery rhs) { + if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { + return null; + } + byte[] lower = mergePoint(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), true); + LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(lower, 0)); + if (lower == null) { + return null; + } + byte[] upper = mergePoint(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), false); + LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(upper, 0)); + if (upper == null) { + return null; + } + return new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { + @Override + protected String toString(int dimension, byte[] value) { + // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. + StringBuilder sb = new StringBuilder(); + sb.append("binary("); + for (int i = 0; i < value.length; i++) { + if (i > 0) { + sb.append(' '); + } + sb.append(Integer.toHexString(value[i] & 0xFF)); + } + sb.append(')'); + return sb.toString(); + } + }; + } + + /** + * Figure out if lhs's lower point is lower in all dimensions than + * rhs's lower point or if it is further. Return null if it is closer + * in some dimensions and further in others. 
+ */ + private byte[] mergePoint(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean mergingLower) { + int runningCmp = 0; + for (int dim = 0; dim < numDims; dim++) { + int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); + if (runningCmp == 0) { + // Previous dimensions were all equal + runningCmp = cmp; + continue; + } + if (cmp == 0) { + // This dimension has the same value. + continue; + } + if ((runningCmp ^ cmp) < 0) { + // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. + return null; + } + } + if (runningCmp < 0) { + // lhs is lower + return mergingLower ? rhs : lhs; + } + return mergingLower ? lhs : rhs; + } + + private int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { + int offset = dim * bytesPerDim; + return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); + } + @Override public Aggregator createInternal(SearchContext searchContext, Aggregator parent, From e69628e7e081daea268a860fb329f8a1b984f44e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 12:19:23 -0400 Subject: [PATCH 11/48] Shuffle --- .../action/search/TransportSearchIT.java | 5 + .../aggregations/AdaptingAggregator.java | 11 +- .../search/aggregations/Aggregator.java | 5 + .../search/aggregations/AggregatorBase.java | 1 + .../bucket/DeferringBucketCollector.java | 5 + .../bucket/filter/FiltersAggregator.java | 198 +++++++++++++---- .../filter/FiltersAggregatorFactory.java | 120 +---------- ...stogramAdaptedFromDateRangeAggregator.java | 160 -------------- .../histogram/DateHistogramAggregator.java | 200 ++++++++++++++++++ .../DateHistogramAggregatorFactory.java | 57 +---- .../range/AbstractRangeAggregatorFactory.java | 4 +- .../bucket/range/RangeAggregator.java | 124 ++++++++++- .../bucket/range/RangeAggregatorSupplier.java | 9 +- .../aggregation/ProfilingAggregator.java | 5 + 14 files changed, 517 insertions(+), 387 deletions(-) delete mode 100644 
server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java index 2f295a1c8a395..5ae5e7909bb6a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java @@ -600,5 +600,10 @@ public void preCollection() throws IOException {} @Override public void postCollection() throws IOException {} + + @Override + public Aggregator[] subAggregators() { + throw new UnsupportedOperationException(); + } } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index d62953e6d8d0f..057fa671d5423 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -6,6 +6,8 @@ import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; public abstract class AdaptingAggregator extends Aggregator { private final Aggregator delegate; @@ -74,9 +76,10 @@ public InternalAggregation buildEmptyAggregation() { return adapt(delegate.buildEmptyAggregation()); } - protected abstract InternalAggregation adapt(InternalAggregation delegateResult); - - public final InternalAggregations buildEmptySubAggregations() { - return ((BucketsAggregator) delegate).buildEmptySubAggregations(); + @Override + public Aggregator[] subAggregators() { + return delegate.subAggregators(); } + + protected abstract InternalAggregation adapt(InternalAggregation delegateResult); } diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java index 061640e73b510..9ddb20452e846 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java @@ -178,6 +178,11 @@ public final InternalAggregation buildTopLevel() throws IOException { */ public void collectDebugInfo(BiConsumer add) {} + /** + * Get the aggregators running under this one. + */ + public abstract Aggregator[] subAggregators(); + /** Aggregation mode for sub aggregations. */ public enum SubAggCollectionMode implements Writeable { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java index 1345fae87b061..bda03fc1abbf7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java @@ -225,6 +225,7 @@ public Aggregator parent() { return parent; } + @Override public Aggregator[] subAggregators() { return subAggregators; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java index bc6ab0b746fe1..0207cdbff2ef6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java @@ -131,6 +131,11 @@ public Aggregator resolveSortPath(PathElement next, Iterator path) public BucketComparator bucketComparator(String key, SortOrder order) { throw new UnsupportedOperationException("Can't sort on deferred aggregations"); } + + @Override + public Aggregator[] subAggregators() { + return 
in.subAggregators(); + } } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 271ba5bb5630c..bb13030811293 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -19,11 +19,19 @@ package org.elasticsearch.search.aggregations.bucket.filter; +import org.apache.logging.log4j.LogManager; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -49,8 +57,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.function.Function; -import java.util.function.Supplier; + +import static java.util.Arrays.compareUnsigned; public abstract class FiltersAggregator extends BucketsAggregator { @@ -124,7 +132,7 @@ public static FiltersAggregator build( String name, AggregatorFactories factories, String[] keys, - Function filtersSupplier, + Query[] filters, boolean keyed, String otherBucketKey, SearchContext context, @@ -136,7 +144,7 @@ public static FiltersAggregator build( name, factories, keys, - filtersSupplier, + filters, keyed, otherBucketKey, context, 
@@ -151,7 +159,7 @@ public static FiltersAggregator build( name, factories, keys, - () -> filtersSupplier.apply(new MatchAllDocsQuery()), + filters, keyed, otherBucketKey, context, @@ -165,7 +173,7 @@ private static FiltersAggregator filterOrderAggregatorOrNull( String name, AggregatorFactories factories, String[] keys, - Function filtersSupplier, + Query[] filters, boolean keyed, String otherBucketKey, SearchContext context, @@ -184,11 +192,9 @@ private static FiltersAggregator filterOrderAggregatorOrNull( } return new FilterOrderAggregator( name, - factories, keys, - () -> filtersSupplier.apply(context.query()), + filters, keyed, - otherBucketKey, context, parent, cardinality, @@ -196,34 +202,19 @@ private static FiltersAggregator filterOrderAggregatorOrNull( ); } - private final String[] keys; - private final Supplier filtersSupplier; private final boolean keyed; protected final String otherBucketKey; - private Weight[] filters; - public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, Supplier filtersSupplier, boolean keyed, + public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, boolean keyed, String otherBucketKey, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { super(name, factories, context, parent, cardinality.multiply(keys.length + (otherBucketKey == null ? 
0 : 1)), metadata); this.keyed = keyed; this.keys = keys; - this.filtersSupplier = filtersSupplier; this.otherBucketKey = otherBucketKey; } - @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - if (filters == null) { - filters = this.filtersSupplier.get(); - } - return getLeafCollector(ctx, sub, filters); - } - - protected abstract LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) - throws IOException; - @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return buildAggregationsForFixedBucketCount(owningBucketOrds, keys.length + (otherBucketKey == null ? 0 : 1), @@ -253,28 +244,34 @@ public InternalAggregation buildEmptyAggregation() { return new InternalFilters(name, buckets, keyed, metadata()); } + public abstract boolean collectsInFilterOrder(); + private static class FilterOrderAggregator extends FiltersAggregator { + private final Query[] filters; + private Weight[] filterWeights; + public FilterOrderAggregator( String name, - AggregatorFactories factories, String[] keys, - Supplier filtersSupplier, + Query[] filters, boolean keyed, - String otherBucketKey, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata ) throws IOException { - super(name, factories, keys, filtersSupplier, keyed, otherBucketKey, context, parent, cardinality, metadata); + super(name, AggregatorFactories.EMPTY, keys, keyed, null, context, parent, cardinality, metadata); + this.filters = filters; } @Override - protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) - throws IOException { + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + if (filterWeights == null) { + filterWeights = buildWeights(context.query(), filters); + } Bits live = 
ctx.reader().getLiveDocs(); for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { - DocIdSetIterator itr = filters[filterOrd].scorer(ctx).iterator(); + DocIdSetIterator itr = filterWeights[filterOrd].scorer(ctx).iterator(); if (live == null) { while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { collectBucket(sub, itr.docID(), filterOrd); @@ -289,16 +286,24 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket } throw new CollectionTerminatedException(); } + + @Override + public boolean collectsInFilterOrder() { + return true; + } } private static class StandardOrderAggregator extends FiltersAggregator { + private final Query[] filters; + private Weight[] filterWeights; + private final int totalNumKeys; public StandardOrderAggregator( String name, AggregatorFactories factories, String[] keys, - Supplier filtersSupplier, + Query[] filters, boolean keyed, String otherBucketKey, SearchContext context, @@ -306,7 +311,8 @@ public StandardOrderAggregator( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - super(name, factories, keys, filtersSupplier, keyed, otherBucketKey, context, parent, cardinality, metadata); + super(name, factories, keys, keyed, otherBucketKey, context, parent, cardinality, metadata); + this.filters = filters; if (otherBucketKey == null) { this.totalNumKeys = keys.length; } else { @@ -315,11 +321,13 @@ public StandardOrderAggregator( } @Override - protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub, Weight[] filters) - throws IOException { + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + if (filterWeights == null) { + filterWeights = buildWeights(new MatchAllDocsQuery(), filters); + } final Bits[] bits = new Bits[filters.length]; for (int i = 0; i < filters.length; ++i) { - bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filters[i].scorerSupplier(ctx)); + 
bits[i] = Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), filterWeights[i].scorerSupplier(ctx)); } return new LeafBucketCollectorBase(sub, null) { @Override @@ -341,5 +349,121 @@ public void collect(int doc, long bucket) throws IOException { final long bucketOrd(long owningBucketOrdinal, int filterOrd) { return owningBucketOrdinal * totalNumKeys + filterOrd; } + + @Override + public boolean collectsInFilterOrder() { + return false; + } + } + + protected Weight[] buildWeights(Query topLevelQuery, Query filters[]) throws IOException{ + Weight[] weights = new Weight[filters.length]; + for (int i = 0; i < filters.length; ++i) { + Query filter = filterMatchingBoth(topLevelQuery, filters[i]); + weights[i] = context.searcher().createWeight(context.searcher().rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); + } + return weights; + } + + private Query filterMatchingBoth(Query lhs, Query rhs) { + if (lhs instanceof MatchAllDocsQuery) { + return rhs; + } + if (rhs instanceof MatchAllDocsQuery) { + return lhs; + } + Query unwrappedLhs = unwrap(lhs); + Query unwrappedRhs = unwrap(rhs); + LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs, unwrappedRhs); + LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs instanceof PointRangeQuery, unwrappedRhs instanceof PointRangeQuery); + if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { + PointRangeQuery merged = mergePointRangeQueries((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); + LogManager.getLogger().error("ADSFDSAF {}", merged); + if (merged != null) { + // TODO rewrap? 
+ return merged; + } + } + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.add(lhs, BooleanClause.Occur.MUST); + builder.add(rhs, BooleanClause.Occur.MUST); + return builder.build(); + } + + private Query unwrap(Query query) { + if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { + query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + } + if (query instanceof IndexOrDocValuesQuery) { + query = ((IndexOrDocValuesQuery) query).getIndexQuery(); + } + return query; + } + + private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQuery rhs) { + if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { + return null; + } + byte[] lower = mergePoint(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), true); + LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(lower, 0)); + if (lower == null) { + return null; + } + byte[] upper = mergePoint(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), false); + LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(upper, 0)); + if (upper == null) { + return null; + } + return new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { + @Override + protected String toString(int dimension, byte[] value) { + // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. + StringBuilder sb = new StringBuilder(); + sb.append("binary("); + for (int i = 0; i < value.length; i++) { + if (i > 0) { + sb.append(' '); + } + sb.append(Integer.toHexString(value[i] & 0xFF)); + } + sb.append(')'); + return sb.toString(); + } + }; + } + + /** + * Figure out if lhs's lower point is lower in all dimensions than + * rhs's lower point or if it is further. Return null if it is closer + * in some dimensions and further in others. 
+ */ + private byte[] mergePoint(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean mergingLower) { + int runningCmp = 0; + for (int dim = 0; dim < numDims; dim++) { + int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); + if (runningCmp == 0) { + // Previous dimensions were all equal + runningCmp = cmp; + continue; + } + if (cmp == 0) { + // This dimension has the same value. + continue; + } + if ((runningCmp ^ cmp) < 0) { + // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. + return null; + } + } + if (runningCmp < 0) { + // lhs is lower + return mergingLower ? rhs : lhs; + } + return mergingLower ? lhs : rhs; + } + + private int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { + int offset = dim * bytesPerDim; + return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index dc61280cff9f0..673bfcfac0f91 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -80,130 +80,14 @@ public FiltersAggregatorFactory(String name, List filters, boolean * Note that as aggregations are initialized and executed in a serial manner, * no concurrency considerations are necessary here. 
*/ - public Weight[] getWeights(Query query, SearchContext searchContext) { - if (weights == null) { - try { - IndexSearcher contextSearcher = searchContext.searcher(); - weights = new Weight[filters.length]; - for (int i = 0; i < filters.length; ++i) { - Query filter = filterMatchingBoth(query, filters[i]); - this.weights[i] = contextSearcher.createWeight(contextSearcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); - } - } catch (IOException e) { - throw new AggregationInitializationException("Failed to initialse filters for aggregation [" + name() + "]", e); - } - } - return weights; - } - - private Query filterMatchingBoth(Query lhs, Query rhs) { - if (lhs instanceof MatchAllDocsQuery) { - return rhs; - } - if (rhs instanceof MatchAllDocsQuery) { - return lhs; - } - Query unwrappedLhs = unwrap(lhs); - Query unwrappedRhs = unwrap(rhs); - LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs, unwrappedRhs); - LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs instanceof PointRangeQuery, unwrappedRhs instanceof PointRangeQuery); - if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { - PointRangeQuery merged = mergePointRangeQueries((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); - LogManager.getLogger().error("ADSFDSAF {}", merged); - if (merged != null) { - // TODO rewrap? 
- return merged; - } - } - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(lhs, BooleanClause.Occur.MUST); - builder.add(rhs, BooleanClause.Occur.MUST); - return builder.build(); - } - - private Query unwrap(Query query) { - if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { - query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); - } - if (query instanceof IndexOrDocValuesQuery) { - query = ((IndexOrDocValuesQuery) query).getIndexQuery(); - } - return query; - } - - private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQuery rhs) { - if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { - return null; - } - byte[] lower = mergePoint(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), true); - LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(lower, 0)); - if (lower == null) { - return null; - } - byte[] upper = mergePoint(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), false); - LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(upper, 0)); - if (upper == null) { - return null; - } - return new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { - @Override - protected String toString(int dimension, byte[] value) { - // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. - StringBuilder sb = new StringBuilder(); - sb.append("binary("); - for (int i = 0; i < value.length; i++) { - if (i > 0) { - sb.append(' '); - } - sb.append(Integer.toHexString(value[i] & 0xFF)); - } - sb.append(')'); - return sb.toString(); - } - }; - } - - /** - * Figure out if lhs's lower point is lower in all dimensions than - * rhs's lower point or if it is further. Return null if it is closer - * in some dimensions and further in others. 
- */ - private byte[] mergePoint(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean mergingLower) { - int runningCmp = 0; - for (int dim = 0; dim < numDims; dim++) { - int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); - if (runningCmp == 0) { - // Previous dimensions were all equal - runningCmp = cmp; - continue; - } - if (cmp == 0) { - // This dimension has the same value. - continue; - } - if ((runningCmp ^ cmp) < 0) { - // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. - return null; - } - } - if (runningCmp < 0) { - // lhs is lower - return mergingLower ? rhs : lhs; - } - return mergingLower ? lhs : rhs; - } - - private int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { - int offset = dim * bytesPerDim; - return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); - } + @Override public Aggregator createInternal(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { - return FiltersAggregator.build(name, factories, keys, query -> getWeights(query, searchContext), keyed, + return FiltersAggregator.build(name, factories, keys, filters, keyed, otherBucket ? 
otherBucketKey : null, searchContext, parent, cardinality, metadata); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java deleted file mode 100644 index 1d79d4b0d5e8f..0000000000000 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAdaptedFromDateRangeAggregator.java +++ /dev/null @@ -1,160 +0,0 @@ -package org.elasticsearch.search.aggregations.bucket.histogram; - -import org.apache.lucene.util.CollectionUtil; -import org.elasticsearch.common.Nullable; -import org.elasticsearch.common.Rounding; -import org.elasticsearch.search.DocValueFormat; -import org.elasticsearch.search.aggregations.AdaptingAggregator; -import org.elasticsearch.search.aggregations.Aggregator; -import org.elasticsearch.search.aggregations.AggregatorFactories; -import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.CardinalityUpperBound; -import org.elasticsearch.search.aggregations.InternalAggregation; -import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange; -import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; -import org.elasticsearch.search.aggregations.bucket.range.RangeAggregatorSupplier; -import org.elasticsearch.search.aggregations.support.ValuesSource; -import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; -import org.elasticsearch.search.internal.SearchContext; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Adapts a {@link DateHistogramAggregator} results into {@link InternalDateHistogram}s. 
- */ -public class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { - static DateHistogramAdaptedFromDateRangeAggregator buildOrNull( - String name, - AggregatorFactories factories, - Rounding rounding, - Rounding.Prepared preparedRounding, - BucketOrder order, - boolean keyed, - long minDocCount, - @Nullable LongBounds extendedBounds, - @Nullable LongBounds hardBounds, - ValuesSourceConfig valuesSourceConfig, - SearchContext context, - Aggregator parent, - CardinalityUpperBound cardinality, - Map metadata - ) throws IOException { - if (hardBounds != null) { - return null; - } - if (valuesSourceConfig.hasValues() == false) { - return null; - } - long[] points = preparedRounding.fixedRoundingPoints(); - if (points == null) { - return null; - } - // Range aggs use a double to aggregate and we don't want to lose precision. - long max = points[points.length - 1]; - if ((double) max != max) { - return null; - } - if ((double) points[0] != points[0]) { - return null; - } - RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() - .getValuesSourceRegistry() - .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); - if (rangeSupplier == null) { - return null; - } - RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; - for (int i = 0; i < points.length - 1; i++) { - ranges[i] = new RangeAggregator.Range(null, (double) points[i], (double) points[i + 1]); - } - ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) points[points.length - 1], null); - RangeAggregator delegate = rangeSupplier.build( - name, - factories, - (ValuesSource.Numeric) valuesSourceConfig.getValuesSource(), - valuesSourceConfig.format(), - InternalDateRange.FACTORY, - ranges, - false, - context, - parent, - cardinality, - metadata - ); - return new DateHistogramAdaptedFromDateRangeAggregator( - delegate, - valuesSourceConfig.format(), - rounding, - order, - minDocCount, - extendedBounds, - keyed - ); - } - - 
private final DocValueFormat format; - private final Rounding rounding; - private final BucketOrder order; - private final long minDocCount; - private final LongBounds extendedBounds; - private final boolean keyed; - - public DateHistogramAdaptedFromDateRangeAggregator( - RangeAggregator delegate, - DocValueFormat format, - Rounding rounding, - BucketOrder order, - long minDocCount, - LongBounds extendedBounds, - boolean keyed - ) { - super(delegate); - this.format = format; - this.rounding = rounding; - this.order = order; - this.minDocCount = minDocCount; - this.extendedBounds = extendedBounds; - this.keyed = keyed; - } - - @Override - protected InternalAggregation adapt(InternalAggregation delegateResult) { - InternalDateRange range = (InternalDateRange) delegateResult; - List buckets = new ArrayList<>(range.getBuckets().size()); - for (InternalDateRange.Bucket rangeBucket : range.getBuckets()) { - if (rangeBucket.getDocCount() > 0) { - buckets.add( - new InternalDateHistogram.Bucket( - rangeBucket.getFrom().toInstant().toEpochMilli(), - rangeBucket.getDocCount(), - keyed, - format, - rangeBucket.getAggregations() - ) - ); - } - } - CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); - - // value source will be null for unmapped fields - // Important: use `rounding` here, not `shardRounding` - InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 - ? 
new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) - : null; - return new InternalDateHistogram( - range.getName(), - buckets, - order, - minDocCount, - rounding.offset(), - emptyBucketInfo, - format, - keyed, - range.getMetadata() - ); - } -} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 6a465ea60c38d..5378a35ac774a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -26,21 +26,29 @@ import org.elasticsearch.common.Rounding; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.AdaptingAggregator; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.CardinalityUpperBound; import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregatorSupplier; import 
org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.function.BiConsumer; @@ -51,6 +59,126 @@ * @see Rounding */ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator { + public static Aggregator build( + String name, + AggregatorFactories factories, + Rounding rounding, + BucketOrder order, + boolean keyed, + long minDocCount, + @Nullable LongBounds extendedBounds, + @Nullable LongBounds hardBounds, + ValuesSourceConfig valuesSourceConfig, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); + Aggregator asRange = buildAsRangeOrNull( + name, + factories, + rounding, + preparedRounding, + order, + keyed, + minDocCount, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata + ); + if (asRange != null) { + return asRange; + } + return new DateHistogramAggregator( + name, + factories, + rounding, + preparedRounding, + order, + keyed, + minDocCount, + extendedBounds, + hardBounds, + valuesSourceConfig, + context, + parent, + cardinality, + metadata + ); + } + + private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( + String name, + AggregatorFactories factories, + Rounding rounding, + Rounding.Prepared preparedRounding, + BucketOrder order, + boolean keyed, + long minDocCount, + @Nullable LongBounds extendedBounds, + @Nullable LongBounds hardBounds, + ValuesSourceConfig valuesSourceConfig, + SearchContext context, + Aggregator parent, + 
CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (hardBounds != null) { + return null; + } + if (valuesSourceConfig.hasValues() == false) { + return null; + } + long[] points = preparedRounding.fixedRoundingPoints(); + if (points == null) { + return null; + } + // Range aggs use a double to aggregate and we don't want to lose precision. + long max = points[points.length - 1]; + if ((double) max != max) { + return null; + } + if ((double) points[0] != points[0]) { + return null; + } + RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() + .getValuesSourceRegistry() + .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); + if (rangeSupplier == null) { + return null; + } + RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; + for (int i = 0; i < points.length - 1; i++) { + ranges[i] = new RangeAggregator.Range(null, (double) points[i], (double) points[i + 1]); + } + ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) points[points.length - 1], null); + Aggregator delegate = rangeSupplier.build( + name, + factories, + valuesSourceConfig, + InternalDateRange.FACTORY, + ranges, + false, + context, + parent, + cardinality, + metadata + ); + return new DateHistogramAdaptedFromDateRangeAggregator( + delegate, + valuesSourceConfig.format(), + rounding, + order, + minDocCount, + extendedBounds, + keyed + ); + } private final ValuesSource.Numeric valuesSource; private final DocValueFormat formatter; @@ -196,4 +324,76 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { return 1.0; } } + + static class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { + private final DocValueFormat format; + private final Rounding rounding; + private final BucketOrder order; + private final long minDocCount; + private final LongBounds extendedBounds; + private final boolean keyed; + + public DateHistogramAdaptedFromDateRangeAggregator( + 
Aggregator delegate, + DocValueFormat format, + Rounding rounding, + BucketOrder order, + long minDocCount, + LongBounds extendedBounds, + boolean keyed + ) { + super(delegate); + this.format = format; + this.rounding = rounding; + this.order = order; + this.minDocCount = minDocCount; + this.extendedBounds = extendedBounds; + this.keyed = keyed; + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + InternalDateRange range = (InternalDateRange) delegateResult; + List buckets = new ArrayList<>(range.getBuckets().size()); + for (InternalDateRange.Bucket rangeBucket : range.getBuckets()) { + if (rangeBucket.getDocCount() > 0) { + buckets.add( + new InternalDateHistogram.Bucket( + rangeBucket.getFrom().toInstant().toEpochMilli(), + rangeBucket.getDocCount(), + keyed, + format, + rangeBucket.getAggregations() + ) + ); + } + } + CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); + + // value source will be null for unmapped fields + // Important: use `rounding` here, not `shardRounding` + InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 + ? 
new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) + : null; + return new InternalDateHistogram( + range.getName(), + buckets, + order, + minDocCount, + rounding.offset(), + emptyBucketInfo, + format, + keyed, + range.getMetadata() + ); + } + + public final InternalAggregations buildEmptySubAggregations() { + List aggs = new ArrayList<>(); + for (Aggregator aggregator : subAggregators()) { + aggs.add(aggregator.buildEmptyAggregation()); + } + return InternalAggregations.from(aggs); + } + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 807182807d802..ec9e5ca562101 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -19,13 +19,13 @@ package org.elasticsearch.search.aggregations.bucket.histogram; -import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.CardinalityUpperBound; +import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; @@ -43,65 +43,12 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( 
DateHistogramAggregationBuilder.REGISTRY_KEY, List.of(CoreValuesSourceType.DATE, CoreValuesSourceType.NUMERIC, CoreValuesSourceType.BOOLEAN), - DateHistogramAggregatorFactory::build, + DateHistogramAggregator::build, true); builder.register(DateHistogramAggregationBuilder.REGISTRY_KEY, CoreValuesSourceType.RANGE, DateRangeHistogramAggregator::new, true); } - private static Aggregator build( - String name, - AggregatorFactories factories, - Rounding rounding, - BucketOrder order, - boolean keyed, - long minDocCount, - @Nullable LongBounds extendedBounds, - @Nullable LongBounds hardBounds, - ValuesSourceConfig valuesSourceConfig, - SearchContext context, - Aggregator parent, - CardinalityUpperBound cardinality, - Map metadata - ) throws IOException { - Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); - Aggregator optimized = DateHistogramAdaptedFromDateRangeAggregator.buildOrNull( - name, - factories, - rounding, - preparedRounding, - order, - keyed, - minDocCount, - extendedBounds, - hardBounds, - valuesSourceConfig, - context, - parent, - cardinality, - metadata - ); - if (optimized != null) { - return optimized; - } - return new DateHistogramAggregator( - name, - factories, - rounding, - preparedRounding, - order, - keyed, - minDocCount, - extendedBounds, - hardBounds, - valuesSourceConfig, - context, - parent, - cardinality, - metadata - ); - }; - private final BucketOrder order; private final boolean keyed; private final long minDocCount; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java index cf0c8188f1590..377d9e02daae4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java @@ -27,7 +27,6 @@ import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator.Unmapped; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; -import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; @@ -92,8 +91,7 @@ protected Aggregator doCreateInternal( .build( name, factories, - (Numeric) config.getValuesSource(), - config.format(), + config, rangeFactory, ranges, keyed, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 6b283b5e4f953..14cb135b016b1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -19,8 +19,10 @@ package org.elasticsearch.search.aggregations.bucket.range; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -32,6 +34,7 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.search.DocValueFormat; +import 
org.elasticsearch.search.aggregations.AdaptingAggregator; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; @@ -41,9 +44,12 @@ import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.NonCollectingAggregator; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator; +import org.elasticsearch.search.aggregations.bucket.filter.InternalFilters; import org.elasticsearch.search.aggregations.bucket.range.InternalRange.Factory; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; @@ -217,12 +223,72 @@ public boolean equals(Object obj) { } } - public static RangeAggregator build( + public static Aggregator build( + String name, + AggregatorFactories factories, + ValuesSourceConfig valuesSourceConfig, + InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (valuesSourceConfig.fieldType().isSearchable()) { + String[] keys = new String[ranges.length]; + Query[] filters = new Query[ranges.length]; + for (int i = 0; i < ranges.length; i++) { + keys[i] = Integer.toString(i); + filters[i] = valuesSourceConfig.fieldType() + .rangeQuery( + valuesSourceConfig.format().format(ranges[i].from), + valuesSourceConfig.format().format(ranges[i].to), + true, + false, + ShapeRelation.CONTAINS, + null, + null, + context.getQueryShardContext() + ); + } + FiltersAggregator delegate = FiltersAggregator.build( + name, + 
factories, + keys, + filters, + false, + null, + context, + parent, + cardinality, + metadata + ); + if (delegate.collectsInFilterOrder()) { + return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); + } + } + return build( + name, + factories, + (ValuesSource.Numeric) valuesSourceConfig.getValuesSource(), + valuesSourceConfig.format(), + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); + } + + public static Aggregator build( String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, - InternalRange.Factory rangeFactory, + InternalRange.Factory rangeFactory, Range[] ranges, boolean keyed, SearchContext context, @@ -245,7 +311,7 @@ public static RangeAggregator build( metadata ); } - return new NoOverlapCollector( + return new NoOverlapAggregator( name, factories, valuesSource, @@ -363,8 +429,8 @@ public InternalAggregation buildEmptyAggregation() { protected abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; - private static class NoOverlapCollector extends RangeAggregator { - public NoOverlapCollector( + private static class NoOverlapAggregator extends RangeAggregator { + public NoOverlapAggregator( String name, AggregatorFactories factories, Numeric valuesSource, @@ -473,6 +539,54 @@ protected int collect(LeafBucketCollector sub, int doc, double value, long ownin } } + private static class RangeAdaptedFromFiltersAggregator extends AdaptingAggregator { + private final DocValueFormat format; + private final Range[] ranges; + private final boolean keyed; + private final InternalRange.Factory rangeFactory; + + public RangeAdaptedFromFiltersAggregator( + Aggregator delegate, + DocValueFormat format, + Range[] ranges, + boolean keyed, + InternalRange.Factory rangeFactory + ) { + super(delegate); + this.format = format; + this.ranges = ranges; + this.keyed = 
keyed; + this.rangeFactory = rangeFactory; + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + InternalFilters filters = (InternalFilters) delegateResult; + if (filters.getBuckets().size() != ranges.length) { + throw new IllegalStateException( + "bad number of filters [" + filters.getBuckets().size() + "] expecting [" + ranges.length + "]" + ); + } + List buckets = new ArrayList<>(filters.getBuckets().size()); + for (int i = 0; i < ranges.length; i++) { + Range r = ranges[i]; + InternalFilters.InternalBucket b = filters.getBuckets().get(i); + buckets.add( + rangeFactory.createBucket( + r.getKey(), + r.getFrom(), + r.getTo(), + b.getDocCount(), + (InternalAggregations) b.getAggregations(), + keyed, + format + ) + ); + } + return rangeFactory.create(name(), buckets, format, keyed, filters.getMetadata()); + } + } + private static boolean hasOverlap(Range[] ranges) { double lastEnd = ranges[0].to; for (int i = 1; i < ranges.length; ++i) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java index 5cf8be6a0407f..49a2dea0b781e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java @@ -22,18 +22,17 @@ import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; -import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; public interface RangeAggregatorSupplier { - RangeAggregator build(String 
name, + Aggregator build(String name, AggregatorFactories factories, - ValuesSource.Numeric valuesSource, - DocValueFormat format, - InternalRange.Factory rangeFactory, + ValuesSourceConfig valuesSourceConfig, + InternalRange.Factory rangeFactory, RangeAggregator.Range[] ranges, boolean keyed, SearchContext context, diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java index 1944904dfd11a..b356d84a432cc 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java @@ -140,6 +140,11 @@ public String toString() { return delegate.toString(); } + @Override + public Aggregator[] subAggregators() { + return delegate.subAggregators(); + } + public static Aggregator unwrap(Aggregator agg) { if (agg instanceof ProfilingAggregator) { return ((ProfilingAggregator) agg).delegate; From 607ae5f7c245c05a2414e0ceb50e417ad7c0e67e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 13:38:51 -0400 Subject: [PATCH 12/48] Tests --- .../bucket/filter/FiltersAggregator.java | 15 ++- .../GeoDistanceRangeAggregatorFactory.java | 2 +- .../bucket/range/RangeAggregator.java | 95 ++++++++++++------- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index bb13030811293..1987a2f4fe12b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -19,8 +19,6 @@ package org.elasticsearch.search.aggregations.bucket.filter; -import 
org.apache.logging.log4j.LogManager; -import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -32,6 +30,7 @@ import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -271,7 +270,12 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket } Bits live = ctx.reader().getLiveDocs(); for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { - DocIdSetIterator itr = filterWeights[filterOrd].scorer(ctx).iterator(); + Scorer scorer = filterWeights[filterOrd].scorer(ctx); + if (scorer == null) { + // the filter doesn't match any docs + continue; + } + DocIdSetIterator itr = scorer.iterator(); if (live == null) { while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { collectBucket(sub, itr.docID(), filterOrd); @@ -374,11 +378,8 @@ private Query filterMatchingBoth(Query lhs, Query rhs) { } Query unwrappedLhs = unwrap(lhs); Query unwrappedRhs = unwrap(rhs); - LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs, unwrappedRhs); - LogManager.getLogger().error("ADSFDSAF {} {}", unwrappedLhs instanceof PointRangeQuery, unwrappedRhs instanceof PointRangeQuery); if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { PointRangeQuery merged = mergePointRangeQueries((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); - LogManager.getLogger().error("ADSFDSAF {}", merged); if (merged != null) { // TODO rewrap? 
return merged; @@ -405,12 +406,10 @@ private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQu return null; } byte[] lower = mergePoint(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), true); - LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(lower, 0)); if (lower == null) { return null; } byte[] upper = mergePoint(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), false); - LogManager.getLogger().error("ADSFDSAF {}", LongPoint.decodeDimension(upper, 0)); if (upper == null) { return null; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java index 7e4863c1b733f..ae40318260b03 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/GeoDistanceRangeAggregatorFactory.java @@ -66,7 +66,7 @@ public static void registerAggregators(ValuesSourceRegistry.Builder builder) { cardinality, metadata) -> { DistanceSource distanceSource = new DistanceSource((ValuesSource.GeoPoint) valuesSource, distanceType, origin, units); - return RangeAggregator.build( + return RangeAggregator.buildWithoutAttemptedToAdaptToFilters( name, factories, distanceSource, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 14cb135b016b1..400edeef9475e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -33,6 +33,8 @@ import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; +import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AdaptingAggregator; import org.elasticsearch.search.aggregations.Aggregator; @@ -235,40 +237,11 @@ public static Aggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (valuesSourceConfig.fieldType().isSearchable()) { - String[] keys = new String[ranges.length]; - Query[] filters = new Query[ranges.length]; - for (int i = 0; i < ranges.length; i++) { - keys[i] = Integer.toString(i); - filters[i] = valuesSourceConfig.fieldType() - .rangeQuery( - valuesSourceConfig.format().format(ranges[i].from), - valuesSourceConfig.format().format(ranges[i].to), - true, - false, - ShapeRelation.CONTAINS, - null, - null, - context.getQueryShardContext() - ); - } - FiltersAggregator delegate = FiltersAggregator.build( - name, - factories, - keys, - filters, - false, - null, - context, - parent, - cardinality, - metadata - ); - if (delegate.collectsInFilterOrder()) { - return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); - } + Aggregator adapted = adaptIntoFiltersOrNull(name, factories, valuesSourceConfig, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); + if (adapted != null) { + return adapted; } - return build( + return buildWithoutAttemptedToAdaptToFilters( name, factories, (ValuesSource.Numeric) valuesSourceConfig.getValuesSource(), @@ -283,7 +256,61 @@ public static Aggregator build( ); } - public static Aggregator build( + public static Aggregator adaptIntoFiltersOrNull( + String name, + AggregatorFactories factories, + ValuesSourceConfig valuesSourceConfig, 
+ InternalRange.Factory rangeFactory, + Range[] ranges, + boolean keyed, + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + if (false == valuesSourceConfig.fieldType().isSearchable()) { + return null; + } + if (valuesSourceConfig.fieldType() instanceof DateFieldType + && ((DateFieldType) valuesSourceConfig.fieldType()).resolution() == Resolution.NANOSECONDS) { + // We don't generate sensible Queries for nanoseconds. + return null; + } + String[] keys = new String[ranges.length]; + Query[] filters = new Query[ranges.length]; + for (int i = 0; i < ranges.length; i++) { + keys[i] = Integer.toString(i); + filters[i] = valuesSourceConfig.fieldType() + .rangeQuery( + valuesSourceConfig.format().format(ranges[i].from), + valuesSourceConfig.format().format(ranges[i].to), + true, + false, + ShapeRelation.CONTAINS, + null, + null, + context.getQueryShardContext() + ); + } + FiltersAggregator delegate = FiltersAggregator.build( + name, + factories, + keys, + filters, + false, + null, + context, + parent, + cardinality, + metadata + ); + if (false == delegate.collectsInFilterOrder()) { + return null; + } + return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); + } + + public static Aggregator buildWithoutAttemptedToAdaptToFilters( String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, From 0e68cad023bb5594110e176f53865b889c39be22 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 13:58:56 -0400 Subject: [PATCH 13/48] look --- .../src/main/java/org/elasticsearch/common/Rounding.java | 3 +++ .../bucket/histogram/DateHistogramAggregator.java | 8 ++++++++ .../search/aggregations/bucket/range/RangeAggregator.java | 2 ++ .../search/aggregations/support/CoreValuesSourceType.java | 3 +++ 4 files changed, 16 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java 
b/server/src/main/java/org/elasticsearch/common/Rounding.java index e53dbefe5bd07..6f002532295f5 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -423,6 +423,7 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) int i = 0; values[i++] = rounded; while ((rounded = nextRoundingValue(rounded)) <= maxUtcMillis) { + LogManager.getLogger().error("ADSFADSF {} {} {} {} {}", minUtcMillis, maxUtcMillis, values[i - 1], i, max, new Exception()); if (i >= max) { return this; } @@ -699,7 +700,9 @@ public long beforeOverlap(long localMillis, Overlap overlap) { @Override protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) { + LogManager.getLogger().error("ADSFADSF to midnight"); if (lookup.anyMoveBackToPreviousDay()) { + LogManager.getLogger().error("ADSFADSF to midnight - bad"); return this; } return super.maybeUseArray(minUtcMillis, maxUtcMillis, max); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 5378a35ac774a..7d0251f80f0ed 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.search.aggregations.bucket.histogram; +import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; @@ -129,27 +130,33 @@ private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( Map metadata ) throws IOException { if (hardBounds != null) { + LogManager.getLogger().error("ADSFADSF hard bounds"); return 
null; } if (valuesSourceConfig.hasValues() == false) { + LogManager.getLogger().error("ADSFADSF no values"); return null; } long[] points = preparedRounding.fixedRoundingPoints(); if (points == null) { + LogManager.getLogger().error("ADSFADSF no fixed points"); return null; } // Range aggs use a double to aggregate and we don't want to lose precision. long max = points[points.length - 1]; if ((double) max != max) { + LogManager.getLogger().error("ADSFADSF max can't double {}", max); return null; } if ((double) points[0] != points[0]) { + LogManager.getLogger().error("ADSFADSF min can't double {}", points[0]); return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() .getValuesSourceRegistry() .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); if (rangeSupplier == null) { + LogManager.getLogger().error("ADSFADSF no range supplier"); return null; } RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; @@ -169,6 +176,7 @@ private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( cardinality, metadata ); + LogManager.getLogger().error("ADSFADSF {}", (Object) ranges); return new DateHistogramAdaptedFromDateRangeAggregator( delegate, valuesSourceConfig.format(), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 400edeef9475e..5b090e5b21ae6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.search.aggregations.bucket.range; +import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -307,6 +308,7 @@ public 
static Aggregator adaptIntoFiltersOrNull( if (false == delegate.collectsInFilterOrder()) { return null; } + LogManager.getLogger().error("ADSFADSF {}", (Object) filters); return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java index 8858a3db3696c..e9245876f75d1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.support; +import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PointValues; import org.apache.lucene.search.BooleanClause; @@ -286,6 +287,7 @@ public Function roundingPreparer() throws IOExcepti * is on the DateFieldType. 
*/ byte[] min = PointValues.getMinPackedValue(context.searcher().getIndexReader(), fieldContext.field()); + LogManager.getLogger().error("ADSFADSF packed {}", min); if (min != null) { // null means that there aren't values in the index byte[] max = PointValues.getMaxPackedValue(context.searcher().getIndexReader(), fieldContext.field()); @@ -295,6 +297,7 @@ public Function roundingPreparer() throws IOExcepti } // Check the query for bounds + LogManager.getLogger().error("ADSFADSF query {}", context.query()); if (context.query() != null) { context.query().visit(new QueryVisitor() { @Override From 9754fb0303803ae7fec06351ca3cbdebd0ee4391 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 14:28:30 -0400 Subject: [PATCH 14/48] no looking --- .../src/main/java/org/elasticsearch/common/Rounding.java | 3 --- .../bucket/histogram/DateHistogramAggregator.java | 8 -------- .../search/aggregations/bucket/range/RangeAggregator.java | 2 -- .../search/aggregations/support/CoreValuesSourceType.java | 3 --- 4 files changed, 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java index 6f002532295f5..e53dbefe5bd07 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -423,7 +423,6 @@ protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) int i = 0; values[i++] = rounded; while ((rounded = nextRoundingValue(rounded)) <= maxUtcMillis) { - LogManager.getLogger().error("ADSFADSF {} {} {} {} {}", minUtcMillis, maxUtcMillis, values[i - 1], i, max, new Exception()); if (i >= max) { return this; } @@ -700,9 +699,7 @@ public long beforeOverlap(long localMillis, Overlap overlap) { @Override protected Prepared maybeUseArray(long minUtcMillis, long maxUtcMillis, int max) { - LogManager.getLogger().error("ADSFADSF to midnight"); if (lookup.anyMoveBackToPreviousDay()) { - 
LogManager.getLogger().error("ADSFADSF to midnight - bad"); return this; } return super.maybeUseArray(minUtcMillis, maxUtcMillis, max); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 7d0251f80f0ed..5378a35ac774a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.histogram; -import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; @@ -130,33 +129,27 @@ private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( Map metadata ) throws IOException { if (hardBounds != null) { - LogManager.getLogger().error("ADSFADSF hard bounds"); return null; } if (valuesSourceConfig.hasValues() == false) { - LogManager.getLogger().error("ADSFADSF no values"); return null; } long[] points = preparedRounding.fixedRoundingPoints(); if (points == null) { - LogManager.getLogger().error("ADSFADSF no fixed points"); return null; } // Range aggs use a double to aggregate and we don't want to lose precision. 
long max = points[points.length - 1]; if ((double) max != max) { - LogManager.getLogger().error("ADSFADSF max can't double {}", max); return null; } if ((double) points[0] != points[0]) { - LogManager.getLogger().error("ADSFADSF min can't double {}", points[0]); return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() .getValuesSourceRegistry() .getAggregator(RangeAggregationBuilder.REGISTRY_KEY, valuesSourceConfig); if (rangeSupplier == null) { - LogManager.getLogger().error("ADSFADSF no range supplier"); return null; } RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; @@ -176,7 +169,6 @@ private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( cardinality, metadata ); - LogManager.getLogger().error("ADSFADSF {}", (Object) ranges); return new DateHistogramAdaptedFromDateRangeAggregator( delegate, valuesSourceConfig.format(), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 5b090e5b21ae6..400edeef9475e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.range; -import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -308,7 +307,6 @@ public static Aggregator adaptIntoFiltersOrNull( if (false == delegate.collectsInFilterOrder()) { return null; } - LogManager.getLogger().error("ADSFADSF {}", (Object) filters); return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); } diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java index e9245876f75d1..8858a3db3696c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceType.java @@ -19,7 +19,6 @@ package org.elasticsearch.search.aggregations.support; -import org.apache.logging.log4j.LogManager; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PointValues; import org.apache.lucene.search.BooleanClause; @@ -287,7 +286,6 @@ public Function roundingPreparer() throws IOExcepti * is on the DateFieldType. */ byte[] min = PointValues.getMinPackedValue(context.searcher().getIndexReader(), fieldContext.field()); - LogManager.getLogger().error("ADSFADSF packed {}", min); if (min != null) { // null means that there aren't values in the index byte[] max = PointValues.getMaxPackedValue(context.searcher().getIndexReader(), fieldContext.field()); @@ -297,7 +295,6 @@ public Function roundingPreparer() throws IOExcepti } // Check the query for bounds - LogManager.getLogger().error("ADSFADSF query {}", context.query()); if (context.query() != null) { context.query().visit(new QueryVisitor() { @Override From 0750aa8bbe9c8a6bee2d1fa5c44307fa2369fe4b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 16:42:17 -0400 Subject: [PATCH 15/48] tests --- .../org/elasticsearch/common/Rounding.java | 6 +- .../aggregations/AdaptingAggregator.java | 59 +++++++++++++------ .../search/aggregations/AggregatorBase.java | 2 +- .../bucket/filter/FiltersAggregator.java | 4 +- .../filter/FiltersAggregatorFactory.java | 13 ---- .../histogram/DateHistogramAggregator.java | 8 +-- .../DateHistogramAggregatorFactory.java | 1 - .../bucket/range/InternalRange.java | 1 - .../bucket/range/RangeAggregator.java | 29 +++++++-- 
.../bucket/range/RangeAggregatorSupplier.java | 1 - .../range/DateRangeAggregatorTests.java | 6 +- .../bucket/range/RangeAggregatorTests.java | 43 ++++++++++---- 12 files changed, 114 insertions(+), 59 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java index e53dbefe5bd07..16ec3552d9c51 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -291,7 +291,11 @@ public interface Prepared { * next rounded value in specified units if possible. */ double roundingSize(long utcMillis, DateTimeUnit timeUnit); - + /** + * An array of dates such that each date between each entry is will + * be rounded down to that entry or {@code null} if this rounding + * mechanism doesn't or can't precalculate these points. + */ long[] fixedRoundingPoints(); } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index 057fa671d5423..0c3e9fa383389 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -1,14 +1,35 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.search.aggregations; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ScoreMode; -import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +/** + * An {@linkplain Aggregator} that delegates collection to another + * {@linkplain Aggregator} and then translates its results into the results + * you'd expect from another aggregation. + */ public abstract class AdaptingAggregator extends Aggregator { private final Aggregator delegate; @@ -16,53 +37,59 @@ public AdaptingAggregator(Aggregator delegate) { this.delegate = delegate; } + /** + * Adapt the result from the collecting {@linkplain Aggregator} into the + * result expected by this {@linkplain Aggregator}. 
+ */ + protected abstract InternalAggregation adapt(InternalAggregation delegateResult); + @Override - public void close() { + public final void close() { delegate.close(); } @Override - public ScoreMode scoreMode() { + public final ScoreMode scoreMode() { return delegate.scoreMode(); } @Override - public String name() { + public final String name() { return delegate.name(); } @Override - public SearchContext context() { + public final SearchContext context() { return delegate.context(); } @Override - public Aggregator parent() { + public final Aggregator parent() { return delegate.parent(); } @Override - public Aggregator subAggregator(String name) { + public final Aggregator subAggregator(String name) { return delegate.subAggregator(name); } @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { return delegate.getLeafCollector(ctx); } @Override - public void preCollection() throws IOException { + public final void preCollection() throws IOException { delegate.preCollection(); } @Override - public void postCollection() throws IOException { + public final void postCollection() throws IOException { delegate.postCollection(); } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public final InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { InternalAggregation[] delegateResults = delegate.buildAggregations(owningBucketOrds); InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { @@ -72,14 +99,12 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I } @Override - public InternalAggregation buildEmptyAggregation() { + public final InternalAggregation buildEmptyAggregation() { return 
adapt(delegate.buildEmptyAggregation()); } @Override - public Aggregator[] subAggregators() { + public final Aggregator[] subAggregators() { return delegate.subAggregators(); } - - protected abstract InternalAggregation adapt(InternalAggregation delegateResult); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java index bda03fc1abbf7..ab2ad5000a8a5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorBase.java @@ -278,7 +278,7 @@ protected void doClose() {} protected void doPostCollection() throws IOException { } - public final InternalAggregations buildEmptySubAggregations() { + protected final InternalAggregations buildEmptySubAggregations() { List aggs = new ArrayList<>(); for (Aggregator aggregator : subAggregators) { aggs.add(aggregator.buildEmptyAggregation()); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 1987a2f4fe12b..2f697d53b6c67 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -249,7 +249,7 @@ private static class FilterOrderAggregator extends FiltersAggregator { private final Query[] filters; private Weight[] filterWeights; - public FilterOrderAggregator( + FilterOrderAggregator( String name, String[] keys, Query[] filters, @@ -303,7 +303,7 @@ private static class StandardOrderAggregator extends FiltersAggregator { private final int totalNumKeys; - public StandardOrderAggregator( + StandardOrderAggregator( String name, AggregatorFactories factories, String[] keys, diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java index 673bfcfac0f91..0594286bd77f3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorFactory.java @@ -19,19 +19,8 @@ package org.elasticsearch.search.aggregations.bucket.filter; -import org.apache.logging.log4j.LogManager; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; -import org.elasticsearch.search.aggregations.AggregationInitializationException; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactory; @@ -44,8 +33,6 @@ import java.util.List; import java.util.Map; -import static java.util.Arrays.compareUnsigned; - public class FiltersAggregatorFactory extends AggregatorFactory { private final String[] keys; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 5378a35ac774a..5e34c78f3c2a8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java 
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -75,7 +75,7 @@ public static Aggregator build( Map metadata ) throws IOException { Rounding.Prepared preparedRounding = valuesSourceConfig.roundingPreparer().apply(rounding); - Aggregator asRange = buildAsRangeOrNull( + Aggregator asRange = adaptIntoRangeOrNull( name, factories, rounding, @@ -112,7 +112,7 @@ public static Aggregator build( ); } - private static DateHistogramAdaptedFromDateRangeAggregator buildAsRangeOrNull( + private static DateHistogramAdaptedFromDateRangeAggregator adaptIntoRangeOrNull( String name, AggregatorFactories factories, Rounding rounding, @@ -325,7 +325,7 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { } } - static class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { + private static class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { private final DocValueFormat format; private final Rounding rounding; private final BucketOrder order; @@ -333,7 +333,7 @@ static class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggrega private final LongBounds extendedBounds; private final boolean keyed; - public DateHistogramAdaptedFromDateRangeAggregator( + DateHistogramAdaptedFromDateRangeAggregator( Aggregator delegate, DocValueFormat format, Rounding rounding, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index ec9e5ca562101..d069a7cb8c7a7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -25,7 +25,6 @@ import 
org.elasticsearch.search.aggregations.AggregatorFactory; import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.CardinalityUpperBound; -import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java index 6d4bd035a1d52..1ba14d7a0715c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalRange.java @@ -22,7 +22,6 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.search.DocValueFormat; -import org.elasticsearch.search.aggregations.Aggregations; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 400edeef9475e..7cf89b821caf3 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -237,7 +237,18 @@ public static Aggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - Aggregator adapted = adaptIntoFiltersOrNull(name, 
factories, valuesSourceConfig, rangeFactory, ranges, keyed, context, parent, cardinality, metadata); + Aggregator adapted = adaptIntoFiltersOrNull( + name, + factories, + valuesSourceConfig, + rangeFactory, + ranges, + keyed, + context, + parent, + cardinality, + metadata + ); if (adapted != null) { return adapted; } @@ -268,9 +279,19 @@ public static Aggregator adaptIntoFiltersOrNull( CardinalityUpperBound cardinality, Map metadata ) throws IOException { + if (valuesSourceConfig.fieldType() == null) { + return null; + } if (false == valuesSourceConfig.fieldType().isSearchable()) { return null; } + if (valuesSourceConfig.missing() != null) { + return null; + } + if (valuesSourceConfig.script() != null) { + return null; + } + // TODO bail here for runtime fields. They'll be slower this way. Maybe we can somehow look at the Query? if (valuesSourceConfig.fieldType() instanceof DateFieldType && ((DateFieldType) valuesSourceConfig.fieldType()).resolution() == Resolution.NANOSECONDS) { // We don't generate sensible Queries for nanoseconds. 
@@ -457,7 +478,7 @@ protected abstract int collect(LeafBucketCollector sub, int doc, double value, l throws IOException; private static class NoOverlapAggregator extends RangeAggregator { - public NoOverlapAggregator( + NoOverlapAggregator( String name, AggregatorFactories factories, Numeric valuesSource, @@ -492,7 +513,7 @@ protected int collect(LeafBucketCollector sub, int doc, double value, long ownin } private static class OverlapRangeAggregator extends RangeAggregator { - public OverlapRangeAggregator( + OverlapRangeAggregator( String name, AggregatorFactories factories, Numeric valuesSource, @@ -572,7 +593,7 @@ private static class RangeAdaptedFromFiltersAggregator rangeFactory; - public RangeAdaptedFromFiltersAggregator( + RangeAdaptedFromFiltersAggregator( Aggregator delegate, DocValueFormat format, Range[] ranges, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java index 49a2dea0b781e..2e6714ee83b3e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorSupplier.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.aggregations.bucket.range; -import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java index 3c14aa75af93a..eaf7457f1bced 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java 
+++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.aggregations.bucket.range; +import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -53,7 +54,6 @@ public class DateRangeAggregatorTests extends AggregatorTestCase { private String NUMBER_FIELD_NAME = "number"; - private String UNMAPPED_FIELD_NAME = "field_not_appearing_in_this_index"; private String DATE_FIELD_NAME = "date"; private long milli1 = ZonedDateTime.of(2015, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); @@ -146,8 +146,8 @@ public void testNumberFieldNumberRanges() throws IOException { = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER); testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 1))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 7), new IntPoint(NUMBER_FIELD_NAME, 7))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 1), new IntPoint(NUMBER_FIELD_NAME, 1))); }, range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 6e3db461e5cf7..1bcbef320aaaa 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -19,6 +19,8 @@ package 
org.elasticsearch.search.aggregations.bucket.range; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -32,6 +34,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedConsumer; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -71,9 +74,9 @@ public void testNoMatchingField() throws IOException { public void testMatchesSortedNumericDocValues() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 2))); - iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3))); + iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7), new IntPoint(NUMBER_FIELD_NAME, 7))); + iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 2), new IntPoint(NUMBER_FIELD_NAME, 2))); + iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3), new IntPoint(NUMBER_FIELD_NAME, 3))); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -85,9 +88,9 @@ public void testMatchesSortedNumericDocValues() throws IOException { public void testMatchesNumericDocValues() throws IOException { testCase(new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); - iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 2))); - iw.addDocument(singleton(new 
NumericDocValuesField(NUMBER_FIELD_NAME, 3))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 7), new IntPoint(NUMBER_FIELD_NAME, 7))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 2), new IntPoint(NUMBER_FIELD_NAME, 2))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 3), new IntPoint(NUMBER_FIELD_NAME, 3))); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -98,7 +101,16 @@ public void testMatchesNumericDocValues() throws IOException { } public void testDateFieldMillisecondResolution() throws IOException { - DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType(DATE_FIELD_NAME); + DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType( + DATE_FIELD_NAME, + randomBoolean(), + randomBoolean(), + true, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ); long milli1 = ZonedDateTime.of(2015, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); long milli2 = ZonedDateTime.of(2016, 11, 13, 16, 14, 34, 0, ZoneOffset.UTC).toInstant().toEpochMilli(); @@ -108,8 +120,8 @@ public void testDateFieldMillisecondResolution() throws IOException { .addRange(milli1 - 1, milli1 + 1); testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1))); - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2))); + iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1), new LongPoint(DATE_FIELD_NAME, milli1))); + iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2), new LongPoint(DATE_FIELD_NAME, milli2))); }, range -> { List ranges = range.getBuckets(); assertEquals(1, ranges.size()); @@ -302,6 +314,7 @@ public void testOverlappingRanges() throws IOException { aggregationBuilder.addRange(0d, 5d); aggregationBuilder.addRange(10d, 
20d); aggregationBuilder.addRange(0d, 20d); + aggregationBuilder.missing(100); // Set a missing value to force the "normal" range collection instead of filter-based testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 11))); iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7))); @@ -327,8 +340,16 @@ public void testOverlappingRanges() throws IOException { private void testCase(Query query, CheckedConsumer buildIndex, Consumer> verify) throws IOException { - MappedFieldType fieldType - = new NumberFieldMapper.NumberFieldType(NUMBER_FIELD_NAME, NumberFieldMapper.NumberType.INTEGER); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberFieldMapper.NumberType.INTEGER, + randomBoolean(), + randomBoolean(), + true, + false, + null, + null + ); RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("test_range_agg"); aggregationBuilder.field(NUMBER_FIELD_NAME); aggregationBuilder.addRange(0d, 5d); From c57c98a787bc9fab00b042de89a04dd06a937b6d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 17:51:33 -0400 Subject: [PATCH 16/48] Handle unbounded ranges --- .../bucket/range/RangeAggregator.java | 6 +-- .../range/DateRangeAggregatorTests.java | 38 +++++++++++++++++++ .../bucket/range/RangeAggregatorTests.java | 37 ++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 7cf89b821caf3..9fa4e2355cea2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -303,10 +303,10 @@ public static Aggregator adaptIntoFiltersOrNull( keys[i] = 
Integer.toString(i); filters[i] = valuesSourceConfig.fieldType() .rangeQuery( - valuesSourceConfig.format().format(ranges[i].from), - valuesSourceConfig.format().format(ranges[i].to), + ranges[i].from == Double.NEGATIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].from), + ranges[i].to == Double.POSITIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].to), true, - false, + ranges[i].to == Double.NEGATIVE_INFINITY, ShapeRelation.CONTAINS, null, null, diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java index eaf7457f1bced..55d698ecaf1f9 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.range; import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -37,6 +38,7 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.MultiBucketConsumerService; @@ -50,6 +52,8 @@ import java.util.function.Consumer; import static java.util.Collections.singleton; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class DateRangeAggregatorTests 
extends AggregatorTestCase { @@ -122,6 +126,40 @@ public void testMissingDateStringWithDateField() throws IOException { }, fieldType); } + public void testUnboundedRanges() throws IOException { + testCase( + new RangeAggregationBuilder("name").field(DATE_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), + new MatchAllDocsQuery(), + iw -> { + iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 7), new LongPoint(DATE_FIELD_NAME, 7))); + iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 2), new LongPoint(DATE_FIELD_NAME, 2))); + iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 3), new LongPoint(DATE_FIELD_NAME, 3))); + }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(2)); + assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(1).getFrom(), equalTo(5d)); + assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); + assertThat(ranges.get(1).getDocCount(), equalTo(1L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + new DateFieldMapper.DateFieldType( + DATE_FIELD_NAME, + randomBoolean(), + randomBoolean(), + true, + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + Resolution.MILLISECONDS, + null, + null + ) + ); + } + public void testNumberFieldDateRanges() throws IOException { DateRangeAggregationBuilder aggregationBuilder = new DateRangeAggregationBuilder("date_range") .field(NUMBER_FIELD_NAME) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 1bcbef320aaaa..e9a79046e02c5 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.search.aggregations.bucket.range; +import com.carrotsearch.randomizedtesting.annotations.Seed; + import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; @@ -53,6 +55,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +@Seed("32AC488589A4F6FE") public class RangeAggregatorTests extends AggregatorTestCase { private static final String NUMBER_FIELD_NAME = "number"; @@ -100,6 +103,40 @@ public void testMatchesNumericDocValues() throws IOException { }); } + public void testUnboundedRanges() throws IOException { + testCase( + new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), + new MatchAllDocsQuery(), + iw -> { + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 7), new IntPoint(NUMBER_FIELD_NAME, 7))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 2), new IntPoint(NUMBER_FIELD_NAME, 2))); + iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 3), new IntPoint(NUMBER_FIELD_NAME, 3))); + }, + result -> { + InternalRange range = (InternalRange) result; + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(2)); + assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); + assertThat(ranges.get(0).getTo(), equalTo(5d)); + assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(1).getFrom(), equalTo(5d)); + assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); + assertThat(ranges.get(1).getDocCount(), equalTo(1L)); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberFieldMapper.NumberType.INTEGER, + randomBoolean(), + randomBoolean(), + true, + false, + 
null, + null + ) + ); + } + public void testDateFieldMillisecondResolution() throws IOException { DateFieldMapper.DateFieldType fieldType = new DateFieldMapper.DateFieldType( DATE_FIELD_NAME, From 402132781b8530f616ad6474c0b7652a588404c7 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 13 Oct 2020 17:53:25 -0400 Subject: [PATCH 17/48] Test for max and min --- .../bucket/range/DateRangeAggregatorTests.java | 10 ++++++++-- .../bucket/range/RangeAggregatorTests.java | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java index 55d698ecaf1f9..58dc5e0637a25 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java @@ -131,9 +131,15 @@ public void testUnboundedRanges() throws IOException { new RangeAggregationBuilder("name").field(DATE_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), new MatchAllDocsQuery(), iw -> { + iw.addDocument( + List.of(new NumericDocValuesField(DATE_FIELD_NAME, Long.MIN_VALUE), new LongPoint(DATE_FIELD_NAME, Long.MIN_VALUE)) + ); iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 7), new LongPoint(DATE_FIELD_NAME, 7))); iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 2), new LongPoint(DATE_FIELD_NAME, 2))); iw.addDocument(List.of(new NumericDocValuesField(DATE_FIELD_NAME, 3), new LongPoint(DATE_FIELD_NAME, 3))); + iw.addDocument( + List.of(new NumericDocValuesField(DATE_FIELD_NAME, Long.MAX_VALUE), new LongPoint(DATE_FIELD_NAME, Long.MAX_VALUE)) + ); }, result -> { InternalRange range = (InternalRange) result; @@ -141,10 +147,10 @@ public void testUnboundedRanges() throws IOException { assertThat(ranges, 
hasSize(2)); assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); assertThat(ranges.get(0).getTo(), equalTo(5d)); - assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(0).getDocCount(), equalTo(3L)); assertThat(ranges.get(1).getFrom(), equalTo(5d)); assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); - assertThat(ranges.get(1).getDocCount(), equalTo(1L)); + assertThat(ranges.get(1).getDocCount(), equalTo(2L)); assertTrue(AggregationInspectionHelper.hasValue(range)); }, new DateFieldMapper.DateFieldType( diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index e9a79046e02c5..1719f464ea98f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -108,9 +108,21 @@ public void testUnboundedRanges() throws IOException { new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5), new MatchAllDocsQuery(), iw -> { + iw.addDocument( + List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, Integer.MIN_VALUE), + new IntPoint(NUMBER_FIELD_NAME, Integer.MIN_VALUE) + ) + ); iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 7), new IntPoint(NUMBER_FIELD_NAME, 7))); iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 2), new IntPoint(NUMBER_FIELD_NAME, 2))); iw.addDocument(List.of(new NumericDocValuesField(NUMBER_FIELD_NAME, 3), new IntPoint(NUMBER_FIELD_NAME, 3))); + iw.addDocument( + List.of( + new NumericDocValuesField(NUMBER_FIELD_NAME, Integer.MAX_VALUE), + new IntPoint(NUMBER_FIELD_NAME, Integer.MAX_VALUE) + ) + ); }, result -> { InternalRange range = (InternalRange) result; @@ -118,10 +130,10 @@ public void 
testUnboundedRanges() throws IOException { assertThat(ranges, hasSize(2)); assertThat(ranges.get(0).getFrom(), equalTo(Double.NEGATIVE_INFINITY)); assertThat(ranges.get(0).getTo(), equalTo(5d)); - assertThat(ranges.get(0).getDocCount(), equalTo(2L)); + assertThat(ranges.get(0).getDocCount(), equalTo(3L)); assertThat(ranges.get(1).getFrom(), equalTo(5d)); assertThat(ranges.get(1).getTo(), equalTo(Double.POSITIVE_INFINITY)); - assertThat(ranges.get(1).getDocCount(), equalTo(1L)); + assertThat(ranges.get(1).getDocCount(), equalTo(2L)); assertTrue(AggregationInspectionHelper.hasValue(range)); }, new NumberFieldMapper.NumberFieldType( From 73aaa455a23286c8275510d09766e96346e5afbb Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 13:13:01 -0400 Subject: [PATCH 18/48] Fixup profiler --- .../test/search.aggregation/10_histogram.yml | 61 ++++++++++++++++++- .../aggregations/AdaptingAggregator.java | 13 ++++ .../bucket/filter/FiltersAggregator.java | 37 ++++++++--- .../histogram/DateHistogramAggregator.java | 8 +-- .../bucket/range/RangeAggregator.java | 8 +-- .../InternalAggregationProfileTree.java | 24 ++++---- 6 files changed, 121 insertions(+), 30 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml index 2371bd5ef86ca..14a5c63862e94 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/10_histogram.yml @@ -495,6 +495,58 @@ setup: date: type: date + - do: + bulk: + index: test_2 + refresh: true + body: + - '{"index": {}}' + - '{"date": "2000-01-01"}' # This date is intenationally very far in the past so we end up not being able to use the date_histo -> range -> filters optimization + - '{"index": {}}' + - '{"date": "2000-01-02"}' + - '{"index": {}}' + - '{"date": "2016-02-01"}' + - 
'{"index": {}}' + - '{"date": "2016-03-01"}' + + - do: + search: + index: test_2 + body: + size: 0 + profile: true + aggs: + histo: + date_histogram: + field: date + calendar_interval: month + - match: { hits.total.value: 4 } + - length: { aggregations.histo.buckets: 195 } + - match: { aggregations.histo.buckets.0.key_as_string: "2000-01-01T00:00:00.000Z" } + - match: { aggregations.histo.buckets.0.doc_count: 2 } + - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator } + - match: { profile.shards.0.aggregations.0.description: histo } + - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } + - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 } + +--- +"date_histogram run as filters profiler": + - skip: + version: " - 7.99.99" + reason: optimization added in 7.11.0, backport pending + + - do: + indices.create: + index: test_2 + body: + settings: + number_of_replicas: 0 + number_of_shards: 1 + mappings: + properties: + date: + type: date + - do: bulk: index: test_2 @@ -524,10 +576,13 @@ setup: - length: { aggregations.histo.buckets: 3 } - match: { aggregations.histo.buckets.0.key_as_string: "2016-01-01T00:00:00.000Z" } - match: { aggregations.histo.buckets.0.doc_count: 2 } - - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator } + - match: { profile.shards.0.aggregations.0.type: DateHistogramAggregator.FromDateRange } - match: { profile.shards.0.aggregations.0.description: histo } - - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 4 } - - match: { profile.shards.0.aggregations.0.debug.total_buckets: 3 } + # ultimately this ends up as a filters agg that uses filter by filter collection which is tracked in build_leaf_collector + - match: { profile.shards.0.aggregations.0.breakdown.collect_count: 0 } + - match: { profile.shards.0.aggregations.0.debug.delegate: RangeAggregator.FromFilters } + - match: { profile.shards.0.aggregations.0.debug.delegate_debug.delegate: 
FiltersAggregator.FilterByFilter } + - match: { profile.shards.0.aggregations.0.debug.delegate_debug.delegate_debug.segments_with_deleted_docs: 0 } --- "histogram with hard bounds": diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index 0c3e9fa383389..c78c5f9a5cc76 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -22,8 +22,12 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ScoreMode; import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.search.profile.aggregation.InternalAggregationProfileTree; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.function.BiConsumer; /** * An {@linkplain Aggregator} that delegates collection to another @@ -107,4 +111,13 @@ public final InternalAggregation buildEmptyAggregation() { public final Aggregator[] subAggregators() { return delegate.subAggregators(); } + + @Override + public void collectDebugInfo(BiConsumer add) { + super.collectDebugInfo(add); + add.accept("delegate", InternalAggregationProfileTree.typeFromAggregator(delegate)); + Map delegateDebug = new HashMap<>(); + delegate.collectDebugInfo(delegateDebug::put); + add.accept("delegate_debug", delegateDebug); + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 2f697d53b6c67..c0720a1bc3eef 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -56,6 +56,7 @@ import java.util.List; 
import java.util.Map; import java.util.Objects; +import java.util.function.BiConsumer; import static java.util.Arrays.compareUnsigned; @@ -139,7 +140,7 @@ public static FiltersAggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - FiltersAggregator filterOrder = filterOrderAggregatorOrNull( + FiltersAggregator filterOrder = filterOrderOrNull( name, factories, keys, @@ -154,7 +155,7 @@ public static FiltersAggregator build( if (filterOrder != null) { return filterOrder; } - return new StandardOrderAggregator( + return new FiltersAggregator.Compatible( name, factories, keys, @@ -168,7 +169,7 @@ public static FiltersAggregator build( ); } - private static FiltersAggregator filterOrderAggregatorOrNull( + private static FiltersAggregator filterOrderOrNull( String name, AggregatorFactories factories, String[] keys, @@ -189,7 +190,7 @@ private static FiltersAggregator filterOrderAggregatorOrNull( if (otherBucketKey != null) { return null; } - return new FilterOrderAggregator( + return new FiltersAggregator.FilterByFilter( name, keys, filters, @@ -245,11 +246,18 @@ public InternalAggregation buildEmptyAggregation() { public abstract boolean collectsInFilterOrder(); - private static class FilterOrderAggregator extends FiltersAggregator { + /** + * Collects results by running each filter against the searcher and doesn't + * build any {@link LeafBucketCollector}s which is generally faster than + * {@link Compatible} but doesn't support when there is a parent aggregator + * or any child aggregators. 
+ */ + private static class FilterByFilter extends FiltersAggregator { private final Query[] filters; private Weight[] filterWeights; + private int segmentsWithDeletedDocs; - FilterOrderAggregator( + FilterByFilter( String name, String[] keys, Query[] filters, @@ -281,6 +289,7 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket collectBucket(sub, itr.docID(), filterOrd); } } else { + segmentsWithDeletedDocs++; while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if (live.get(itr.docID())) { collectBucket(sub, itr.docID(), filterOrd); @@ -295,15 +304,27 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket public boolean collectsInFilterOrder() { return true; } + + @Override + public void collectDebugInfo(BiConsumer add) { + super.collectDebugInfo(add); + add.accept("segments_with_deleted_docs", segmentsWithDeletedDocs); + } } - private static class StandardOrderAggregator extends FiltersAggregator { + /** + * Collects results by building a {@link Bits} per filter and testing if + * each doc sent to its {@link LeafBucketCollector} is in each filter + * which is generally slower than {@link FilterByFilter} but is compatible + * with parent and child aggregations. 
+ */ + private static class Compatible extends FiltersAggregator { private final Query[] filters; private Weight[] filterWeights; private final int totalNumKeys; - StandardOrderAggregator( + Compatible( String name, AggregatorFactories factories, String[] keys, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 5e34c78f3c2a8..7ddc823be4937 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -112,7 +112,7 @@ public static Aggregator build( ); } - private static DateHistogramAdaptedFromDateRangeAggregator adaptIntoRangeOrNull( + private static FromDateRange adaptIntoRangeOrNull( String name, AggregatorFactories factories, Rounding rounding, @@ -169,7 +169,7 @@ private static DateHistogramAdaptedFromDateRangeAggregator adaptIntoRangeOrNull( cardinality, metadata ); - return new DateHistogramAdaptedFromDateRangeAggregator( + return new DateHistogramAggregator.FromDateRange( delegate, valuesSourceConfig.format(), rounding, @@ -325,7 +325,7 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { } } - private static class DateHistogramAdaptedFromDateRangeAggregator extends AdaptingAggregator { + private static class FromDateRange extends AdaptingAggregator { private final DocValueFormat format; private final Rounding rounding; private final BucketOrder order; @@ -333,7 +333,7 @@ private static class DateHistogramAdaptedFromDateRangeAggregator extends Adaptin private final LongBounds extendedBounds; private final boolean keyed; - DateHistogramAdaptedFromDateRangeAggregator( + FromDateRange( Aggregator delegate, DocValueFormat format, Rounding rounding, diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 9fa4e2355cea2..549410c2de405 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -306,7 +306,7 @@ public static Aggregator adaptIntoFiltersOrNull( ranges[i].from == Double.NEGATIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].from), ranges[i].to == Double.POSITIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].to), true, - ranges[i].to == Double.NEGATIVE_INFINITY, + false, ShapeRelation.CONTAINS, null, null, @@ -328,7 +328,7 @@ public static Aggregator adaptIntoFiltersOrNull( if (false == delegate.collectsInFilterOrder()) { return null; } - return new RangeAdaptedFromFiltersAggregator<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); + return new RangeAggregator.FromFilters<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); } public static Aggregator buildWithoutAttemptedToAdaptToFilters( @@ -587,13 +587,13 @@ protected int collect(LeafBucketCollector sub, int doc, double value, long ownin } } - private static class RangeAdaptedFromFiltersAggregator extends AdaptingAggregator { + private static class FromFilters extends AdaptingAggregator { private final DocValueFormat format; private final Range[] ranges; private final boolean keyed; private final InternalRange.Factory rangeFactory; - RangeAdaptedFromFiltersAggregator( + FromFilters( Aggregator delegate, DocValueFormat format, Range[] ranges, diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java index 44d47ef12245b..5f435c7bc3990 
100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/InternalAggregationProfileTree.java @@ -31,17 +31,7 @@ protected AggregationProfileBreakdown createProfileBreakdown() { @Override protected String getTypeFromElement(Aggregator element) { - - // Anonymous classes (such as NonCollectingAggregator in TermsAgg) won't have a name, - // we need to get the super class - if (element.getClass().getSimpleName().isEmpty()) { - return element.getClass().getSuperclass().getSimpleName(); - } - Class enclosing = element.getClass().getEnclosingClass(); - if (enclosing != null) { - return enclosing.getSimpleName() + "." + element.getClass().getSimpleName(); - } - return element.getClass().getSimpleName(); + return typeFromAggregator(element); } @Override @@ -49,4 +39,16 @@ protected String getDescriptionFromElement(Aggregator element) { return element.name(); } + public static String typeFromAggregator(Aggregator aggregator) { + // Anonymous classes (such as NonCollectingAggregator in TermsAgg) won't have a name, + // we need to get the super class + if (aggregator.getClass().getSimpleName().isEmpty()) { + return aggregator.getClass().getSuperclass().getSimpleName(); + } + Class enclosing = aggregator.getClass().getEnclosingClass(); + if (enclosing != null) { + return enclosing.getSimpleName() + "." 
+ aggregator.getClass().getSimpleName(); + } + return aggregator.getClass().getSimpleName(); + } } From b73df70e183e4940051134dedb6b0fbe7d589f9d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 15:57:33 -0400 Subject: [PATCH 19/48] Rate agg This is weird --- .../aggregations/AdaptingAggregator.java | 18 +++++- .../aggregations/AggregatorFactories.java | 12 ++++ .../histogram/DateHistogramAggregator.java | 56 ++++++++++++----- .../bucket/range/RangeAggregator.java | 26 +++++--- .../analytics/rate/RateAggregatorTests.java | 63 +++++++++++++------ 5 files changed, 129 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index c78c5f9a5cc76..82fb5116fe076 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.profile.aggregation.InternalAggregationProfileTree; @@ -35,10 +36,17 @@ * you'd expect from another aggregation. 
*/ public abstract class AdaptingAggregator extends Aggregator { + private final Aggregator parent; private final Aggregator delegate; - public AdaptingAggregator(Aggregator delegate) { - this.delegate = delegate; + public AdaptingAggregator( + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate + ) throws IOException { + this.parent = parent; + this.delegate = delegate.apply(subAggregators.fixParent(this)); + assert this.delegate.parent() == parent : "invalid parent set on delegate"; } /** @@ -69,7 +77,7 @@ public final SearchContext context() { @Override public final Aggregator parent() { - return delegate.parent(); + return parent; } @Override @@ -120,4 +128,8 @@ public void collectDebugInfo(BiConsumer add) { delegate.collectDebugInfo(delegateDebug::put); add.accept("delegate_debug", delegateDebug); } + + public Aggregator delegate() { + return delegate; + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java index 94934e7bc0768..112a183e586ec 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java @@ -227,6 +227,18 @@ public int countAggregators() { return factories.length; } + public AggregatorFactories fixParent(Aggregator fixedParent) { + AggregatorFactories previous = this; + return new AggregatorFactories(factories) { + @Override + public Aggregator[] createSubAggregators(SearchContext searchContext, Aggregator parent, CardinalityUpperBound cardinality) + throws IOException { + // Note that we're throwing out the "parent" passed in to this method and using the parent passed to fixParent + return previous.createSubAggregators(searchContext, fixedParent, cardinality); + } + }; + } + /** * A mutable collection of {@link AggregationBuilder}s and * {@link 
PipelineAggregationBuilder}s. diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 7ddc823be4937..532120021e077 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -22,8 +22,10 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.Rounding.DateTimeUnit; import org.elasticsearch.common.lease.Releasables; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AdaptingAggregator; @@ -134,16 +136,16 @@ private static FromDateRange adaptIntoRangeOrNull( if (valuesSourceConfig.hasValues() == false) { return null; } - long[] points = preparedRounding.fixedRoundingPoints(); - if (points == null) { + long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints(); + if (fixedRoundingPoints == null) { return null; } // Range aggs use a double to aggregate and we don't want to lose precision. 
- long max = points[points.length - 1]; + long max = fixedRoundingPoints[fixedRoundingPoints.length - 1]; if ((double) max != max) { return null; } - if ((double) points[0] != points[0]) { + if ((double) fixedRoundingPoints[0] != fixedRoundingPoints[0]) { return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() @@ -152,14 +154,14 @@ private static FromDateRange adaptIntoRangeOrNull( if (rangeSupplier == null) { return null; } - RangeAggregator.Range[] ranges = new RangeAggregator.Range[points.length]; - for (int i = 0; i < points.length - 1; i++) { - ranges[i] = new RangeAggregator.Range(null, (double) points[i], (double) points[i + 1]); + RangeAggregator.Range[] ranges = new RangeAggregator.Range[fixedRoundingPoints.length]; + for (int i = 0; i < fixedRoundingPoints.length - 1; i++) { + ranges[i] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]); } - ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) points[points.length - 1], null); - Aggregator delegate = rangeSupplier.build( + ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null); + CheckedFunction delegate = subAggregators -> rangeSupplier.build( name, - factories, + subAggregators, valuesSourceConfig, InternalDateRange.FACTORY, ranges, @@ -170,13 +172,17 @@ private static FromDateRange adaptIntoRangeOrNull( metadata ); return new DateHistogramAggregator.FromDateRange( + parent, + factories, delegate, valuesSourceConfig.format(), rounding, + preparedRounding, order, minDocCount, extendedBounds, - keyed + keyed, + fixedRoundingPoints ); } @@ -325,30 +331,38 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { } } - private static class FromDateRange extends AdaptingAggregator { + private static class FromDateRange extends AdaptingAggregator implements SizedBucketAggregator { private final DocValueFormat format; 
private final Rounding rounding; + private final Rounding.Prepared preparedRounding; private final BucketOrder order; private final long minDocCount; private final LongBounds extendedBounds; private final boolean keyed; + private final long[] fixedRoundingPoints; FromDateRange( - Aggregator delegate, + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate, DocValueFormat format, Rounding rounding, + Rounding.Prepared preparedRounding, BucketOrder order, long minDocCount, LongBounds extendedBounds, - boolean keyed - ) { - super(delegate); + boolean keyed, + long[] fixedRoundingPoints + ) throws IOException { + super(parent, subAggregators, delegate); this.format = format; this.rounding = rounding; + this.preparedRounding = preparedRounding; this.order = order; this.minDocCount = minDocCount; this.extendedBounds = extendedBounds; this.keyed = keyed; + this.fixedRoundingPoints = fixedRoundingPoints; } @Override @@ -395,5 +409,15 @@ public final InternalAggregations buildEmptySubAggregations() { } return InternalAggregations.from(aggs); } + + @Override + public double bucketSize(long bucket, DateTimeUnit unitSize) { + if (unitSize != null) { + long startPoint = bucket < fixedRoundingPoints.length ? 
fixedRoundingPoints[(int) bucket] : Long.MIN_VALUE; + return preparedRounding.roundingSize(startPoint, unitSize); + } else { + return 1.0; + } + } } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 549410c2de405..071302016280a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.io.stream.StreamInput; @@ -313,9 +314,9 @@ public static Aggregator adaptIntoFiltersOrNull( context.getQueryShardContext() ); } - FiltersAggregator delegate = FiltersAggregator.build( + CheckedFunction delegate = subAggregators -> FiltersAggregator.build( name, - factories, + subAggregators, keys, filters, false, @@ -325,10 +326,19 @@ public static Aggregator adaptIntoFiltersOrNull( cardinality, metadata ); - if (false == delegate.collectsInFilterOrder()) { + RangeAggregator.FromFilters fromFilters = new RangeAggregator.FromFilters<>( + parent, + factories, + delegate, + valuesSourceConfig.format(), + ranges, + keyed, + rangeFactory + ); + if (false == ((FiltersAggregator) fromFilters.delegate()).collectsInFilterOrder()) { return null; } - return new RangeAggregator.FromFilters<>(delegate, valuesSourceConfig.format(), ranges, keyed, rangeFactory); + return fromFilters; } public static Aggregator buildWithoutAttemptedToAdaptToFilters( @@ -594,13 +604,15 @@ private static class FromFilters extends Adaptin private final InternalRange.Factory rangeFactory; 
FromFilters( - Aggregator delegate, + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate, DocValueFormat format, Range[] ranges, boolean keyed, InternalRange.Factory rangeFactory - ) { - super(delegate); + ) throws IOException { + super(parent, subAggregators, delegate); this.format = format; this.ranges = ranges; this.keyed = keyed; diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java index e69d23f066cf5..6be41030ccc14 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/RateAggregatorTests.java @@ -6,23 +6,9 @@ package org.elasticsearch.xpack.analytics.rate; -import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues; -import static org.hamcrest.Matchers.closeTo; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.instanceOf; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Consumer; -import java.util.function.Function; - import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -58,6 +44,22 @@ import org.elasticsearch.xpack.analytics.AnalyticsPlugin; import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper; +import java.io.IOException; +import java.util.ArrayList; +import 
java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Function; + +import static org.elasticsearch.xpack.analytics.AnalyticsTestsUtils.histogramFieldDocValues; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; + public class RateAggregatorTests extends AggregatorTestCase { /** @@ -299,16 +301,36 @@ public void testKeywordSandwich() throws IOException { testCase(dateHistogramAggregationBuilder, new MatchAllDocsQuery(), iw -> { iw.addDocument( - doc("2010-03-11T01:07:45", new NumericDocValuesField("val", 1), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-03-11T01:07:45", + new NumericDocValuesField("val", 1), + new IntPoint("val", 1), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-03-12T01:07:45", new NumericDocValuesField("val", 2), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-03-12T01:07:45", + new NumericDocValuesField("val", 2), + new IntPoint("val", 2), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-04-01T03:43:34", new NumericDocValuesField("val", 3), new SortedSetDocValuesField("term", new BytesRef("a"))) + doc( + "2010-04-01T03:43:34", + new NumericDocValuesField("val", 3), + new IntPoint("val", 3), + new SortedSetDocValuesField("term", new BytesRef("a")) + ) ); iw.addDocument( - doc("2010-04-27T03:43:34", new NumericDocValuesField("val", 4), new SortedSetDocValuesField("term", new BytesRef("b"))) + doc( + "2010-04-27T03:43:34", + new NumericDocValuesField("val", 4), + new IntPoint("val", 4), + new SortedSetDocValuesField("term", new BytesRef("b")) + ) ); }, (Consumer) dh -> { assertThat(dh.getBuckets(), hasSize(2)); @@ -504,6 +526,7 @@ private Iterable 
doc(String date, IndexableField... fields) { List indexableFields = new ArrayList<>(); long instant = dateFieldType(DATE_FIELD).parse(date); indexableFields.add(new SortedNumericDocValuesField(DATE_FIELD, instant)); + indexableFields.add(new LongPoint(DATE_FIELD, instant)); indexableFields.addAll(Arrays.asList(fields)); return indexableFields; } From b7e8dccef147a4916c6f514ee7928218ebde15d3 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 16:03:57 -0400 Subject: [PATCH 20/48] WIP --- .../search/aggregations/AdaptingAggregator.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index 82fb5116fe076..ebed8f7aec808 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -39,12 +39,25 @@ public abstract class AdaptingAggregator extends Aggregator { private final Aggregator parent; private final Aggregator delegate; + /** + * Build the + * @param parent + * @param subAggregators + * @param delegate + * @throws IOException + */ public AdaptingAggregator( Aggregator parent, AggregatorFactories subAggregators, CheckedFunction delegate ) throws IOException { + // It's important we set parent first or else when we build the sub-aggregators they can fail because they'll call this.parent. this.parent = parent; + /* + * Lock the parent of the sub-aggregators to *this* instead of to + * the delegate. This keeps the parent link shaped like the requested + * agg tree. This is how it has always been and some aggs rely on it. 
+ */ this.delegate = delegate.apply(subAggregators.fixParent(this)); assert this.delegate.parent() == parent : "invalid parent set on delegate"; } From 7c18141d23dc3e7f4dcd5883068c88424147af2f Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 16:36:19 -0400 Subject: [PATCH 21/48] Fixup weird formats --- .../aggregations/bucket/range/RangeAggregator.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 071302016280a..093cb306fc20e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -302,10 +302,17 @@ public static Aggregator adaptIntoFiltersOrNull( Query[] filters = new Query[ranges.length]; for (int i = 0; i < ranges.length; i++) { keys[i] = Integer.toString(i); + /* + * Use the native format on the field rather than the one provided + * on the valuesSourceConfig because the format on the field is what + * we parse. With https://github.com/elastic/elasticsearch/pull/63692 + * we can just cast to a long here and it'll be taken as millis. + */ + DocValueFormat format = valuesSourceConfig.fieldType().docValueFormat(null, null); filters[i] = valuesSourceConfig.fieldType() .rangeQuery( - ranges[i].from == Double.NEGATIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].from), - ranges[i].to == Double.POSITIVE_INFINITY ? null : valuesSourceConfig.format().format(ranges[i].to), + ranges[i].from == Double.NEGATIVE_INFINITY ? null : format.format(ranges[i].from), + ranges[i].to == Double.POSITIVE_INFINITY ? 
null : format.format(ranges[i].to), true, false, ShapeRelation.CONTAINS, From 2d04e358e33aeaf528221b900d41854118b37125 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 16:43:06 -0400 Subject: [PATCH 22/48] Feh --- .../search/aggregations/AdaptingAggregator.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index ebed8f7aec808..0ec68812e5b51 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -39,13 +39,6 @@ public abstract class AdaptingAggregator extends Aggregator { private final Aggregator parent; private final Aggregator delegate; - /** - * Build the - * @param parent - * @param subAggregators - * @param delegate - * @throws IOException - */ public AdaptingAggregator( Aggregator parent, AggregatorFactories subAggregators, From 102e5263357ef58f73b24333f46dddc5dfb6e52d Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 16:45:37 -0400 Subject: [PATCH 23/48] Shift --- .../histogram/DateHistogramAggregator.java | 25 +++++++++---------- .../bucket/range/RangeAggregator.java | 25 +++++++++---------- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 532120021e077..77ea4544ab712 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -159,22 +159,21 @@ private static FromDateRange adaptIntoRangeOrNull( ranges[i] = new 
RangeAggregator.Range(null, (double) fixedRoundingPoints[i], (double) fixedRoundingPoints[i + 1]); } ranges[ranges.length - 1] = new RangeAggregator.Range(null, (double) fixedRoundingPoints[fixedRoundingPoints.length - 1], null); - CheckedFunction delegate = subAggregators -> rangeSupplier.build( - name, - subAggregators, - valuesSourceConfig, - InternalDateRange.FACTORY, - ranges, - false, - context, - parent, - cardinality, - metadata - ); return new DateHistogramAggregator.FromDateRange( parent, factories, - delegate, + subAggregators -> rangeSupplier.build( + name, + subAggregators, + valuesSourceConfig, + InternalDateRange.FACTORY, + ranges, + false, + context, + parent, + cardinality, + metadata + ), valuesSourceConfig.format(), rounding, preparedRounding, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 093cb306fc20e..0b52fb82fe07f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -321,22 +321,21 @@ public static Aggregator adaptIntoFiltersOrNull( context.getQueryShardContext() ); } - CheckedFunction delegate = subAggregators -> FiltersAggregator.build( - name, - subAggregators, - keys, - filters, - false, - null, - context, - parent, - cardinality, - metadata - ); RangeAggregator.FromFilters fromFilters = new RangeAggregator.FromFilters<>( parent, factories, - delegate, + subAggregators -> FiltersAggregator.build( + name, + subAggregators, + keys, + filters, + false, + null, + context, + parent, + cardinality, + metadata + ), valuesSourceConfig.format(), ranges, keyed, From e333cbb179ef4d3392e2439aa26ae8702a6c6287 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 14 Oct 2020 17:12:45 -0400 Subject: [PATCH 24/48] Forbidden --- 
.../bucket/filter/FiltersAggregator.java | 7 +++++++ .../bucket/histogram/DateHistogramAggregator.java | 15 +++++++++++++++ .../bucket/range/RangeAggregator.java | 9 +++++++++ .../bucket/range/RangeAggregatorTests.java | 3 --- 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index c0720a1bc3eef..c122623125a5b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -128,6 +128,13 @@ public boolean equals(Object obj) { } } + /** + * Build an {@link Aggregator} for a {@code filters} aggregation. If there + * isn't a parent, there aren't children, and we don't collect "other" + * buckets then this will be a faster {@link FilterByFilter} aggregator. + * Otherwise it'll fall back to a slower aggregator that is + * {@link Compatible} with parent, children, and "other" buckets. 
+ */ public static FiltersAggregator build( String name, AggregatorFactories factories, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 77ea4544ab712..074111a1118f8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -38,6 +38,7 @@ import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator; import org.elasticsearch.search.aggregations.bucket.range.InternalDateRange; import org.elasticsearch.search.aggregations.bucket.range.RangeAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.range.RangeAggregator; @@ -61,6 +62,20 @@ * @see Rounding */ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator { + /** + * Build an {@link Aggregator} for a {@code date_histogram} aggregation. + * If we can determine the bucket boundaries from + * {@link Rounding.Prepared#fixedRoundingPoints()} we use + * {@link RangeAggregator} to do the actual collecting, otherwise we use + * a specialized {@link DateHistogramAggregator Aggregator} specifically + * for the {@code date_histogram}s. We prefer to delegate to the + * {@linkplain RangeAggregator} because it can sometimes be further + * optimized into a {@link FiltersAggregator}. 
Even when it can't be + * optimized, it is going to be marginally faster and consume less memory + * than the {@linkplain DateHistogramAggregator} because it doesn't need + * all the rounding points and because it can pass precise cardinality + * estimates to its child aggregations. + */ public static Aggregator build( String name, AggregatorFactories factories, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 0b52fb82fe07f..649d33ec79c24 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -226,6 +226,15 @@ public boolean equals(Object obj) { } } + /** + * Build an {@link Aggregator} for a {@code range} aggregation. If the + * {@code ranges} can be converted into filters then it builds a + * {@link FiltersAggregator} and uses that to collect the results + * if that aggregator can run in "filter by filter" + * collection mode. If it can't then we'll collect the ranges using + * a native {@link RangeAggregator} which is significantly faster + * than the "compatible" collection mechanism for the filters agg. 
+ */ public static Aggregator build( String name, AggregatorFactories factories, diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 1719f464ea98f..68de37ec05f9b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -19,8 +19,6 @@ package org.elasticsearch.search.aggregations.bucket.range; -import com.carrotsearch.randomizedtesting.annotations.Seed; - import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; @@ -55,7 +53,6 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; -@Seed("32AC488589A4F6FE") public class RangeAggregatorTests extends AggregatorTestCase { private static final String NUMBER_FIELD_NAME = "number"; From e2fd164096dc653417c16587a43805c5a02e2a90 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 15 Oct 2020 08:59:54 -0400 Subject: [PATCH 25/48] Moar tests --- .../aggregations/bucket/DateHistogramIT.java | 2 +- .../histogram/DateHistogramAggregator.java | 1 + .../bucket/range/RangeAggregator.java | 16 +++++++++------- .../xpack/unsignedlong/UnsignedLongTests.java | 17 +++++++++++++++++ 4 files changed, 28 insertions(+), 8 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java index 0b6d12d59726a..dd85431a087ee 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java +++ 
b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/DateHistogramIT.java @@ -38,10 +38,10 @@ import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; -import org.elasticsearch.search.aggregations.bucket.histogram.LongBounds; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket; import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram; +import org.elasticsearch.search.aggregations.bucket.histogram.LongBounds; import org.elasticsearch.search.aggregations.metrics.Avg; import org.elasticsearch.search.aggregations.metrics.Sum; import org.elasticsearch.test.ESIntegTestCase; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 074111a1118f8..22345b5c9af94 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -373,6 +373,7 @@ private static class FromDateRange extends AdaptingAggregator implements SizedBu this.rounding = rounding; this.preparedRounding = preparedRounding; this.order = order; + order.validate(this); this.minDocCount = minDocCount; this.extendedBounds = extendedBounds; this.keyed = keyed; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 649d33ec79c24..0a82ff2c9df94 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -370,7 +370,7 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( Map metadata ) throws IOException { if (hasOverlap(ranges)) { - return new OverlapRangeAggregator( + return new RangeAggregator.Overlap( name, factories, valuesSource, @@ -384,7 +384,7 @@ public static Aggregator buildWithoutAttemptedToAdaptToFilters( metadata ); } - return new NoOverlapAggregator( + return new RangeAggregator.NoOverlap( name, factories, valuesSource, @@ -502,8 +502,8 @@ public InternalAggregation buildEmptyAggregation() { protected abstract int collect(LeafBucketCollector sub, int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException; - private static class NoOverlapAggregator extends RangeAggregator { - NoOverlapAggregator( + private static class NoOverlap extends RangeAggregator { + NoOverlap( String name, AggregatorFactories factories, Numeric valuesSource, @@ -530,15 +530,16 @@ protected int collect(LeafBucketCollector sub, int doc, double value, long ownin lo = mid + 1; } else { collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, mid)); - return mid; + // The next value must fall in the next bucket to be collected. + return mid + 1; } } return lo; } } - private static class OverlapRangeAggregator extends RangeAggregator { - OverlapRangeAggregator( + private static class Overlap extends RangeAggregator { + Overlap( String name, AggregatorFactories factories, Numeric valuesSource, @@ -608,6 +609,7 @@ protected int collect(LeafBucketCollector sub, int doc, double value, long ownin } } + // The next value must fall in the next bucket to be collected. 
return endHi + 1; } } diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java index 97375e29a5ee7..23ef966641bbe 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; @@ -37,6 +38,7 @@ import static org.elasticsearch.search.aggregations.AggregationBuilders.sum; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; @ESIntegTestCase.SuiteScopeTestCase @@ -279,6 +281,21 @@ public void testAggs() { } } + public void testRangeQuery() { + SearchResponse response = client().prepareSearch("idx") + .setSize(0) + .setQuery(new RangeQueryBuilder("ul_field").to("9.223372036854776E18")) + .get(); + assertThat(response.getHits().getTotalHits().value, equalTo(3L)); + response = client().prepareSearch("idx") + .setSize(0) + .setQuery(new RangeQueryBuilder("ul_field").from("9.223372036854776E18").to("1.8446744073709552E19")) + .get(); + assertThat(response.getHits().getTotalHits().value, equalTo(3L)); + response = client().prepareSearch("idx").setSize(0).setQuery(new RangeQueryBuilder("ul_field").from("1.8446744073709552E19")).get(); + 
assertThat(response.getHits().getTotalHits().value, equalTo(3L)); + } + public void testSortDifferentFormatsShouldFail() { Exception exception = expectThrows( SearchPhaseExecutionException.class, From 46a10159401aac957f10196e619f3f08b64958d9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 15 Oct 2020 09:53:22 -0400 Subject: [PATCH 26/48] test --- .../aggregations/AdaptingAggregatorTests.java | 113 ++++++++++++++++++ .../index/mapper/MapperServiceTestCase.java | 16 ++- 2 files changed, 124 insertions(+), 5 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java new file mode 100644 index 0000000000000..5c5a3b8e4e95d --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java @@ -0,0 +1,113 @@ +package org.elasticsearch.search.aggregations; + +import org.apache.lucene.index.LeafReaderContext; +import org.elasticsearch.common.CheckedFunction; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperServiceTestCase; +import org.elasticsearch.search.aggregations.bucket.histogram.SizedBucketAggregator; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.support.AggregationContext; +import org.elasticsearch.search.aggregations.support.ValuesSourceRegistry; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class AdaptingAggregatorTests extends MapperServiceTestCase { + /** + * Its important that sub-aggregations of the {@linkplain 
AdaptingAggregator} + * receive a reference to the {@linkplain AdaptingAggregator} as the parent. + * Without it we can't do things like implement {@link SizedBucketAggregator}. + */ + public void testParent() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> {})); + ValuesSourceRegistry.Builder registry = new ValuesSourceRegistry.Builder(); + MaxAggregationBuilder.registerAggregators(registry); + withAggregationContext(registry.build(), mapperService, List.of(), null, context -> { + SearchContext searchContext = mock(SearchContext.class); + when(searchContext.bigArrays()).thenReturn(context.bigArrays()); + AggregatorFactories.Builder sub = AggregatorFactories.builder(); + sub.addAggregator(new MaxAggregationBuilder("test").field("foo")); + AggregatorFactory factory = new DummyAdaptingAggregatorFactory("test", context, null, sub, null); + Aggregator adapting = factory.create(searchContext, null, CardinalityUpperBound.ONE); + assertThat(adapting.subAggregators()[0].parent(), sameInstance(adapting)); + }); + } + + private static class DummyAdaptingAggregatorFactory extends AggregatorFactory { + public DummyAdaptingAggregatorFactory( + String name, + AggregationContext context, + AggregatorFactory parent, + AggregatorFactories.Builder subFactoriesBuilder, + Map metadata + ) throws IOException { + super(name, context, parent, subFactoriesBuilder, metadata); + } + + @Override + protected Aggregator createInternal( + SearchContext context, + Aggregator parent, + CardinalityUpperBound cardinality, + Map metadata + ) throws IOException { + return new DummyAdaptingAggregator( + parent, + factories, + subAggs -> new DummyAggregator(name, subAggs, context, parent, CardinalityUpperBound.ONE, metadata) + ); + } + } + + private static class DummyAdaptingAggregator extends AdaptingAggregator { + public DummyAdaptingAggregator( + Aggregator parent, + AggregatorFactories subAggregators, + CheckedFunction delegate + ) throws IOException { + 
super(parent, subAggregators, delegate); + } + + @Override + protected InternalAggregation adapt(InternalAggregation delegateResult) { + return null; + } + } + + private static class DummyAggregator extends AggregatorBase { + protected DummyAggregator( + String name, + AggregatorFactories factories, + SearchContext context, + Aggregator parent, + CardinalityUpperBound subAggregatorCardinality, + Map metadata + ) throws IOException { + super(name, factories, context, parent, subAggregatorCardinality, metadata); + } + + @Override + protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + // TODO Auto-generated method stub + return null; + } + + @Override + public InternalAggregation buildEmptyAggregation() { + // TODO Auto-generated method stub + return null; + } + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 5a00d23fa082d..f929699cc8b6d 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -251,7 +251,12 @@ protected final XContentBuilder fieldMapping(CheckedConsumer docs, CheckedConsumer test ) throws IOException { - withAggregationContext(mapperService, docs, null, test); + withAggregationContext(null, mapperService, docs, null, test); } - + protected final void withAggregationContext( + ValuesSourceRegistry valuesSourceRegistry, MapperService mapperService, List docs, Query query, @@ -351,7 +357,7 @@ protected final void withAggregationContext( writer.addDocuments(mapperService.documentMapper().parse(doc).docs()); } }, - reader -> 
test.accept(aggregationContext(mapperService, new IndexSearcher(reader), query)) + reader -> test.accept(aggregationContext(valuesSourceRegistry, mapperService, new IndexSearcher(reader), query)) ); } From deb56842f63ef0827f354f52582c7e830ca5b4ea Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 15 Oct 2020 10:14:28 -0400 Subject: [PATCH 27/48] Fixup tests --- .../aggregations/AdaptingAggregatorTests.java | 20 ++++++++++++++++--- .../support/CoreValuesSourceTypeTests.java | 4 ++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java index 5c5a3b8e4e95d..1f03bcdbb51d8 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java @@ -14,6 +14,7 @@ import java.util.List; import java.util.Map; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.sameInstance; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -39,6 +40,18 @@ public void testParent() throws IOException { }); } + public void testBuildCallsAdapt() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> {})); + withAggregationContext(mapperService, List.of(), context -> { + SearchContext searchContext = mock(SearchContext.class); + when(searchContext.bigArrays()).thenReturn(context.bigArrays()); + AggregatorFactory factory = new DummyAdaptingAggregatorFactory("test", context, null, AggregatorFactories.builder(), null); + Aggregator adapting = factory.create(searchContext, null, CardinalityUpperBound.ONE); + assertThat(adapting.buildEmptyAggregation().getMetadata(), equalTo(Map.of("dog", "woof"))); + assertThat(adapting.buildTopLevel().getMetadata(), equalTo(Map.of("dog", "woof"))); + }); + } + 
private static class DummyAdaptingAggregatorFactory extends AggregatorFactory { public DummyAdaptingAggregatorFactory( String name, @@ -76,7 +89,9 @@ public DummyAdaptingAggregator( @Override protected InternalAggregation adapt(InternalAggregation delegateResult) { - return null; + InternalAggregation result = mock(InternalAggregation.class); + when(result.getMetadata()).thenReturn(Map.of("dog", "woof")); + return result; } } @@ -100,8 +115,7 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - // TODO Auto-generated method stub - return null; + return new InternalAggregation[] {null}; } @Override diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java index 37b87463f0ee4..4ac4f77f6207c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/CoreValuesSourceTypeTests.java @@ -73,7 +73,7 @@ public void testDatePrepareRoundingWithQuery() throws IOException { MapperService mapperService = dateMapperService(); Query query = mapperService.fieldType("field") .rangeQuery(min, max, true, true, ShapeRelation.CONTAINS, null, null, createQueryShardContext(mapperService)); - withAggregationContext(mapperService, List.of(), query, context -> { + withAggregationContext(null, mapperService, List.of(), query, context -> { Rounding rounding = mock(Rounding.class); CoreValuesSourceType.DATE.getField(context.buildFieldContext("field"), null, context).roundingPreparer().apply(rounding); verify(rounding).prepare(min, max); @@ -102,7 +102,7 @@ public void testDatePrepareRoundingWithDocAndQuery() throws IOException { MapperService mapperService = dateMapperService(); Query 
query = mapperService.fieldType("field") .rangeQuery(minQuery, maxQuery, true, true, ShapeRelation.CONTAINS, null, null, createQueryShardContext(mapperService)); - withAggregationContext(mapperService, docsWithDatesBetween(minDocs, maxDocs), query, context -> { + withAggregationContext(null, mapperService, docsWithDatesBetween(minDocs, maxDocs), query, context -> { Rounding rounding = mock(Rounding.class); CoreValuesSourceType.DATE.getField(context.buildFieldContext("field"), null, context).roundingPreparer().apply(rounding); verify(rounding).prepare(min, max); From de96179d12412bf51045607ab1b27e87e6a43d4e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 15 Oct 2020 11:37:37 -0400 Subject: [PATCH 28/48] precommit --- .../aggregations/AdaptingAggregatorTests.java | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java index 1f03bcdbb51d8..fe9bf2c6fe141 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.search.aggregations; import org.apache.lucene.index.LeafReaderContext; @@ -53,7 +72,7 @@ public void testBuildCallsAdapt() throws IOException { } private static class DummyAdaptingAggregatorFactory extends AggregatorFactory { - public DummyAdaptingAggregatorFactory( + DummyAdaptingAggregatorFactory( String name, AggregationContext context, AggregatorFactory parent, @@ -79,7 +98,7 @@ protected Aggregator createInternal( } private static class DummyAdaptingAggregator extends AdaptingAggregator { - public DummyAdaptingAggregator( + DummyAdaptingAggregator( Aggregator parent, AggregatorFactories subAggregators, CheckedFunction delegate From f13a8ae89d99e7a8bc448ca734b2ed7c03fde224 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 16 Oct 2020 12:25:11 -0400 Subject: [PATCH 29/48] Drop old extra test --- .../xpack/unsignedlong/UnsignedLongTests.java | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java index 31ff3200fbd01..943622dc5cd83 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongTests.java @@ -276,21 +276,6 @@ public void testAggs() { } } - public void testRangeQuery() { - SearchResponse response = client().prepareSearch("idx") - .setSize(0) - .setQuery(new RangeQueryBuilder("ul_field").to("9.223372036854776E18")) - .get(); - assertThat(response.getHits().getTotalHits().value, equalTo(3L)); - response = client().prepareSearch("idx") - .setSize(0) - .setQuery(new 
RangeQueryBuilder("ul_field").from("9.223372036854776E18").to("1.8446744073709552E19")) - .get(); - assertThat(response.getHits().getTotalHits().value, equalTo(3L)); - response = client().prepareSearch("idx").setSize(0).setQuery(new RangeQueryBuilder("ul_field").from("1.8446744073709552E19")).get(); - assertThat(response.getHits().getTotalHits().value, equalTo(3L)); - } - public void testSortDifferentFormatsShouldFail() { Exception exception = expectThrows( SearchPhaseExecutionException.class, From 6b3324713d5ba6934c3cd72cab56dd3f1eb5d438 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 19 Oct 2020 09:21:41 -0400 Subject: [PATCH 30/48] TODO --- .../search/aggregations/bucket/range/RangeAggregator.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 0a82ff2c9df94..d634fb172b5fc 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -318,6 +318,7 @@ public static Aggregator adaptIntoFiltersOrNull( * we can just cast to a long here and it'll be taken as millis. */ DocValueFormat format = valuesSourceConfig.fieldType().docValueFormat(null, null); + // TODO correct the loss of precision from the range somehow.....? filters[i] = valuesSourceConfig.fieldType() .rangeQuery( ranges[i].from == Double.NEGATIVE_INFINITY ? 
null : format.format(ranges[i].from), From 2fd7e53762cbfc9ded7fae5b79796fe28941d442 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 20 Oct 2020 10:42:09 -0400 Subject: [PATCH 31/48] Don't attempt the optimization if rounding would break it --- .../bucket/range/RangeAggregator.java | 14 ++++++++ .../bucket/range/RangeAggregatorTests.java | 34 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index d634fb172b5fc..d62e46d6a0459 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -307,9 +307,23 @@ public static Aggregator adaptIntoFiltersOrNull( // We don't generate sensible Queries for nanoseconds. return null; } + boolean wholeNumbersOnly = false == ((ValuesSource.Numeric) valuesSourceConfig.getValuesSource()).isFloatingPoint(); String[] keys = new String[ranges.length]; Query[] filters = new Query[ranges.length]; for (int i = 0; i < ranges.length; i++) { + /* + * If the bounds on the ranges are too high then the `double`s + * that we work with will round differently in the native range + * aggregator than in the filters aggregator. So we can't use + * the filters. That is, if the input data type is a `long` in + * the first place. 
If it isn't then + */ + if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > 1 << 53) { + return null; + } + if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > 1 << 53) { + return null; + } keys[i] = Integer.toString(i); /* * Use the native format on the field rather than the one provided diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 68de37ec05f9b..7e00046ad1b61 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.CardinalityUpperBound; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; @@ -227,6 +228,39 @@ public void testMissingDateWithDateField() throws IOException { }, fieldType); } + public void testNotFitIntoDouble() throws IOException { + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType( + NUMBER_FIELD_NAME, + NumberType.LONG, + true, + false, + true, + false, + null, + null + ); + + long start = 2L << 54; // Double stores 53 bits of mantissa, so we aggregate a bunch of bigger values + + RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("range") + .field(NUMBER_FIELD_NAME) + .addRange(start, start + 50) + .addRange(start + 50, start + 100) + .addUnboundedFrom(start + 100); + + testCase(aggregationBuilder, new 
MatchAllDocsQuery(), iw -> { + for (long l = start; l < start + 150; l++) { + iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, l), new LongPoint(NUMBER_FIELD_NAME, l))); + } + }, range -> { + List ranges = range.getBuckets(); + assertThat(ranges, hasSize(3)); + // If we had a native `double` range aggregator we'd get 50, 50, 50 + assertThat(ranges.stream().mapToLong(InternalRange.Bucket::getDocCount).toArray(), equalTo(new long[] {44, 48, 58})); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, fieldType); + } + public void testMissingDateWithNumberField() throws IOException { RangeAggregationBuilder aggregationBuilder = new RangeAggregationBuilder("range") .field(NUMBER_FIELD_NAME) From 97c35cdb46ee01c6fa4db505107d47b305f88791 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 20 Oct 2020 12:00:20 -0400 Subject: [PATCH 32/48] move building This way we don't have to worry about BigArrays and releasing and stuff. --- .../bucket/filter/FiltersAggregator.java | 23 +++++------- .../bucket/range/RangeAggregator.java | 36 +++++++++++-------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index c122623125a5b..5e748fde888e0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -147,7 +147,7 @@ public static FiltersAggregator build( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - FiltersAggregator filterOrder = filterOrderOrNull( + FiltersAggregator filterOrder = buildFilterOrderOrNull( name, factories, keys, @@ -176,7 +176,14 @@ public static FiltersAggregator build( ); } - private static FiltersAggregator filterOrderOrNull( + /** + * Build an 
{@link Aggregator} for a {@code filters} aggregation if we + * can collect {@link FilterByFilter}, otherwise return {@code null}. We can + * collect filter by filter if there isn't a parent, there aren't children, + * and we don't collect "other" buckets. Collecting {@link FilterByFilter} + * is generally going to be much faster than the {@link Compatible} aggregator. + */ + public static FiltersAggregator buildFilterOrderOrNull( String name, AggregatorFactories factories, String[] keys, @@ -251,8 +258,6 @@ public InternalAggregation buildEmptyAggregation() { return new InternalFilters(name, buckets, keyed, metadata()); } - public abstract boolean collectsInFilterOrder(); - /** * Collects results by running each filter against the searcher and doesn't * build any {@link LeafBucketCollector}s which is generally faster than @@ -307,11 +312,6 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket throw new CollectionTerminatedException(); } - @Override - public boolean collectsInFilterOrder() { - return true; - } - @Override public void collectDebugInfo(BiConsumer add) { super.collectDebugInfo(add); @@ -381,11 +381,6 @@ public void collect(int doc, long bucket) throws IOException { final long bucketOrd(long owningBucketOrdinal, int filterOrd) { return owningBucketOrdinal * totalNumKeys + filterOrd; } - - @Override - public boolean collectsInFilterOrder() { - return false; - } } protected Weight[] buildWeights(Query topLevelQuery, Query filters[]) throws IOException{ diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index d62e46d6a0459..1af07f0a53b76 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -345,29 +345,35 @@ public static 
Aggregator adaptIntoFiltersOrNull( context.getQueryShardContext() ); } + FiltersAggregator delegate = FiltersAggregator.buildFilterOrderOrNull( + name, + factories, + keys, + filters, + false, + null, + context, + parent, + cardinality, + metadata + ); + if (delegate == null) { + return null; + } RangeAggregator.FromFilters fromFilters = new RangeAggregator.FromFilters<>( parent, factories, - subAggregators -> FiltersAggregator.build( - name, - subAggregators, - keys, - filters, - false, - null, - context, - parent, - cardinality, - metadata - ), + subAggregators -> { + if (subAggregators.countAggregators() > 0) { + throw new IllegalStateException("didn't expect to have a delegate if there are child aggs"); + } + return delegate; + }, valuesSourceConfig.format(), ranges, keyed, rangeFactory ); - if (false == ((FiltersAggregator) fromFilters.delegate()).collectsInFilterOrder()) { - return null; - } return fromFilters; } From 6c7eaa1ef54adcf49f091459f22885b4e1ed88ec Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 20 Oct 2020 12:06:25 -0400 Subject: [PATCH 33/48] Computers are hard --- .../search/aggregations/bucket/range/RangeAggregator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 1af07f0a53b76..485b02c01b61e 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -318,10 +318,10 @@ public static Aggregator adaptIntoFiltersOrNull( * the filters. That is, if the input data type is a `long` in * the first place. 
If it isn't then */ - if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > 1 << 53) { + if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > 1L << 53) { return null; } - if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > 1 << 53) { + if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > 1L << 53) { return null; } keys[i] = Integer.toString(i); From 117eb77edc372d3d707dca96dbcdf161e3c33faf Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 20 Oct 2020 13:56:19 -0400 Subject: [PATCH 34/48] Moar javadoc --- .../bucket/filter/FiltersAggregator.java | 9 ++++++++- .../histogram/DateHistogramAggregator.java | 11 +++++++---- .../bucket/range/RangeAggregator.java | 9 +++++++++ .../aggregations/support/ValuesSourceConfig.java | 16 +++++++++++----- .../support/ValuesSourceConfigTests.java | 16 ++++++++++++++++ 5 files changed, 51 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 5e748fde888e0..cadcf56698880 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -60,6 +60,13 @@ import static java.util.Arrays.compareUnsigned; +/** + * Aggregator for {@code filters}. There are two known subclasses, + * {@link FilterByFilter} which is fast but only works in some cases and + * {@link Compatible} which works in all cases. + * {@link FiltersAggregator#build} will build the fastest version that + * works with the configuration. 
+ */ public abstract class FiltersAggregator extends BucketsAggregator { public static final ParseField FILTERS_FIELD = new ParseField("filters"); @@ -220,7 +227,7 @@ public static FiltersAggregator buildFilterOrderOrNull( private final boolean keyed; protected final String otherBucketKey; - public FiltersAggregator(String name, AggregatorFactories factories, String[] keys, boolean keyed, + private FiltersAggregator(String name, AggregatorFactories factories, String[] keys, boolean keyed, String otherBucketKey, SearchContext context, Aggregator parent, CardinalityUpperBound cardinality, Map metadata) throws IOException { super(name, factories, context, parent, cardinality.multiply(keys.length + (otherBucketKey == null ? 0 : 1)), metadata); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 22345b5c9af94..5a8281825d69c 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -56,10 +56,13 @@ import java.util.function.BiConsumer; /** - * An aggregator for date values. Every date is rounded down using a configured - * {@link Rounding}. - * - * @see Rounding + * Aggregator for {@code date_histogram} that rounds values using + * {@link Rounding}. See {@link FromDateRange} which also aggregates for + * {@code date_histogram} but does so by running a {@code range} aggregation + * over the date and transforming the results. In general + * {@link FromDateRange} is faster than {@link DateHistogramAggregator} + * but {@linkplain DateHistogramAggregator} works when we can't precalculate + * all of the {@link Rounding.Prepared#fixedRoundingPoints() fixed rounding points}. 
*/ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator { /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 485b02c01b61e..686d11d7ed45d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -63,6 +63,15 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; +/** + * Aggregator for {@code range}. There are two known subclasses, + * {@link NoOverlap} which is fast but only compatible with ranges that + * don't have overlaps and {@link Overlap} which handles overlapping + * ranges. There is also {@link FromFilters} which isn't a subclass + * but is also a functional aggregator for {@code range}. + * {@link RangeAggregator#build} will build the fastest of the three + * that is compatible with the requested configuration. 
+ */ public abstract class RangeAggregator extends BucketsAggregator { public static final ParseField RANGES_FIELD = new ParseField("ranges"); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java index 5afcd394d645a..bb4386360fc20 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfig.java @@ -385,11 +385,17 @@ public boolean hasGlobalOrdinals() { */ @Nullable public Function getPointReaderOrNull() { - MappedFieldType fieldType = fieldType(); - if (fieldType != null && script() == null && missing() == null) { - return fieldType.pointReaderIfPossible(); - } - return null; + return alignesWithSearchIndex() ? fieldType().pointReaderIfPossible() : null; + } + + /** + * Do {@link ValuesSource}s built by this config line up with the search + * index of the underlying field? This'll only return true if the fields + * is searchable and there aren't missing values or a script to confuse + * the ordering. 
+ */ + public boolean alignesWithSearchIndex() { + return script() == null && missing() == null && fieldType() != null && fieldType().isSearchable(); } /** diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java index 3ac362764ecec..d1caaacafe077 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/ValuesSourceConfigTests.java @@ -55,12 +55,15 @@ public void testEmptyKeyword() throws Exception { LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(0); SortedBinaryDocValues values = valuesSource.bytesValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); + config = ValuesSourceConfig.resolve(context, null, "field", null, "abc", null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Bytes) config.getValuesSource(); values = valuesSource.bytesValues(ctx); assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("abc"), values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -72,6 +75,7 @@ public void testUnmappedKeyword() throws Exception { ValuesSource.Bytes valuesSource = (ValuesSource.Bytes) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.STRING, "field", null, "abc", null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Bytes) config.getValuesSource(); @@ -80,6 +84,7 @@ public void testUnmappedKeyword() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("abc"), values.nextValue()); + 
assertFalse(config.alignesWithSearchIndex()); }); } @@ -94,6 +99,7 @@ public void testLong() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } @@ -106,6 +112,7 @@ public void testEmptyLong() throws Exception { LeafReaderContext ctx = context.searcher().getIndexReader().leaves().get(0); SortedNumericDocValues values = valuesSource.longValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, null, "field", null, 42, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -113,6 +120,7 @@ public void testEmptyLong() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -124,6 +132,7 @@ public void testUnmappedLong() throws Exception { ValuesSource.Numeric valuesSource = (ValuesSource.Numeric) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.NUMBER, "field", null, 42, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -132,6 +141,7 @@ public void testUnmappedLong() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(42, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -146,6 +156,7 @@ public void testBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } @@ -158,6 +169,7 @@ public void testEmptyBoolean() throws Exception { LeafReaderContext ctx = 
context.searcher().getIndexReader().leaves().get(0); SortedNumericDocValues values = valuesSource.longValues(ctx); assertFalse(values.advanceExact(0)); + assertTrue(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, null, "field", null, true, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -165,6 +177,7 @@ public void testEmptyBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -176,6 +189,7 @@ public void testUnmappedBoolean() throws Exception { ValuesSource.Numeric valuesSource = (ValuesSource.Numeric) config.getValuesSource(); assertNotNull(valuesSource); assertFalse(config.hasValues()); + assertFalse(config.alignesWithSearchIndex()); config = ValuesSourceConfig.resolve(context, ValueType.BOOLEAN, "field", null, true, null, null, CoreValuesSourceType.BYTES); valuesSource = (ValuesSource.Numeric) config.getValuesSource(); @@ -184,6 +198,7 @@ public void testUnmappedBoolean() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(1, values.nextValue()); + assertFalse(config.alignesWithSearchIndex()); }); } @@ -214,6 +229,7 @@ public void testFieldAlias() throws Exception { assertTrue(values.advanceExact(0)); assertEquals(1, values.docValueCount()); assertEquals(new BytesRef("value"), values.nextValue()); + assertTrue(config.alignesWithSearchIndex()); }); } } From ce640e3543934de558ad5f3e21b9a26fcfa53f4a Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 12:38:35 -0400 Subject: [PATCH 35/48] Test cases we can't do it --- .../histogram/DateHistogramAggregator.java | 10 +- .../bucket/range/RangeAggregator.java | 11 +-- .../DateHistogramAggregatorTestCase.java | 21 ++++- .../DateHistogramAggregatorTests.java | 93 +++++++++++++++++++ 4 files changed, 115 insertions(+), 
20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 5a8281825d69c..1f87979a64140 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -151,19 +151,15 @@ private static FromDateRange adaptIntoRangeOrNull( if (hardBounds != null) { return null; } - if (valuesSourceConfig.hasValues() == false) { - return null; - } long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints(); if (fixedRoundingPoints == null) { return null; } // Range aggs use a double to aggregate and we don't want to lose precision. - long max = fixedRoundingPoints[fixedRoundingPoints.length - 1]; - if ((double) max != max) { + if (fixedRoundingPoints[0] > 1L << 53) { return null; } - if ((double) fixedRoundingPoints[0] != fixedRoundingPoints[0]) { + if (fixedRoundingPoints[fixedRoundingPoints.length - 1] > 1L << 53) { return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() @@ -348,7 +344,7 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { } } - private static class FromDateRange extends AdaptingAggregator implements SizedBucketAggregator { + static class FromDateRange extends AdaptingAggregator implements SizedBucketAggregator { private final DocValueFormat format; private final Rounding rounding; private final Rounding.Prepared preparedRounding; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 686d11d7ed45d..06b2372f94eb3 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -298,16 +298,7 @@ public static Aggregator adaptIntoFiltersOrNull( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (valuesSourceConfig.fieldType() == null) { - return null; - } - if (false == valuesSourceConfig.fieldType().isSearchable()) { - return null; - } - if (valuesSourceConfig.missing() != null) { - return null; - } - if (valuesSourceConfig.script() != null) { + if (false == valuesSourceConfig.alignesWithSearchIndex()) { return null; } // TODO bail here for runtime fields. They'll be slower this way. Maybe we can somehow look at the Query? diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java index e0d644c9fdf18..6f01e4d85c5c0 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTestCase.java @@ -25,6 +25,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -97,9 +98,23 @@ protected final void asSubAggTestCase( } protected final DateFieldMapper.DateFieldType aggregableDateFieldType(boolean useNanosecondResolution, boolean isSearchable) { - return new DateFieldMapper.DateFieldType(AGGREGABLE_DATE, isSearchable, false, true, - 
DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER, + return aggregableDateFieldType(useNanosecondResolution, isSearchable, DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER); + } + + protected final DateFieldMapper.DateFieldType aggregableDateFieldType( + boolean useNanosecondResolution, + boolean isSearchable, + DateFormatter formatter + ) { + return new DateFieldMapper.DateFieldType( + AGGREGABLE_DATE, + isSearchable, + randomBoolean(), + true, + formatter, useNanosecondResolution ? DateFieldMapper.Resolution.NANOSECONDS : DateFieldMapper.Resolution.MILLISECONDS, - null, Collections.emptyMap()); + null, + Collections.emptyMap() + ); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index d297d9fabc613..a79412537817b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -30,13 +30,18 @@ import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.DateFormatters; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; +import 
org.elasticsearch.search.internal.SearchContext; +import org.hamcrest.Matcher; import java.io.IOException; import java.util.ArrayList; @@ -44,9 +49,13 @@ import java.util.Collections; import java.util.List; import java.util.function.Consumer; +import java.util.stream.IntStream; import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.not; +import static org.mockito.Mockito.mock; public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCase { /** @@ -1140,6 +1149,90 @@ public void testOverlappingBounds() { "hard bounds: [2010-01-01--2020-01-01], extended bounds: [2009-01-01--2021-01-01]")); } + public void testFewRoundingPointsUsesFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + IntStream.range(2000, 2010).mapToObj(Integer::toString).collect(toList()), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + true + ); + } + + public void testManyRoundingPointsDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + IntStream.range(2000, 3000).mapToObj(Integer::toString).collect(toList()), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + false + ); + } + + /** + * Nanos doesn't use from range, but we don't get the fancy compile into + * filters because of potential loss of precision. 
+ */ + public void testNanosDoesUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(true, true, DateFormatter.forPattern("yyyy")), + List.of("2017", "2018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + true + ); + } + + public void testFarFutureDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyyyy")), + List.of("402017", "402018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR), + false + ); + } + + public void testMissingValueDoesNotUseFromRange() throws IOException { + aggregationImplementationChoiceTestCase( + aggregableDateFieldType(false, true, DateFormatter.forPattern("yyyy")), + List.of("2017", "2018"), + new DateHistogramAggregationBuilder("test").field(AGGREGABLE_DATE).calendarInterval(DateHistogramInterval.YEAR).missing("2020"), + false + ); + } + + private void aggregationImplementationChoiceTestCase( + DateFieldMapper.DateFieldType ft, + List data, + DateHistogramAggregationBuilder builder, + boolean usesFromRange + ) throws IOException { + try (Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + for (String d : data) { + long instant = asLong(d, ft); + indexWriter.addDocument( + List.of(new SortedNumericDocValuesField(AGGREGABLE_DATE, instant), new LongPoint(AGGREGABLE_DATE, instant)) + ); + } + try (IndexReader reader = indexWriter.getReader()) { + SearchContext context = createSearchContext( + new IndexSearcher(reader), + createIndexSettings(), + new MatchAllDocsQuery(), + mock(MultiBucketConsumer.class), + ft + ); + Aggregator agg = createAggregator(builder, context); + Matcher matcher = instanceOf(DateHistogramAggregator.FromDateRange.class); + if (usesFromRange == false) { + matcher = 
not(matcher); + } + assertThat(agg, matcher); + agg.preCollection(); + context.searcher().search(context.query(), agg); + InternalDateHistogram result = (InternalDateHistogram) agg.buildTopLevel(); + assertThat(result.getBuckets().stream().map(InternalDateHistogram.Bucket::getKeyAsString).collect(toList()), equalTo(data)); + } + } + } + public void testIllegalInterval() throws IOException { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> testSearchCase(new MatchAllDocsQuery(), Collections.emptyList(), From 523d420a065bac8298969997a8638a8846bb0870 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 13:12:42 -0400 Subject: [PATCH 36/48] Fix broken test --- .../histogram/DateHistogramAggregator.java | 4 +-- .../bucket/range/RangeAggregator.java | 9 ++++-- .../range/DateRangeAggregatorTests.java | 28 ++++++++++++++++--- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 1f87979a64140..cb0155d6a453b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -156,10 +156,10 @@ private static FromDateRange adaptIntoRangeOrNull( return null; } // Range aggs use a double to aggregate and we don't want to lose precision. 
- if (fixedRoundingPoints[0] > 1L << 53) { + if (Math.abs(fixedRoundingPoints[0]) > RangeAggregator.MAX_ACCURATE_BOUND) { return null; } - if (fixedRoundingPoints[fixedRoundingPoints.length - 1] > 1L << 53) { + if (Math.abs(fixedRoundingPoints[fixedRoundingPoints.length - 1]) > RangeAggregator.MAX_ACCURATE_BOUND) { return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 06b2372f94eb3..2cae1e04b500a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -73,6 +73,11 @@ * that is compatible with the requested configuration. */ public abstract class RangeAggregator extends BucketsAggregator { + /** + * The maximum {@code long} that can accurately fit into the + * {@code double} precision floating point bounds. + */ + public static final long MAX_ACCURATE_BOUND = 1L << 53; public static final ParseField RANGES_FIELD = new ParseField("ranges"); public static final ParseField KEYED_FIELD = new ParseField("keyed"); @@ -318,10 +323,10 @@ public static Aggregator adaptIntoFiltersOrNull( * the filters. That is, if the input data type is a `long` in * the first place. 
If it isn't then */ - if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > 1L << 53) { + if (wholeNumbersOnly && ranges[i].from != Double.NEGATIVE_INFINITY && Math.abs(ranges[i].from) > MAX_ACCURATE_BOUND) { return null; } - if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > 1L << 53) { + if (wholeNumbersOnly && ranges[i].to != Double.POSITIVE_INFINITY && Math.abs(ranges[i].to) > MAX_ACCURATE_BOUND) { return null; } keys[i] = Integer.toString(i); diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java index 813dcba068900..00ed8e34c7b0f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/DateRangeAggregatorTests.java @@ -81,8 +81,18 @@ public void testNoMatchingField() throws IOException { public void testMatchesSortedNumericDocValues() throws IOException { testBothResolutions(new MatchAllDocsQuery(), (iw, resolution) -> { - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)))); - iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)))); + iw.addDocument( + List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T1)) + ) + ); + iw.addDocument( + List.of( + new SortedNumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T2)) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); @@ -94,8 +104,18 @@ public void testMatchesSortedNumericDocValues() throws IOException { public void testMatchesNumericDocValues() throws 
IOException { testBothResolutions(new MatchAllDocsQuery(), (iw, resolution) -> { - iw.addDocument(singleton(new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)))); - iw.addDocument(singleton(new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)))); + iw.addDocument( + List.of( + new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T1)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T1)) + ) + ); + iw.addDocument( + List.of( + new NumericDocValuesField(DATE_FIELD_NAME, resolution.convert(T2)), + new LongPoint(DATE_FIELD_NAME, resolution.convert(T2)) + ) + ); }, range -> { List ranges = range.getBuckets(); assertEquals(2, ranges.size()); From f1ae9801b38dacebf8913ce06340cfb22f32e31b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 13:19:09 -0400 Subject: [PATCH 37/48] Zap --- .../bucket/histogram/DateHistogramAggregator.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index cb0155d6a453b..1a7c46cff352f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -301,8 +301,6 @@ public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws I // the contract of the histogram aggregation is that shards must return buckets ordered by key in ascending order CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); - // value source will be null for unmapped fields - // Important: use `rounding` here, not `shardRounding` InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? 
new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) : null; @@ -398,8 +396,6 @@ protected InternalAggregation adapt(InternalAggregation delegateResult) { } CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); - // value source will be null for unmapped fields - // Important: use `rounding` here, not `shardRounding` InternalDateHistogram.EmptyBucketInfo emptyBucketInfo = minDocCount == 0 ? new InternalDateHistogram.EmptyBucketInfo(rounding.withoutOffset(), buildEmptySubAggregations(), extendedBounds) : null; From 9da87c50f3b30f192b3830c4803c3f77002032bd Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 13:24:15 -0400 Subject: [PATCH 38/48] Words --- .../org/elasticsearch/common/Rounding.java | 7 ++++--- .../aggregations/AggregatorFactories.java | 9 +++++++++ .../bucket/filter/FiltersAggregator.java | 19 +++++++++++++++++++ .../histogram/DateHistogramAggregator.java | 8 +++++--- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/Rounding.java b/server/src/main/java/org/elasticsearch/common/Rounding.java index 16ec3552d9c51..081a08a2f4a44 100644 --- a/server/src/main/java/org/elasticsearch/common/Rounding.java +++ b/server/src/main/java/org/elasticsearch/common/Rounding.java @@ -292,9 +292,10 @@ public interface Prepared { */ double roundingSize(long utcMillis, DateTimeUnit timeUnit); /** - * An array of dates such that each date between each entry is will - * be rounded down to that entry or {@code null} if this rounding - * mechanism doesn't or can't precalculate these points. + * If this rounding mechanism precalculates rounding points then + * this array stores dates such that each date between each entry. + * if the rounding mechanism doesn't precalculate points then this + * is {@code null}. 
*/ long[] fixedRoundingPoints(); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java index 112a183e586ec..479ea05e0f8e0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AggregatorFactories.java @@ -227,6 +227,15 @@ public int countAggregators() { return factories.length; } + /** + * This returns a copy of {@link AggregatorFactories} modified so that + * calls to {@link #createSubAggregators} will ignore the provided parent + * aggregator and always use {@code fixedParent} provided in to this + * method. + *

+ * {@link AdaptingAggregator} uses this to make sure that sub-aggregators + * get the {@link AdaptingAggregator} aggregator itself as the parent. + */ public AggregatorFactories fixParent(Aggregator fixedParent) { AggregatorFactories previous = this; return new AggregatorFactories(factories) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index cadcf56698880..cabcee7107f34 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -290,6 +290,13 @@ private static class FilterByFilter extends FiltersAggregator { this.filters = filters; } + /** + * Instead of returning a {@link LeafBucketCollector} we do the + * collection ourselves by running the filters directly. This is safe + * because we only use this aggregator if there isn't a {@code parent} + * which would change how we collect buckets and because we take the + * top level query into account when building the filters. + */ @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { if (filterWeights == null) { @@ -399,6 +406,13 @@ protected Weight[] buildWeights(Query topLevelQuery, Query filters[]) throws IOE return weights; } + /** + * Make a filter that matches both queries, merging the + * {@link PointRangeQuery}s together if possible. The "merging together" + * part is provides a fairly substantial speed boost then executing a + * top level query on a date and a filter on a date. This kind of thing + * is very common when visualizing logs and metrics. 
+ */ private Query filterMatchingBoth(Query lhs, Query rhs) { if (lhs instanceof MatchAllDocsQuery) { return rhs; @@ -431,6 +445,10 @@ private Query unwrap(Query query) { return query; } + /** + * Merge two {@linkplain PointRangeQuery}s into a single {@linkplain PointRangeQuery} + * that matches points that match both filters. + */ private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQuery rhs) { if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { return null; @@ -443,6 +461,7 @@ private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQu if (upper == null) { return null; } + // TODO this only makes the right answer when each document only has a single value for the field. return new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { @Override protected String toString(int dimension, byte[] value) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 1a7c46cff352f..a7a18cb5f5bde 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -76,7 +76,7 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg * optimized into a {@link FiltersAggregator}. Even when it can't be * optimized, it is going to be marginally faster and consume less memory * than the {@linkplain DateHistogramAggregator} because it doesn't need - * to the rounding points and because it can pass precise cardinality + * to the round points and because it can pass precise cardinality * estimates to its child aggregations. 
*/ public static Aggregator build( @@ -156,10 +156,12 @@ private static FromDateRange adaptIntoRangeOrNull( return null; } // Range aggs use a double to aggregate and we don't want to lose precision. - if (Math.abs(fixedRoundingPoints[0]) > RangeAggregator.MAX_ACCURATE_BOUND) { + long min = fixedRoundingPoints[0]; + long max = fixedRoundingPoints[fixedRoundingPoints.length - 1]; + if (min < -RangeAggregator.MAX_ACCURATE_BOUND || min > RangeAggregator.MAX_ACCURATE_BOUND) { return null; } - if (Math.abs(fixedRoundingPoints[fixedRoundingPoints.length - 1]) > RangeAggregator.MAX_ACCURATE_BOUND) { + if (max < -RangeAggregator.MAX_ACCURATE_BOUND || max > RangeAggregator.MAX_ACCURATE_BOUND) { return null; } RangeAggregatorSupplier rangeSupplier = context.getQueryShardContext() From 3d3ea198857f7dd269e1b9e08f9356b1a69243e3 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 13:42:40 -0400 Subject: [PATCH 39/48] Coment --- .../search/aggregations/bucket/filter/FiltersAggregator.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index cabcee7107f34..f7ab94b9c64f1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -323,6 +323,7 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket } } } + // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment. 
throw new CollectionTerminatedException(); } From 4291b701930cc85774df23be425225101e7b90c0 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 22 Oct 2020 16:27:26 -0400 Subject: [PATCH 40/48] Add fancy query --- .../bucket/filter/FiltersAggregator.java | 72 +------ .../bucket/filter/MergedPointRangeQuery.java | 188 ++++++++++++++++++ .../bucket/filter/FiltersAggregatorTests.java | 35 +++- 3 files changed, 223 insertions(+), 72 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index f7ab94b9c64f1..869981f0aeb0b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -424,7 +424,7 @@ private Query filterMatchingBoth(Query lhs, Query rhs) { Query unwrappedLhs = unwrap(lhs); Query unwrappedRhs = unwrap(rhs); if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { - PointRangeQuery merged = mergePointRangeQueries((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); + Query merged = MergedPointRangeQuery.merge((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); if (merged != null) { // TODO rewrap? return merged; @@ -445,74 +445,4 @@ private Query unwrap(Query query) { } return query; } - - /** - * Merge two {@linkplain PointRangeQuery}s into a single {@linkplain PointRangeQuery} - * that matches points that match both filters. 
- */ - private PointRangeQuery mergePointRangeQueries(PointRangeQuery lhs, PointRangeQuery rhs) { - if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { - return null; - } - byte[] lower = mergePoint(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), true); - if (lower == null) { - return null; - } - byte[] upper = mergePoint(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim(), false); - if (upper == null) { - return null; - } - // TODO this only makes the right answer when each document only has a single value for the field. - return new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { - @Override - protected String toString(int dimension, byte[] value) { - // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. - StringBuilder sb = new StringBuilder(); - sb.append("binary("); - for (int i = 0; i < value.length; i++) { - if (i > 0) { - sb.append(' '); - } - sb.append(Integer.toHexString(value[i] & 0xFF)); - } - sb.append(')'); - return sb.toString(); - } - }; - } - - /** - * Figure out if lhs's lower point is lower in all dimensions than - * rhs's lower point or if it is further. Return null if it is closer - * in some dimensions and further in others. - */ - private byte[] mergePoint(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean mergingLower) { - int runningCmp = 0; - for (int dim = 0; dim < numDims; dim++) { - int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); - if (runningCmp == 0) { - // Previous dimensions were all equal - runningCmp = cmp; - continue; - } - if (cmp == 0) { - // This dimension has the same value. - continue; - } - if ((runningCmp ^ cmp) < 0) { - // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. - return null; - } - } - if (runningCmp < 0) { - // lhs is lower - return mergingLower ? 
rhs : lhs; - } - return mergingLower ? lhs : rhs; - } - - private int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { - int offset = dim * bytesPerDim; - return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); - } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java new file mode 100644 index 0000000000000..dae48768ba27d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -0,0 +1,188 @@ +package org.elasticsearch.search.aggregations.bucket.filter; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BulkScorer; +import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; +import org.apache.lucene.search.Weight; + +import java.io.IOException; +import java.util.Set; + +import static java.util.Arrays.compareUnsigned; + +/** + * Query merging two point in range queries. + */ +public class MergedPointRangeQuery extends Query { + /** + * Merge two {@linkplain PointRangeQuery}s into a {@linkplain MergedPointRangeQuery} + * that matches points that match both filters. 
+ */ + public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) { + if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { + return null; + } + Integer lowerCmp = compareAllDims(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim()); + if (lowerCmp == null) { + // Not all dimensions compared the same way. + return null; + } + Integer upperCmp = compareAllDims(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim()); + if (upperCmp == null) { + // Not all dimensions compared the same way. + return null; + } + if (lowerCmp == 1 && upperCmp == 1) { + // The points are the same. + return lhs; + } + byte[] lower = lowerCmp < 0 ? rhs.getLowerPoint() : lhs.getLowerPoint(); + byte[] upper = upperCmp < 0 ? lhs.getUpperPoint() : rhs.getUpperPoint(); + PointRangeQuery mostCompact = new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { + @Override + protected String toString(int dimension, byte[] value) { + // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. 
+ StringBuilder sb = new StringBuilder(); + sb.append("binary("); + for (int i = 0; i < value.length; i++) { + if (i > 0) { + sb.append(' '); + } + sb.append(Integer.toHexString(value[i] & 0xFF)); + } + sb.append(')'); + return sb.toString(); + } + }; + return new MergedPointRangeQuery(lhs, rhs, mostCompact); + } + + private final String field; + private final BooleanQuery delegateForMultiValuedSegments; + private final PointRangeQuery mostCompactQuery; + + public MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, PointRangeQuery mostCompactQuery) { + field = lhs.getField(); + delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build(); + this.mostCompactQuery = mostCompactQuery; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + Weight delegateForMultiValuedSegmentsWeight = delegateForMultiValuedSegments.createWeight(searcher, scoreMode, boost); + return new Weight(this) { + Weight mostCompactWeight; + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return delegateForMultiValuedSegmentsWeight.isCacheable(ctx); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + ScorerSupplier scorerSupplier = scorerSupplier(context); + if (scorerSupplier == null) { + return null; + } + return scorerSupplier.get(Long.MAX_VALUE); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + /* + * If we're sure docs only have a single value for the field + * we can pick the most compact bounds. If there are multiple values + * for the field we have to run the boolean query. + */ + PointValues points = context.reader().getPointValues(field); + if (points == null) { + return null; + } + if (points.size() == points.getDocCount()) { + // Each doc that has points has exactly one point. 
+ if (mostCompactWeight == null) { + mostCompactWeight = mostCompactQuery.createWeight(searcher, scoreMode, boost); + } + return mostCompactWeight.scorerSupplier(context); + } + return delegateForMultiValuedSegmentsWeight.scorerSupplier(context); + } + + @Override + public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { + return super.bulkScorer(context); + } + + @Override + @Deprecated + public void extractTerms(Set terms) { + delegateForMultiValuedSegmentsWeight.extractTerms(terms); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + return delegateForMultiValuedSegmentsWeight.explain(context, doc); + } + }; + } + + @Override + public String toString(String field) { + return delegateForMultiValuedSegments.toString(field); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != getClass()) { + return false; + } + MergedPointRangeQuery other = (MergedPointRangeQuery) obj; + return delegateForMultiValuedSegments.equals(other.delegateForMultiValuedSegments); + } + + @Override + public int hashCode() { + return classHash() * 31 + delegateForMultiValuedSegments.hashCode(); + } + + /** + * Figure out if lhs point is closer to the origin in all dimensions than + * the rhs point or if it is further. Return null if it is closer + * in some dimensions and further in others. + */ + private static Integer compareAllDims(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim) { + int runningCmp = 0; + for (int dim = 0; dim < numDims; dim++) { + int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); + if (runningCmp == 0) { + // Previous dimensions were all equal + runningCmp = cmp; + continue; + } + if (cmp == 0) { + // This dimension has the same value. + continue; + } + if ((runningCmp ^ cmp) < 0) { + // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. 
+ return null; + } + } + return runningCmp; + } + + private static int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { + int offset = dim * bytesPerDim; + return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 617eec9799a4d..55177fe162455 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -20,24 +20,36 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter; +import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; import 
org.junit.Before; +import java.io.IOException; import java.util.HashSet; import java.util.List; import java.util.Set; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + public class FiltersAggregatorTests extends AggregatorTestCase { private MappedFieldType fieldType; @@ -139,7 +151,7 @@ public void testRandom() throws Exception { // make sure we have more than one segment to test the merge indexWriter.commit(); } - int value = randomInt(maxTerm-1); + int value = randomInt(maxTerm - 1); expectedBucketCount[value] += 1; document.add(new Field("field", Integer.toString(value), KeywordFieldMapper.Defaults.FIELD_TYPE)); indexWriter.addDocument(document); @@ -188,4 +200,25 @@ public void testRandom() throws Exception { directory.close(); } } + + public void testMergePointRangeQueries() throws IOException { + MappedFieldType ft = new DateFieldMapper.DateFieldType("test", Resolution.MILLISECONDS); + AggregationBuilder builder = new FiltersAggregationBuilder( + "test", + new KeyedFilter("q1", new RangeQueryBuilder("test").from("2020-01-01").to("2020-03-01").includeUpper(false)) + ); + Query query = LongPoint.newRangeQuery( + "test", + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-01"), + DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-02-01") + ); + testCase(builder, query, iw -> { + iw.addDocument(List.of(new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2010-01-02")))); + iw.addDocument(List.of(new LongPoint("test", DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-01-02")))); + }, result -> { + InternalFilters filters = (InternalFilters) result; + assertThat(filters.getBuckets(), hasSize(1)); + assertThat(filters.getBucketByKey("q1").getDocCount(), equalTo(1L)); + }, ft); + } } From ff41cdbf5a7d6f40c198cad1831a616d07e3bf29 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 26 Oct 2020 09:34:51 -0400 Subject: [PATCH 41/48] NOCOMMIT --- 
.../aggregations/bucket/filter/MergedPointRangeQuery.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index dae48768ba27d..b531573498e69 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -40,7 +40,7 @@ public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) { Integer upperCmp = compareAllDims(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim()); if (upperCmp == null) { // Not all dimensions compared the same way. - return null; + return null; // NOCOMMIT it shouldn't matter - we can just merge them anyway } if (lowerCmp == 1 && upperCmp == 1) { // The points are the same. @@ -71,7 +71,7 @@ protected String toString(int dimension, byte[] value) { private final BooleanQuery delegateForMultiValuedSegments; private final PointRangeQuery mostCompactQuery; - public MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, PointRangeQuery mostCompactQuery) { + private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, PointRangeQuery mostCompactQuery) { field = lhs.getField(); delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build(); this.mostCompactQuery = mostCompactQuery; @@ -173,6 +173,7 @@ private static Integer compareAllDims(byte[] lhs, byte[] rhs, int numDims, int b // This dimension has the same value. continue; } + // TODO can't we merge these here instead of give up? if ((runningCmp ^ cmp) < 0) { // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. 
return null; From ec5bf41875bd04c13bab742828d0d4062f193834 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 28 Oct 2020 15:15:44 -0400 Subject: [PATCH 42/48] tests not enough yet --- .../aggregations/AdaptingAggregator.java | 6 - .../bucket/filter/FiltersAggregator.java | 4 +- .../bucket/filter/MergedPointRangeQuery.java | 67 ++++--- .../filter/MergedPointRangeQueryTests.java | 181 ++++++++++++++++++ .../DateHistogramAggregatorTests.java | 10 +- 5 files changed, 228 insertions(+), 40 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index 0ec68812e5b51..33cc7ee259dc4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ScoreMode; import org.elasticsearch.common.CheckedFunction; -import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.profile.aggregation.InternalAggregationProfileTree; import java.io.IOException; @@ -76,11 +75,6 @@ public final String name() { return delegate.name(); } - @Override - public final SearchContext context() { - return delegate.context(); - } - @Override public final Aggregator parent() { return parent; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 869981f0aeb0b..31a81f1b12c87 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -300,7 +300,7 @@ private static class FilterByFilter extends FiltersAggregator { @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { if (filterWeights == null) { - filterWeights = buildWeights(context.query(), filters); + filterWeights = buildWeights(topLevelQuery(), filters); } Bits live = ctx.reader().getLiveDocs(); for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { @@ -402,7 +402,7 @@ protected Weight[] buildWeights(Query topLevelQuery, Query filters[]) throws IOE Weight[] weights = new Weight[filters.length]; for (int i = 0; i < filters.length; ++i) { Query filter = filterMatchingBoth(topLevelQuery, filters[i]); - weights[i] = context.searcher().createWeight(context.searcher().rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); + weights[i] = searcher().createWeight(searcher().rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1); } return weights; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index b531573498e69..4426d8bb094ab 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -8,6 +8,7 @@ import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -29,31 +30,33 @@ public class MergedPointRangeQuery extends Query { * that matches points that match both filters. 
*/ public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) { - if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { - return null; + if (lhs.equals(rhs)) { + // Lucky case! The queries were the same so their UNION is just the query itself. + return lhs; } - Integer lowerCmp = compareAllDims(lhs.getLowerPoint(), rhs.getLowerPoint(), lhs.getNumDims(), lhs.getBytesPerDim()); - if (lowerCmp == null) { - // Not all dimensions compared the same way. + if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { return null; } - Integer upperCmp = compareAllDims(lhs.getUpperPoint(), rhs.getUpperPoint(), lhs.getNumDims(), lhs.getBytesPerDim()); - if (upperCmp == null) { - // Not all dimensions compared the same way. - return null; // NOCOMMIT it shouldn't matter - we can just merge them anyway - } - if (lowerCmp == 1 && upperCmp == 1) { - // The points are the same. - return lhs; + int numDims = lhs.getNumDims(); + int bytesPerDim = lhs.getBytesPerDim(); + byte[] lower = mergeBound(lhs.getLowerPoint(), rhs.getLowerPoint(), numDims, bytesPerDim, true); + byte[] upper = mergeBound(lhs.getUpperPoint(), rhs.getUpperPoint(), numDims, bytesPerDim, false); + + // If we ended up with disjoint ranges in any dimension then on single valued segments we can't match any docs. + for (int dim = 0; dim < numDims; dim++) { + int offset = dim * bytesPerDim; + if (compareUnsigned(lower, offset, offset + bytesPerDim, upper, offset, offset + bytesPerDim) > 0) { + return new MergedPointRangeQuery(lhs, rhs, new MatchNoDocsQuery("disjoint ranges")); + } } - byte[] lower = lowerCmp < 0 ? rhs.getLowerPoint() : lhs.getLowerPoint(); - byte[] upper = upperCmp < 0 ? 
lhs.getUpperPoint() : rhs.getUpperPoint(); - PointRangeQuery mostCompact = new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { + + // Otherwise on single valued segments we can only match docs the match the UNION of the two ranges. + PointRangeQuery delegateForSingleValuedSegments = new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { @Override protected String toString(int dimension, byte[] value) { // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. StringBuilder sb = new StringBuilder(); - sb.append("binary("); + sb.append("("); for (int i = 0; i < value.length; i++) { if (i > 0) { sb.append(' '); @@ -64,17 +67,17 @@ protected String toString(int dimension, byte[] value) { return sb.toString(); } }; - return new MergedPointRangeQuery(lhs, rhs, mostCompact); + return new MergedPointRangeQuery(lhs, rhs, delegateForSingleValuedSegments); } private final String field; private final BooleanQuery delegateForMultiValuedSegments; - private final PointRangeQuery mostCompactQuery; + private final Query delegateForSingleValuedSegments; - private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, PointRangeQuery mostCompactQuery) { + private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, Query delegateForSingleValuedSegments) { field = lhs.getField(); delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build(); - this.mostCompactQuery = mostCompactQuery; + this.delegateForSingleValuedSegments = delegateForSingleValuedSegments; } @Override @@ -111,7 +114,7 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti if (points.size() == points.getDocCount()) { // Each doc that has points has exactly one point. 
if (mostCompactWeight == null) { - mostCompactWeight = mostCompactQuery.createWeight(searcher, scoreMode, boost); + mostCompactWeight = delegateForSingleValuedSegments.createWeight(searcher, scoreMode, boost); } return mostCompactWeight.scorerSupplier(context); } @@ -136,9 +139,16 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio }; } + /** + * The query used when we have single valued segments. + */ + Query delegateForSingleValuedSegments() { + return delegateForSingleValuedSegments; + } + @Override public String toString(String field) { - return delegateForMultiValuedSegments.toString(field); + return "MergedPointRange[" + delegateForMultiValuedSegments.toString(field) + "]"; } @Override @@ -155,6 +165,17 @@ public int hashCode() { return classHash() * 31 + delegateForMultiValuedSegments.hashCode(); } + private static byte[] mergeBound(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean lower) { + byte[] merged = new byte[lhs.length]; + for (int dim = 0; dim < numDims; dim++) { + int offset = dim * bytesPerDim; + boolean cmp = compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim) <= 0; + byte[] from = (cmp ^ lower) ? lhs : rhs; + System.arraycopy(from, offset, merged, offset, bytesPerDim); + } + return merged; + } + /** * Figure out if lhs point is closer to the origin in all dimensions than * the rhs point or if it is further. 
Return null if it is closer diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java new file mode 100644 index 0000000000000..875250e4dbefd --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java @@ -0,0 +1,181 @@ +package org.elasticsearch.search.aggregations.bucket.filter; + +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointRangeQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public class MergedPointRangeQueryTests extends ESTestCase { + private Query merge(Query lhs, Query rhs) { + assertThat("error in test assumptions", lhs, instanceOf(PointRangeQuery.class)); + assertThat("error in test assumptions", rhs, instanceOf(PointRangeQuery.class)); + return MergedPointRangeQuery.merge((PointRangeQuery) lhs, (PointRangeQuery) rhs); + } + + private MergedPointRangeQuery mergeToMergedQuery(Query lhs, Query rhs) { + Query merged = merge(lhs, rhs); + assertThat(merged, instanceOf(MergedPointRangeQuery.class)); + return (MergedPointRangeQuery) merged; + } + + public void testDifferentField() { + assertThat(merge(LongPoint.newExactQuery("a", 0), LongPoint.newExactQuery("b", 0)), nullValue()); + } + + public void 
testDifferentDimensionCount() { + assertThat( + merge(LongPoint.newExactQuery("a", 0), LongPoint.newRangeQuery("a", new long[] { 1, 2 }, new long[] { 1, 2 })), + nullValue() + ); + } + + public void testDifferentDimensionSize() { + assertThat(merge(LongPoint.newExactQuery("a", 0), IntPoint.newExactQuery("a", 0)), nullValue()); + } + + public void testSame() { + Query lhs = LongPoint.newRangeQuery("a", 0, 100); + assertThat(merge(lhs, LongPoint.newRangeQuery("a", 0, 100)), equalTo(lhs)); + } + + public void testOverlap() throws IOException { + MergedPointRangeQuery overlapping = mergeToMergedQuery( + LongPoint.newRangeQuery("a", -100, 100), + LongPoint.newRangeQuery("a", 0, 100) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange(overlapping, LongPoint.newRangeQuery("a", 0, 100)); + assertFalse(matches1d(overlapping, -50)); // Point not in range + assertTrue(matches1d(overlapping, 50)); // Point in range + assertTrue(matches1d(overlapping, -50, 10)); // Both points in range matches the doc + assertTrue(matches1d(overlapping, -200, 50)); // One point in range matches + assertFalse(matches1d(overlapping, -50, 200)); // No points in range doesn't match + } + + public void testNonOverlap() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery(LongPoint.newRangeQuery("a", -100, -10), LongPoint.newRangeQuery("a", 10, 100)); + assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches1d(disjoint, randomLong())); // No single point can match + assertFalse(matches1d(disjoint, -50, -20)); // Both points in lower + assertFalse(matches1d(disjoint, 20, 50)); // Both points in upper + assertTrue(matches1d(disjoint, -50, 50)); // One in lower, one in upper + assertFalse(matches1d(disjoint, -50, 200)); // No point in lower + assertFalse(matches1d(disjoint, -200, 50)); // No point in upper + } + + public void test2dSimpleOverlap() throws IOException { + MergedPointRangeQuery overlapping = 
mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { 100, 100 }), + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange( + overlapping, + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertFalse(matches2d(overlapping, -50, -50)); + assertTrue(matches2d(overlapping, 10, 10)); + assertTrue(matches2d(overlapping, -50, -50, 10, 10)); + } + + public void test2dComplexOverlap() throws IOException { + MergedPointRangeQuery overlapping = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, 0 }, new long[] { 100, 100 }), + LongPoint.newRangeQuery("a", new long[] { 0, -100 }, new long[] { 100, 100 }) + ); + assertDelegateForSingleValuedSegmentsEqualPointRange( + overlapping, + LongPoint.newRangeQuery("a", new long[] { 0, 0 }, new long[] { 100, 100 }) + ); + assertFalse(matches2d(overlapping, -50, -50)); + assertTrue(matches2d(overlapping, 10, 10)); + assertTrue(matches2d(overlapping, -50, -50, 10, 10)); + } + + public void test2dNoOverlap() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { -10, -10 }), + LongPoint.newRangeQuery("a", new long[] { 10, 10 }, new long[] { 100, 100 }) + ); + assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches2d(disjoint, randomLong(), randomLong())); + assertFalse(matches2d(disjoint, -50, -50)); + assertFalse(matches2d(disjoint, 50, 50)); + assertTrue(matches2d(disjoint, -50, -50, 50, 50)); + } + + public void test2dNoOverlapInOneDimension() throws IOException { + MergedPointRangeQuery disjoint = mergeToMergedQuery( + LongPoint.newRangeQuery("a", new long[] { -100, -100 }, new long[] { 100, -10 }), + LongPoint.newRangeQuery("a", new long[] { 0, 10 }, new long[] { 100, 100 }) + ); + 
assertThat(disjoint.delegateForSingleValuedSegments(), instanceOf(MatchNoDocsQuery.class)); + assertFalse(matches2d(disjoint, randomLong(), randomLong())); + assertFalse(matches2d(disjoint, -50, -50)); + assertFalse(matches2d(disjoint, 50, 50)); + assertTrue(matches2d(disjoint, 50, -50, 50, 50)); + } + + public void testEqualsAndHashCode() { + + } + + private boolean matches1d(Query query, long... values) throws IOException { + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + List doc = new ArrayList<>(); + for (long v : values) { + doc.add(new LongPoint("a", v)); + } + iw.addDocument(doc); + try (IndexReader r = iw.getReader()) { + IndexSearcher searcher = new IndexSearcher(r); + return searcher.count(query) > 0; + } + } + } + + private boolean matches2d(Query query, long... values) throws IOException { + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) { + List doc = new ArrayList<>(); + assertEquals(values.length % 2, 0); + for (int i = 0; i < values.length; i += 2) { + doc.add(new LongPoint("a", values[i], values[i + 1])); + } + iw.addDocument(doc); + try (IndexReader r = iw.getReader()) { + IndexSearcher searcher = new IndexSearcher(r); + return searcher.count(query) > 0; + } + } + } + + private void assertDelegateForSingleValuedSegmentsEqualPointRange(MergedPointRangeQuery actual, Query expected) { + /* + * This is a lot like asserThat(actual.delegateForSingleValuedSegments(), equalTo(expected)); but + * that doesn't work because the subclasses aren't the same. 
+ */ + assertThat(expected, instanceOf(PointRangeQuery.class)); + assertThat(actual.delegateForSingleValuedSegments(), instanceOf(PointRangeQuery.class)); + assertThat( + ((PointRangeQuery) actual.delegateForSingleValuedSegments()).getLowerPoint(), + equalTo(((PointRangeQuery) expected).getLowerPoint()) + ); + assertThat( + ((PointRangeQuery) actual.delegateForSingleValuedSegments()).getUpperPoint(), + equalTo(((PointRangeQuery) expected).getUpperPoint()) + ); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java index a79412537817b..5bc00a2e7dc28 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregatorTests.java @@ -36,7 +36,6 @@ import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketOrder; -import org.elasticsearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; @@ -55,7 +54,6 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.not; -import static org.mockito.Mockito.mock; public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCase { /** @@ -1212,13 +1210,7 @@ private void aggregationImplementationChoiceTestCase( ); } try (IndexReader reader = indexWriter.getReader()) { - SearchContext context = createSearchContext( - new 
IndexSearcher(reader), - createIndexSettings(), - new MatchAllDocsQuery(), - mock(MultiBucketConsumer.class), - ft - ); + SearchContext context = createSearchContext(new IndexSearcher(reader), new MatchAllDocsQuery(), ft); Aggregator agg = createAggregator(builder, context); Matcher matcher = instanceOf(DateHistogramAggregator.FromDateRange.class); if (usesFromRange == false) { From 6b5c09ad14bd9cce098418dea6b8be0b46b33086 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 28 Oct 2020 17:33:47 -0400 Subject: [PATCH 43/48] missing tests --- .../bucket/filter/FiltersAggregator.java | 2 - .../bucket/filter/MergedPointRangeQuery.java | 70 ++++++------------ .../filter/MergedPointRangeQueryTests.java | 73 ++++++++++++++++--- 3 files changed, 82 insertions(+), 63 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 31a81f1b12c87..5d0449bba8bee 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -58,8 +58,6 @@ import java.util.Objects; import java.util.function.BiConsumer; -import static java.util.Arrays.compareUnsigned; - /** * Aggregator for {@code filters}. 
There are two known subclasses, * {@link FilterByFilter} which is fast but only works in some cases and diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index 4426d8bb094ab..1be4550fc3dca 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -37,21 +37,36 @@ public static Query merge(PointRangeQuery lhs, PointRangeQuery rhs) { if (lhs.getField() != rhs.getField() || lhs.getNumDims() != rhs.getNumDims() || lhs.getBytesPerDim() != rhs.getBytesPerDim()) { return null; } + return new MergedPointRangeQuery(lhs, rhs); + } + + private final String field; + private final Query delegateForMultiValuedSegments; + private final Query delegateForSingleValuedSegments; + + private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs) { + field = lhs.getField(); + delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build(); int numDims = lhs.getNumDims(); int bytesPerDim = lhs.getBytesPerDim(); - byte[] lower = mergeBound(lhs.getLowerPoint(), rhs.getLowerPoint(), numDims, bytesPerDim, true); - byte[] upper = mergeBound(lhs.getUpperPoint(), rhs.getUpperPoint(), numDims, bytesPerDim, false); + this.delegateForSingleValuedSegments = pickDelegateForSingleValuedSegments( + mergeBound(lhs.getLowerPoint(), rhs.getLowerPoint(), numDims, bytesPerDim, true), + mergeBound(lhs.getUpperPoint(), rhs.getUpperPoint(), numDims, bytesPerDim, false), + numDims, + bytesPerDim + ); + } + private Query pickDelegateForSingleValuedSegments(byte[] lower, byte[] upper, int numDims, int bytesPerDim) { // If we ended up with disjoint ranges in any dimension then on single valued segments we can't match any docs. 
for (int dim = 0; dim < numDims; dim++) { int offset = dim * bytesPerDim; if (compareUnsigned(lower, offset, offset + bytesPerDim, upper, offset, offset + bytesPerDim) > 0) { - return new MergedPointRangeQuery(lhs, rhs, new MatchNoDocsQuery("disjoint ranges")); + return new MatchNoDocsQuery("disjoint ranges"); } } - // Otherwise on single valued segments we can only match docs the match the UNION of the two ranges. - PointRangeQuery delegateForSingleValuedSegments = new PointRangeQuery(lhs.getField(), lower, upper, lhs.getNumDims()) { + return new PointRangeQuery(field, lower, upper, numDims) { @Override protected String toString(int dimension, byte[] value) { // Stolen from Lucene's Binary range query. It'd be best to delegate, but the method isn't visible. @@ -67,17 +82,6 @@ protected String toString(int dimension, byte[] value) { return sb.toString(); } }; - return new MergedPointRangeQuery(lhs, rhs, delegateForSingleValuedSegments); - } - - private final String field; - private final BooleanQuery delegateForMultiValuedSegments; - private final Query delegateForSingleValuedSegments; - - private MergedPointRangeQuery(PointRangeQuery lhs, PointRangeQuery rhs, Query delegateForSingleValuedSegments) { - field = lhs.getField(); - delegateForMultiValuedSegments = new BooleanQuery.Builder().add(lhs, Occur.MUST).add(rhs, Occur.MUST).build(); - this.delegateForSingleValuedSegments = delegateForSingleValuedSegments; } @Override @@ -148,7 +152,7 @@ Query delegateForSingleValuedSegments() { @Override public String toString(String field) { - return "MergedPointRange[" + delegateForMultiValuedSegments.toString(field) + "]"; + return "MergedPointRange[" + delegateForMultiValuedSegments.toString(field) + "]"; } @Override @@ -175,36 +179,4 @@ private static byte[] mergeBound(byte[] lhs, byte[] rhs, int numDims, int bytesP } return merged; } - - /** - * Figure out if lhs point is closer to the origin in all dimensions than - * the rhs point or if it is further. 
Return null if it is closer - * in some dimensions and further in others. - */ - private static Integer compareAllDims(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim) { - int runningCmp = 0; - for (int dim = 0; dim < numDims; dim++) { - int cmp = cmpDim(lhs, rhs, dim, bytesPerDim); - if (runningCmp == 0) { - // Previous dimensions were all equal - runningCmp = cmp; - continue; - } - if (cmp == 0) { - // This dimension has the same value. - continue; - } - // TODO can't we merge these here instead of give up? - if ((runningCmp ^ cmp) < 0) { - // Signs differ so this dimension doesn't compare the same way as the previous ones so we can't merge. - return null; - } - } - return runningCmp; - } - - private static int cmpDim(byte[] lhs, byte[] rhs, int dim, int bytesPerDim) { - int offset = dim * bytesPerDim; - return compareUnsigned(lhs, offset, offset + bytesPerDim, rhs, offset, offset + bytesPerDim); - } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java index 875250e4dbefd..d6a12a3b441d9 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java @@ -1,5 +1,6 @@ package org.elasticsearch.search.aggregations.bucket.filter; +import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.IndexReader; @@ -11,28 +12,18 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.EqualsHashCodeTestUtils; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.function.Supplier; import static 
org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.nullValue; public class MergedPointRangeQueryTests extends ESTestCase { - private Query merge(Query lhs, Query rhs) { - assertThat("error in test assumptions", lhs, instanceOf(PointRangeQuery.class)); - assertThat("error in test assumptions", rhs, instanceOf(PointRangeQuery.class)); - return MergedPointRangeQuery.merge((PointRangeQuery) lhs, (PointRangeQuery) rhs); - } - - private MergedPointRangeQuery mergeToMergedQuery(Query lhs, Query rhs) { - Query merged = merge(lhs, rhs); - assertThat(merged, instanceOf(MergedPointRangeQuery.class)); - return (MergedPointRangeQuery) merged; - } - public void testDifferentField() { assertThat(merge(LongPoint.newExactQuery("a", 0), LongPoint.newExactQuery("b", 0)), nullValue()); } @@ -130,7 +121,65 @@ public void test2dNoOverlapInOneDimension() throws IOException { } public void testEqualsAndHashCode() { + String field = randomAlphaOfLength(5); + int dims = randomBoolean() ? 
1 : between(2, 16); + Supplier supplier = randomFrom( + List.of( + () -> randomIntPointRangequery(field, dims), + () -> randomLongPointRangequery(field, dims), + () -> randomDoublePointRangequery(field, dims) + ) + ); + Query lhs = supplier.get(); + Query rhs = randomValueOtherThan(lhs, supplier); + MergedPointRangeQuery query = mergeToMergedQuery(lhs, rhs); + EqualsHashCodeTestUtils.checkEqualsAndHashCode( + query, + ignored -> mergeToMergedQuery(lhs, rhs), + ignored -> mergeToMergedQuery(lhs, randomValueOtherThan(lhs, () -> randomValueOtherThan(rhs, supplier)))); + } + + + private Query randomIntPointRangequery(String field, int dims) { + int[] lower = new int[dims]; + int[] upper = new int[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomIntBetween(Integer.MIN_VALUE, Integer.MAX_VALUE - 1); + upper[i] = randomIntBetween(lower[i], Integer.MAX_VALUE); + } + return IntPoint.newRangeQuery(field, lower, upper); + } + private Query randomLongPointRangequery(String field, int dims) { + long[] lower = new long[dims]; + long[] upper = new long[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomLongBetween(Long.MIN_VALUE, Long.MAX_VALUE - 1); + upper[i] = randomLongBetween(lower[i], Long.MAX_VALUE); + } + return LongPoint.newRangeQuery(field, lower, upper); + } + + private Query randomDoublePointRangequery(String field, int dims) { + double[] lower = new double[dims]; + double[] upper = new double[dims]; + for (int i = 0; i < dims; i++) { + lower[i] = randomDoubleBetween(Double.MIN_VALUE, 0, true); + upper[i] = randomDoubleBetween(lower[i], Double.MAX_VALUE, true); + } + return DoublePoint.newRangeQuery(field, lower, upper); + } + + private Query merge(Query lhs, Query rhs) { + assertThat("error in test assumptions", lhs, instanceOf(PointRangeQuery.class)); + assertThat("error in test assumptions", rhs, instanceOf(PointRangeQuery.class)); + return MergedPointRangeQuery.merge((PointRangeQuery) lhs, (PointRangeQuery) rhs); + } + + private 
MergedPointRangeQuery mergeToMergedQuery(Query lhs, Query rhs) { + Query merged = merge(lhs, rhs); + assertThat(merged, instanceOf(MergedPointRangeQuery.class)); + return (MergedPointRangeQuery) merged; } private boolean matches1d(Query query, long... values) throws IOException { From 9c26fd879a34d00274bd729f0199737d1b4cd2bd Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 28 Oct 2020 17:39:51 -0400 Subject: [PATCH 44/48] remove! --- .../aggregations/bucket/filter/FiltersAggregatorTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java index 55177fe162455..a36fff08e1a4e 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregatorTests.java @@ -38,7 +38,6 @@ import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.filter.FiltersAggregator.KeyedFilter; -import org.elasticsearch.search.aggregations.bucket.terms.InternalTerms; import org.elasticsearch.search.aggregations.support.AggregationInspectionHelper; import org.junit.Before; From 66684f2983799e2430770b8a8daa95349f20ea7e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 29 Oct 2020 10:27:41 -0400 Subject: [PATCH 45/48] I think this is more normal --- .../bucket/filter/FiltersAggregator.java | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index ba71b114751ad..5e2f4244b0ff7 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -22,15 +22,15 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.CollectionTerminatedException; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.ParseField; @@ -302,28 +302,14 @@ protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucket } Bits live = ctx.reader().getLiveDocs(); for (int filterOrd = 0; filterOrd < filters.length; filterOrd++) { - int count = 0; - Scorer scorer = filterWeights[filterOrd].scorer(ctx); + BulkScorer scorer = filterWeights[filterOrd].bulkScorer(ctx); if (scorer == null) { // the filter doesn't match any docs continue; } - DocIdSetIterator itr = scorer.iterator(); - if (live == null) { - while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - // There aren't any children so we don't have to call `collectBucket` and we can just count instead - count++; - } - } else { - segmentsWithDeletedDocs++; - while (itr.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (live.get(itr.docID())) { - // There aren't any children so we don't have to call `collectBucket` and we can just count instead - count++; - } - } - } - 
incrementBucketDocCount(filterOrd, count); + TotalHitCountCollector collector = new TotalHitCountCollector(); + scorer.score(collector, live); + incrementBucketDocCount(filterOrd, collector.getTotalHits()); } // Throwing this exception is how we communicate to the collection mechanism that we don't need the segment. throw new CollectionTerminatedException(); From 997e9b9948631e6463009961308790e3d414abf1 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 5 Nov 2020 15:01:43 -0500 Subject: [PATCH 46/48] Feedback --- .../bucket/filter/FiltersAggregator.java | 2 +- .../bucket/filter/MergedPointRangeQuery.java | 31 +++++++++++++------ .../filter/MergedPointRangeQueryTests.java | 4 +-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index 5e2f4244b0ff7..ff710b99f6f23 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ -414,7 +414,7 @@ private Query filterMatchingBoth(Query lhs, Query rhs) { if (unwrappedLhs instanceof PointRangeQuery && unwrappedRhs instanceof PointRangeQuery) { Query merged = MergedPointRangeQuery.merge((PointRangeQuery) unwrappedLhs, (PointRangeQuery) unwrappedRhs); if (merged != null) { - // TODO rewrap? + // Should we rewrap here? 
return merged; } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index 36a9a278a2d96..a51dd56a02668 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -105,13 +105,13 @@ protected String toString(int dimension, byte[] value) { @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Weight delegateForMultiValuedSegmentsWeight = delegateForMultiValuedSegments.createWeight(searcher, scoreMode, boost); return new Weight(this) { - Weight mostCompactWeight; + Weight multiValuedSegmentWeight; + Weight singleValuedSegmentWeight; @Override public boolean isCacheable(LeafReaderContext ctx) { - return delegateForMultiValuedSegmentsWeight.isCacheable(ctx); + return true; } @Override @@ -136,12 +136,9 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti } if (points.size() == points.getDocCount()) { // Each doc that has points has exactly one point. 
- if (mostCompactWeight == null) { - mostCompactWeight = delegateForSingleValuedSegments.createWeight(searcher, scoreMode, boost); - } - return mostCompactWeight.scorerSupplier(context); + return singleValuedSegmentWeight().scorerSupplier(context); } - return delegateForMultiValuedSegmentsWeight.scorerSupplier(context); + return multiValuedSegmentWeight().scorerSupplier(context); } @Override @@ -152,12 +149,26 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { @Override @Deprecated public void extractTerms(Set terms) { - delegateForMultiValuedSegmentsWeight.extractTerms(terms); + // We don't have Terms, just numbers } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return delegateForMultiValuedSegmentsWeight.explain(context, doc); + return multiValuedSegmentWeight().explain(context, doc); + } + + private Weight singleValuedSegmentWeight() throws IOException { + if (singleValuedSegmentWeight == null) { + singleValuedSegmentWeight = delegateForSingleValuedSegments.createWeight(searcher, scoreMode, boost); + } + return singleValuedSegmentWeight; + } + + private Weight multiValuedSegmentWeight() throws IOException { + if (multiValuedSegmentWeight == null) { + multiValuedSegmentWeight = delegateForMultiValuedSegments.createWeight(searcher, scoreMode, boost); + } + return multiValuedSegmentWeight; } }; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java index 5d590aff928a6..d24e756d422f2 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQueryTests.java @@ -155,10 +155,10 @@ public void testEqualsAndHashCode() { EqualsHashCodeTestUtils.checkEqualsAndHashCode( 
query, ignored -> mergeToMergedQuery(lhs, rhs), - ignored -> mergeToMergedQuery(lhs, randomValueOtherThan(lhs, () -> randomValueOtherThan(rhs, supplier)))); + ignored -> mergeToMergedQuery(lhs, randomValueOtherThan(lhs, () -> randomValueOtherThan(rhs, supplier))) + ); } - private Query randomIntPointRangequery(String field, int dims) { int[] lower = new int[dims]; int[] upper = new int[dims]; From 1e1b1dd99159fdfca5688210ac62afa2de90ddfb Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 6 Nov 2020 08:52:09 -0500 Subject: [PATCH 47/48] ConstantScoreWeight --- .../bucket/filter/MergedPointRangeQuery.java | 27 ++++++++----------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index a51dd56a02668..a1221ae4ea11f 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -21,11 +21,10 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BulkScorer; -import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; @@ -36,7 +35,6 @@ import org.apache.lucene.search.Weight; import java.io.IOException; -import java.util.Set; import static java.util.Arrays.compareUnsigned; @@ -105,7 +103,7 @@ protected String toString(int dimension, byte[] value) { @Override public Weight createWeight(IndexSearcher searcher, 
ScoreMode scoreMode, float boost) throws IOException { - return new Weight(this) { + return new ConstantScoreWeight(this, boost) { Weight multiValuedSegmentWeight; Weight singleValuedSegmentWeight; @@ -143,18 +141,15 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { - return super.bulkScorer(context); - } - - @Override - @Deprecated - public void extractTerms(Set terms) { - // We don't have Terms, just numbers - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return multiValuedSegmentWeight().explain(context, doc); + PointValues points = context.reader().getPointValues(field); + if (points == null) { + return null; + } + if (points.size() == points.getDocCount()) { + // Each doc that has points has exactly one point. + return singleValuedSegmentWeight().bulkScorer(context); + } + return multiValuedSegmentWeight().bulkScorer(context); } private Weight singleValuedSegmentWeight() throws IOException { From 5b5d6ccf659b2cf24b5a39ef1fbc6b6f0d99a6be Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 9 Nov 2020 11:35:31 -0500 Subject: [PATCH 48/48] Iter --- .../aggregations/bucket/filter/MergedPointRangeQuery.java | 6 ++++-- .../bucket/histogram/DateHistogramAggregator.java | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java index a1221ae4ea11f..b21cf396ede81 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/MergedPointRangeQuery.java @@ -35,6 +35,7 @@ import org.apache.lucene.search.Weight; import java.io.IOException; +import 
java.util.Objects; import static java.util.Arrays.compareUnsigned; @@ -186,12 +187,13 @@ public boolean equals(Object obj) { return false; } MergedPointRangeQuery other = (MergedPointRangeQuery) obj; - return delegateForMultiValuedSegments.equals(other.delegateForMultiValuedSegments); + return delegateForMultiValuedSegments.equals(other.delegateForMultiValuedSegments) + && delegateForSingleValuedSegments.equals(other.delegateForSingleValuedSegments); } @Override public int hashCode() { - return classHash() * 31 + delegateForMultiValuedSegments.hashCode(); + return Objects.hash(classHash(), delegateForMultiValuedSegments, delegateForSingleValuedSegments); } private static byte[] mergeBound(byte[] lhs, byte[] rhs, int numDims, int bytesPerDim, boolean lower) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 323061a72971c..125ca3d2c50c6 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -148,7 +148,7 @@ private static FromDateRange adaptIntoRangeOrNull( CardinalityUpperBound cardinality, Map metadata ) throws IOException { - if (hardBounds != null) { + if (hardBounds != null || extendedBounds != null) { return null; } long[] fixedRoundingPoints = preparedRounding.fixedRoundingPoints();