Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
469cf51
Combining filter rewrite and skip list approaches for further optimiz…
jainankitk Oct 8, 2025
e20f702
Removing parent aggregation check for perf benchmark
jainankitk Oct 8, 2025
82bc95d
Adding changelog entry
jainankitk Oct 8, 2025
aff3dc6
Applying the skip list optimization for AutoDateHistogram
jainankitk Oct 9, 2025
1c29540
Addressing checkstyle failures
jainankitk Oct 9, 2025
b9e9f2b
Apply spotless
jainankitk Oct 9, 2025
a28b9c1
Merge branch 'main' into agg-perf
jainankitk Oct 9, 2025
0a9ef40
Minor bug fix
jainankitk Oct 10, 2025
8d4ccf7
Merge branch 'main' into agg-perf
jainankitk Oct 21, 2025
4e7d9e6
Merge branch 'main' into agg-perf
jainankitk Oct 23, 2025
23fbad3
Add unit test for filter rewrite with date histogram with skiplist.
asimmahmood1 Oct 27, 2025
7eb64f7
Spotless check
asimmahmood1 Oct 27, 2025
35834e4
Fix unit test
asimmahmood1 Oct 27, 2025
2b593c9
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Oct 27, 2025
3cdc37d
Not ready for check-in, just throwing this out to come up with differ…
asimmahmood1 Nov 10, 2025
d0eeb37
Revert auto date changes for this PR
asimmahmood1 Nov 10, 2025
66ffef1
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 10, 2025
0ec357a
Switch to Lucene's version of BitSetDocIdStream
asimmahmood1 Nov 12, 2025
7a7209f
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 12, 2025
37f4641
Merge branch 'main' into agg-perf
jainankitk Nov 14, 2025
eaf7e52
Resolving merge conflict issue
jainankitk Nov 14, 2025
7c05efe
Fixing build failure
jainankitk Nov 14, 2025
1d97bee
Merge branch 'main' into agg-perf
jainankitk Nov 14, 2025
d8448f5
Merge branch 'main' into agg-perf
jainankitk Nov 15, 2025
6888b6c
This is a more concise method I can think of. It doesn't guarantee only SugAg…
asimmahmood1 Nov 17, 2025
7bb162e
Fix unit test
asimmahmood1 Nov 17, 2025
d961a87
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 17, 2025
d208ed7
Fix unit test
asimmahmood1 Nov 19, 2025
97ea5cb
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 19, 2025
ddad6e4
Updated with more restricted use for LeafCollectorModeEnum.
asimmahmood1 Nov 23, 2025
d8ff524
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 23, 2025
3fa6b0f
Add javadoc
asimmahmood1 Nov 23, 2025
18d42fa
Spotless
asimmahmood1 Nov 23, 2025
555628a
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 24, 2025
ab67e80
Fixed bug while refactoring, and code coverage
asimmahmood1 Nov 25, 2025
d6236a2
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Nov 25, 2025
8904653
Remove unused code
asimmahmood1 Dec 1, 2025
5348994
Merge remote-tracking branch 'upstream/main' into agg-perf
asimmahmood1 Dec 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support dynamic consumer configuration update in pull-based ingestion ([#19963](https://github.com/opensearch-project/OpenSearch/pull/19963))

### Changed
- Combining filter rewrite and skip list to optimize sub aggregation ([#19573](https://github.com/opensearch-project/OpenSearch/pull/19573))
- Faster `terms` query creation for `keyword` field with index and docValues enabled ([#19350](https://github.com/opensearch-project/OpenSearch/pull/19350))
- Refactor to move prepareIndex and prepareDelete methods to Engine class ([#19551](https://github.com/opensearch-project/OpenSearch/pull/19551))
- Omit maxScoreCollector in SimpleTopDocsCollectorContext when concurrent segment search enabled ([#19584](https://github.com/opensearch-project/OpenSearch/pull/19584))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ public abstract class AggregatorBase extends Aggregator {
private Map<String, Aggregator> subAggregatorbyName;
private final CircuitBreakerService breakerService;
private long requestBytesUsed;
protected LeafCollectionMode leafCollectorMode = LeafCollectionMode.NORMAL;

/**
* Constructs a new Aggregator.
Expand Down Expand Up @@ -236,6 +237,23 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws
return false;
}

/**
 * Reports how this aggregator intends to consume documents at the leaf level.
 * Meant to be consulted alongside {@code tryPrecomputeAggregationForLeaf()}
 * or the {@code getLeafCollector} method when choosing a collection strategy.
 *
 * @return the current {@link LeafCollectionMode}; defaults to {@code NORMAL}
 */
public LeafCollectionMode getLeafCollectorMode() {
    return this.leafCollectorMode;
}

/**
 * To be used in conjunction with <code>tryPrecomputeAggregationForLeaf()</code>
 * or <code>getLeafCollector</code> method.
 */
public enum LeafCollectionMode {
    // Default per-document leaf collection.
    NORMAL,
    // Leaf collection driven by the filter-rewrite optimization
    // (buckets computed from the index structure rather than per-doc iteration).
    FILTER_REWRITE
}

@Override
public final void preCollection() throws IOException {
List<BucketCollector> collectors = Arrays.asList(subAggregators);
Expand Down Expand Up @@ -343,4 +361,5 @@ protected void checkCancelled() {
throw new TaskCancelledException("The query has been cancelled");
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.search.aggregations.bucket;

import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.DocIdStream;
import org.apache.lucene.search.Scorable;
import org.opensearch.common.Rounding;
import org.opensearch.search.aggregations.LeafBucketCollector;
import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds;

import java.io.IOException;

/**
 * Histogram collection logic using skip list.
 * <p>
 * Pairs a {@link NumericDocValues} field with its {@link DocValuesSkipper} so that
 * runs of consecutive documents whose values all round to the same histogram bucket
 * can be counted in bulk, instead of rounding every document's value individually.
 * When a skip-list interval spans more than one bucket, collection falls back to
 * per-document rounding.
 *
 * @opensearch.internal
 */
public class HistogramSkiplistLeafCollector extends LeafBucketCollector {

    private final NumericDocValues values;
    private final DocValuesSkipper skipper;
    private final Rounding.Prepared preparedRounding;
    private final LongKeyedBucketOrds bucketOrds;
    private final LeafBucketCollector sub;
    private final BucketsAggregator aggregator;

    /**
     * Max doc ID (inclusive) up to which all docs values may map to the same
     * bucket.
     */
    private int upToInclusive = -1;

    /**
     * Whether all docs up to {@link #upToInclusive} values map to the same bucket.
     */
    private boolean upToSameBucket;

    /**
     * Index in bucketOrds for docs up to {@link #upToInclusive}.
     */
    private long upToBucketIndex;

    /**
     * @param values           numeric doc values whose rounded values define the buckets
     * @param skipper          skip-list index over {@code values}, used to detect
     *                         doc-ID ranges that all fall into one bucket
     * @param preparedRounding rounding that maps a raw value to its bucket key
     * @param bucketOrds       maps (owningBucketOrd, bucket key) pairs to dense ordinals
     * @param sub              collector for sub-aggregations; compared against
     *                         {@code NO_OP_COLLECTOR} to pick the bulk-count fast path
     * @param aggregator       owner used to create buckets and record doc counts
     */
    public HistogramSkiplistLeafCollector(
        NumericDocValues values,
        DocValuesSkipper skipper,
        Rounding.Prepared preparedRounding,
        LongKeyedBucketOrds bucketOrds,
        LeafBucketCollector sub,
        BucketsAggregator aggregator
    ) {
        this.values = values;
        this.skipper = skipper;
        this.preparedRounding = preparedRounding;
        this.bucketOrds = bucketOrds;
        this.sub = sub;
        this.aggregator = aggregator;
    }

    @Override
    public void setScorer(Scorable scorer) throws IOException {
        // Only the sub-collector may need scores; this collector never reads them.
        if (sub != null) {
            sub.setScorer(scorer);
        }
    }

    /**
     * Advances the skipper to the interval containing {@code doc} and recomputes
     * {@link #upToInclusive}, {@link #upToSameBucket} and {@link #upToBucketIndex}.
     * On return, docs up to {@code upToInclusive} either all share one bucket
     * ({@code upToSameBucket == true}) or must be rounded individually.
     */
    private void advanceSkipper(int doc, long owningBucketOrd) throws IOException {
        if (doc > skipper.maxDocID(0)) {
            skipper.advance(doc);
        }
        upToSameBucket = false;

        if (skipper.minDocID(0) > doc) {
            // Corner case which happens if `doc` doesn't have a value and is between two
            // intervals of
            // the doc-value skip index.
            upToInclusive = skipper.minDocID(0) - 1;
            return;
        }

        upToInclusive = skipper.maxDocID(0);

        // Now find the highest level where all docs map to the same bucket.
        for (int level = 0; level < skipper.numLevels(); ++level) {
            int totalDocsAtLevel = skipper.maxDocID(level) - skipper.minDocID(level) + 1;
            long minBucket = preparedRounding.round(skipper.minValue(level));
            long maxBucket = preparedRounding.round(skipper.maxValue(level));

            if (skipper.docCount(level) == totalDocsAtLevel && minBucket == maxBucket) {
                // All docs at this level have a value, and all values map to the same bucket.
                upToInclusive = skipper.maxDocID(level);
                upToSameBucket = true;
                // A negative ord from add() means the bucket already exists; decode it.
                upToBucketIndex = bucketOrds.add(owningBucketOrd, maxBucket);
                if (upToBucketIndex < 0) {
                    upToBucketIndex = -1 - upToBucketIndex;
                }
            } else {
                break;
            }
        }
    }

    @Override
    public void collect(int doc, long owningBucketOrd) throws IOException {
        // Re-position the skipper once we run past the current interval.
        if (doc > upToInclusive) {
            advanceSkipper(doc, owningBucketOrd);
        }

        if (upToSameBucket) {
            // Fast path: the bucket is known in advance and every doc in the interval
            // has a value (guaranteed by the docCount check in advanceSkipper), so we
            // can skip reading doc values entirely.
            aggregator.incrementBucketDocCount(upToBucketIndex, 1L);
            sub.collect(doc, upToBucketIndex);
        } else if (values.advanceExact(doc)) {
            // Slow path: round this doc's value and resolve (or create) its bucket.
            final long value = values.longValue();
            long bucketIndex = bucketOrds.add(owningBucketOrd, preparedRounding.round(value));
            if (bucketIndex < 0) {
                bucketIndex = -1 - bucketIndex;
                aggregator.collectExistingBucket(sub, doc, bucketIndex);
            } else {
                aggregator.collectBucket(sub, doc, bucketIndex);
            }
        }
    }

    @Override
    public void collect(DocIdStream stream) throws IOException {
        // Delegates with owning bucket 0; presumably only reached when this is the
        // top-level agg — TODO confirm against callers.
        collect(stream, 0);
    }

    @Override
    public void collect(DocIdStream stream, long owningBucketOrd) throws IOException {
        // Drain the stream one skipper interval at a time, bulk-counting intervals
        // whose docs all share a bucket.
        // NOTE(review): the original comment said this is only called for sub
        // aggregations — confirm against callers.
        for (;;) {
            int upToExclusive = upToInclusive + 1;
            if (upToExclusive < 0) { // overflow
                upToExclusive = Integer.MAX_VALUE;
            }

            if (upToSameBucket) {
                if (sub == NO_OP_COLLECTOR) {
                    // stream.count maybe faster when we don't need to handle sub-aggs
                    long count = stream.count(upToExclusive);
                    aggregator.incrementBucketDocCount(upToBucketIndex, count);
                } else {
                    // With sub-aggs each doc must still be forwarded; count as we go.
                    final int[] count = { 0 };
                    stream.forEach(upToExclusive, doc -> {
                        sub.collect(doc, upToBucketIndex);
                        count[0]++;
                    });
                    aggregator.incrementBucketDocCount(upToBucketIndex, count[0]);
                }
            } else {
                // Interval spans multiple buckets: fall back to per-doc collection.
                stream.forEach(upToExclusive, doc -> collect(doc, owningBucketOrd));
            }

            if (stream.mayHaveRemaining()) {
                advanceSkipper(upToExclusive, owningBucketOrd);
            } else {
                break;
            }
        }
    }
}
Loading
Loading