Skip to content

Commit 0b01ea1

Browse files
sandeshkr419 authored and guojialiang92 committed
[Star Tree] [Search] Resolving Range aggregations with Star-tree (#17273)
* range aggs changes --------- Signed-off-by: Sandesh Kumar <[email protected]>
1 parent 98d0f2c commit 0b01ea1

File tree

6 files changed

+580
-40
lines changed

6 files changed

+580
-40
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
2525
- Implement fixed interval refresh task scheduling ([#17777](https://github.com/opensearch-project/OpenSearch/pull/17777))
2626
- Add GRPC DocumentService and Bulk endpoint ([#17727](https://github.com/opensearch-project/OpenSearch/pull/17727))
2727
- Added scale to zero (`search_only` mode) support for OpenSearch reader writer separation ([#17299](https://github.com/opensearch-project/OpenSearch/pull/17299))
28+
- [Star Tree] [Search] Resolving numeric range aggregation with metric aggregation using star-tree ([#17273](https://github.com/opensearch-project/OpenSearch/pull/17273))
2829

2930
### Changed
3031
- Migrate BC libs to their FIPS counterparts ([#14912](https://github.com/opensearch-project/OpenSearch/pull/14912))

server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java

Lines changed: 175 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232
package org.opensearch.search.aggregations.bucket.range;
3333

3434
import org.apache.lucene.index.LeafReaderContext;
35+
import org.apache.lucene.search.DocIdSetIterator;
3536
import org.apache.lucene.search.ScoreMode;
37+
import org.apache.lucene.util.FixedBitSet;
3638
import org.opensearch.core.ParseField;
3739
import org.opensearch.core.common.io.stream.StreamInput;
3840
import org.opensearch.core.common.io.stream.StreamOutput;
@@ -43,7 +45,13 @@
4345
import org.opensearch.core.xcontent.ToXContentObject;
4446
import org.opensearch.core.xcontent.XContentBuilder;
4547
import org.opensearch.core.xcontent.XContentParser;
48+
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
49+
import org.opensearch.index.compositeindex.datacube.MetricStat;
50+
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
51+
import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils;
52+
import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator;
4653
import org.opensearch.index.fielddata.SortedNumericDoubleValues;
54+
import org.opensearch.index.mapper.NumberFieldMapper;
4755
import org.opensearch.search.DocValueFormat;
4856
import org.opensearch.search.aggregations.Aggregator;
4957
import org.opensearch.search.aggregations.AggregatorFactories;
@@ -53,12 +61,17 @@
5361
import org.opensearch.search.aggregations.LeafBucketCollector;
5462
import org.opensearch.search.aggregations.LeafBucketCollectorBase;
5563
import org.opensearch.search.aggregations.NonCollectingAggregator;
64+
import org.opensearch.search.aggregations.StarTreeBucketCollector;
65+
import org.opensearch.search.aggregations.StarTreePreComputeCollector;
5666
import org.opensearch.search.aggregations.bucket.BucketsAggregator;
5767
import org.opensearch.search.aggregations.bucket.filterrewrite.FilterRewriteOptimizationContext;
5868
import org.opensearch.search.aggregations.bucket.filterrewrite.RangeAggregatorBridge;
5969
import org.opensearch.search.aggregations.support.ValuesSource;
6070
import org.opensearch.search.aggregations.support.ValuesSourceConfig;
6171
import org.opensearch.search.internal.SearchContext;
72+
import org.opensearch.search.startree.StarTreeQueryHelper;
73+
import org.opensearch.search.startree.StarTreeTraversalUtil;
74+
import org.opensearch.search.startree.filter.DimensionFilter;
6275

6376
import java.io.IOException;
6477
import java.util.ArrayList;
@@ -70,16 +83,18 @@
7083

7184
import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg;
7285
import static org.opensearch.search.aggregations.bucket.filterrewrite.AggregatorBridge.segmentMatchAll;
86+
import static org.opensearch.search.startree.StarTreeQueryHelper.getSupportedStarTree;
7387

7488
/**
7589
* Aggregate all docs that match given ranges.
7690
*
7791
* @opensearch.internal
7892
*/
79-
public class RangeAggregator extends BucketsAggregator {
93+
public class RangeAggregator extends BucketsAggregator implements StarTreePreComputeCollector {
8094

8195
public static final ParseField RANGES_FIELD = new ParseField("ranges");
8296
public static final ParseField KEYED_FIELD = new ParseField("keyed");
97+
public final String fieldName;
8398

8499
/**
85100
* Range for the range aggregator
@@ -298,6 +313,9 @@ protected Function<Object, Long> bucketOrdProducer() {
298313
}
299314
};
300315
filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context);
316+
this.fieldName = (valuesSource instanceof ValuesSource.Numeric.FieldData)
317+
? ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName()
318+
: null;
301319
}
302320

303321
@Override
@@ -310,8 +328,13 @@ public ScoreMode scoreMode() {
310328

311329
@Override
312330
protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException {
313-
if (segmentMatchAll(context, ctx)) {
314-
return filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false);
331+
if (segmentMatchAll(context, ctx) && filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false)) {
332+
return true;
333+
}
334+
CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context.getQueryShardContext());
335+
if (supportedStarTree != null) {
336+
preComputeWithStarTree(ctx, supportedStarTree);
337+
return true;
315338
}
316339
return false;
317340
}
@@ -333,52 +356,107 @@ public void collect(int doc, long bucket) throws IOException {
333356
}
334357

335358
private int collect(int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException {
336-
int lo = lowBound, hi = ranges.length - 1; // all candidates are between these indexes
337-
int mid = (lo + hi) >>> 1;
338-
while (lo <= hi) {
339-
if (value < ranges[mid].from) {
340-
hi = mid - 1;
341-
} else if (value >= maxTo[mid]) {
342-
lo = mid + 1;
343-
} else {
344-
break;
359+
MatchedRange range = new MatchedRange(ranges, lowBound, value, maxTo);
360+
for (int i = range.startLo; i <= range.endHi; ++i) {
361+
if (ranges[i].matches(value)) {
362+
collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i));
345363
}
346-
mid = (lo + hi) >>> 1;
347364
}
348-
if (lo > hi) return lo; // no potential candidate
349-
350-
// binary search the lower bound
351-
int startLo = lo, startHi = mid;
352-
while (startLo <= startHi) {
353-
final int startMid = (startLo + startHi) >>> 1;
354-
if (value >= maxTo[startMid]) {
355-
startLo = startMid + 1;
356-
} else {
357-
startHi = startMid - 1;
358-
}
365+
return range.endHi + 1;
366+
}
367+
};
368+
}
369+
370+
/**
 * Pre-computes this aggregation for one segment from a star-tree: builds a root
 * {@link StarTreeBucketCollector} (no parent collector) and feeds it every star-tree
 * entry whose bit is set in the collector's matching-docs bit set.
 *
 * @param ctx      leaf reader context of the segment being processed
 * @param starTree the star-tree composite index field to read from
 * @throws IOException if reading star-tree values fails
 */
private void preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException {
371+
StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, starTree, null);
372+
FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet();
373+
374+
int numBits = matchingDocsBitSet.length();
375+
376+
if (numBits > 0) {
377+
// nextSetBit is only invoked with an index strictly below numBits; once bit + 1 reaches numBits
// the loop ends via NO_MORE_DOCS instead (presumably because nextSetBit rejects an
// out-of-range index - confirm against the Lucene FixedBitSet contract).
for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits)
378+
? matchingDocsBitSet.nextSetBit(bit + 1)
379+
: DocIdSetIterator.NO_MORE_DOCS) {
380+
// Owning bucket ordinal is always 0 here: this collector was created without a parent.
starTreeBucketCollector.collectStarTreeEntry(bit, 0);
381+
}
382+
}
383+
}
384+
385+
@Override
386+
public StarTreeBucketCollector getStarTreeBucketCollector(
387+
LeafReaderContext ctx,
388+
CompositeIndexFieldInfo starTree,
389+
StarTreeBucketCollector parentCollector
390+
) throws IOException {
391+
assert parentCollector == null;
392+
StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree);
393+
// TODO: Evaluate optimizing StarTree traversal filter with specific ranges instead of MATCH_ALL_DEFAULT
394+
return new StarTreeBucketCollector(
395+
starTreeValues,
396+
StarTreeTraversalUtil.getStarTreeResult(
397+
starTreeValues,
398+
StarTreeQueryHelper.mergeDimensionFilterIfNotExists(
399+
context.getQueryShardContext().getStarTreeQueryContext().getBaseQueryStarTreeFilter(),
400+
fieldName,
401+
List.of(DimensionFilter.MATCH_ALL_DEFAULT)
402+
),
403+
context
404+
)
405+
) {
406+
@Override
407+
public void setSubCollectors() throws IOException {
408+
for (Aggregator aggregator : subAggregators) {
409+
this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this));
410+
}
411+
}
412+
413+
SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues
414+
.getDimensionValuesIterator(fieldName);
415+
416+
String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues(
417+
starTree.getField(),
418+
"_doc_count",
419+
MetricStat.DOC_COUNT.getTypeName()
420+
);
421+
422+
SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues
423+
.getMetricValuesIterator(metricName);
424+
425+
@Override
426+
public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException {
427+
if (!valuesIterator.advanceExact(starTreeEntry)) {
428+
return;
359429
}
360430

361-
// binary search the upper bound
362-
int endLo = mid, endHi = hi;
363-
while (endLo <= endHi) {
364-
final int endMid = (endLo + endHi) >>> 1;
365-
if (value < ranges[endMid].from) {
366-
endHi = endMid - 1;
431+
for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) {
432+
long dimensionLongValue = valuesIterator.nextValue();
433+
double dimensionValue;
434+
435+
// Only numeric & floating points are supported as of now in star-tree
436+
// TODO: Add support for isBigInteger() when it gets supported in star-tree
437+
if (valuesSource.isFloatingPoint()) {
438+
dimensionValue = ((NumberFieldMapper.NumberFieldType) context.mapperService().fieldType(fieldName)).toDoubleValue(
439+
dimensionLongValue
440+
);
367441
} else {
368-
endLo = endMid + 1;
442+
dimensionValue = dimensionLongValue;
369443
}
370-
}
371444

372-
assert startLo == lowBound || value >= maxTo[startLo - 1];
373-
assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from;
445+
MatchedRange matchedRange = new MatchedRange(ranges, 0, dimensionValue, maxTo);
446+
if (matchedRange.startLo > matchedRange.endHi) {
447+
continue; // No matching range
448+
}
374449

375-
for (int i = startLo; i <= endHi; ++i) {
376-
if (ranges[i].matches(value)) {
377-
collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i));
450+
if (docCountsIterator.advanceExact(starTreeEntry)) {
451+
long metricValue = docCountsIterator.nextValue();
452+
for (int j = matchedRange.startLo; j <= matchedRange.endHi; ++j) {
453+
if (ranges[j].matches(dimensionValue)) {
454+
long bucketOrd = subBucketOrdinal(owningBucketOrd, j);
455+
collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry);
456+
}
457+
}
378458
}
379459
}
380-
381-
return endHi + 1;
382460
}
383461
};
384462
}
@@ -421,6 +499,63 @@ public InternalAggregation buildEmptyAggregation() {
421499
return rangeFactory.create(name, buckets, format, keyed, metadata());
422500
}
423501

502+
/**
 * Inclusive window {@code [startLo, endHi]} of indexes into {@code ranges} that are
 * candidates for containing a given value. The window is a candidate set only: callers
 * in this file still test {@code ranges[i].matches(value)} for each index in it.
 * When no candidate exists, {@code startLo > endHi} (specifically {@code endHi == startLo - 1}).
 */
static class MatchedRange {
503+
int startLo, endHi;
504+
505+
/**
 * Computes the candidate window for {@code value}.
 *
 * @param ranges   ranges to search (assumed sorted as the binary search requires - confirm against caller)
 * @param lowBound lowest index in {@code ranges} to consider
 * @param value    the value to locate
 * @param maxTo    per-index upper bounds used to discard indexes below the window
 *                 (presumably a running maximum of {@code to}; defined outside this view)
 */
MatchedRange(RangeAggregator.Range[] ranges, int lowBound, double value, double[] maxTo) {
506+
computeMatchingRange(ranges, lowBound, value, maxTo);
507+
}
508+
509+
/**
 * Finds any index whose {@code [from, maxTo)} span contains {@code value}, then widens it
 * to the full window with two further binary searches. Results are stored in
 * {@link #startLo} and {@link #endHi}.
 */
private void computeMatchingRange(RangeAggregator.Range[] ranges, int lowBound, double value, double[] maxTo) {
510+
int lo = lowBound, hi = ranges.length - 1;
511+
int mid = (lo + hi) >>> 1;
512+
513+
// binary search for any candidate index
while (lo <= hi) {
514+
if (value < ranges[mid].from) {
515+
hi = mid - 1;
516+
} else if (value >= maxTo[mid]) {
517+
lo = mid + 1;
518+
} else {
519+
break;
520+
}
521+
mid = (lo + hi) >>> 1;
522+
}
523+
// no potential candidate: publish an empty window so that endHi + 1 == lo
if (lo > hi) {
524+
this.startLo = lo;
525+
this.endHi = lo - 1;
526+
return;
527+
}
528+
529+
// binary search the lower bound
530+
int startLo = lo, startHi = mid;
531+
while (startLo <= startHi) {
532+
int startMid = (startLo + startHi) >>> 1;
533+
if (value >= maxTo[startMid]) {
534+
startLo = startMid + 1;
535+
} else {
536+
startHi = startMid - 1;
537+
}
538+
}
539+
540+
// binary search the upper bound
541+
int endLo = mid, endHi = hi;
542+
while (endLo <= endHi) {
543+
int endMid = (endLo + endHi) >>> 1;
544+
if (value < ranges[endMid].from) {
545+
endHi = endMid - 1;
546+
} else {
547+
endLo = endMid + 1;
548+
}
549+
}
550+
551+
// sanity checks: everything just outside the window cannot contain value
assert startLo == lowBound || value >= maxTo[startLo - 1];
552+
assert endHi == ranges.length - 1 || value < ranges[endHi + 1].from;
553+
554+
this.startLo = startLo;
555+
this.endHi = endHi;
556+
}
557+
}
558+
424559
/**
425560
* Unmapped range
426561
*
@@ -456,7 +591,7 @@ public Unmapped(
456591
public InternalAggregation buildEmptyAggregation() {
457592
InternalAggregations subAggs = buildEmptySubAggregations();
458593
List<org.opensearch.search.aggregations.bucket.range.Range.Bucket> buckets = new ArrayList<>(ranges.length);
459-
for (RangeAggregator.Range range : ranges) {
594+
for (Range range : ranges) {
460595
buckets.add(factory.createBucket(range.key, range.from, range.to, 0, subAggs, keyed, format));
461596
}
462597
return factory.create(name, buckets, format, keyed, metadata());

server/src/main/java/org/opensearch/search/startree/StarTreeQueryContext.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
import org.opensearch.index.compositeindex.datacube.Dimension;
1616
import org.opensearch.index.compositeindex.datacube.Metric;
1717
import org.opensearch.index.compositeindex.datacube.MetricStat;
18+
import org.opensearch.index.compositeindex.datacube.NumericDimension;
1819
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter;
1920
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
2021
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
2122
import org.opensearch.index.query.QueryBuilder;
2223
import org.opensearch.search.aggregations.AggregatorFactory;
2324
import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorFactory;
25+
import org.opensearch.search.aggregations.bucket.range.RangeAggregatorFactory;
2426
import org.opensearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
2527
import org.opensearch.search.aggregations.metrics.MetricAggregatorFactory;
2628
import org.opensearch.search.internal.SearchContext;
@@ -120,6 +122,10 @@ public boolean consolidateAllFilters(SearchContext context) {
120122
continue;
121123
}
122124

125+
// validation for range aggregation
126+
if (validateRangeAggregationSupport(compositeMappedFieldType, aggregatorFactory)) {
127+
continue;
128+
}
123129
// invalid query shape
124130
return false;
125131
}
@@ -184,6 +190,33 @@ private static boolean validateKeywordTermsAggregationSupport(
184190
return true;
185191
}
186192

193+
/**
 * Checks whether a range aggregation request can be resolved using the star-tree.
 *
 * @param compositeIndexFieldInfo the star-tree field type whose dimensions are checked
 * @param aggregatorFactory       the aggregator factory to validate
 * @return {@code true} only if the factory is a {@link RangeAggregatorFactory}, its field is a
 *         {@link NumericDimension} of the star-tree, and every sub-factory passes
 *         {@code validateStarTreeMetricSupport}; {@code false} otherwise
 */
private static boolean validateRangeAggregationSupport(
194+
CompositeDataCubeFieldType compositeIndexFieldInfo,
195+
AggregatorFactory aggregatorFactory
196+
) {
197+
if (!(aggregatorFactory instanceof RangeAggregatorFactory rangeAggregatorFactory)) {
198+
return false;
199+
}
200+
201+
// Validate that the requested field is one of the star-tree dimensions and is a numeric dimension
202+
// TODO: Add support for date type ranges
203+
if (compositeIndexFieldInfo.getDimensions()
204+
.stream()
205+
.noneMatch(
206+
dimension -> rangeAggregatorFactory.getField().equals(dimension.getField()) && dimension instanceof NumericDimension
207+
)) {
208+
return false;
209+
}
210+
211+
// Validate all sub-factories: each must be a metric aggregation the star-tree supports
212+
for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) {
213+
if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) {
214+
return false;
215+
}
216+
}
217+
return true;
218+
}
219+
187220
private StarTreeFilter getStarTreeFilter(
188221
SearchContext context,
189222
QueryBuilder queryBuilder,

0 commit comments

Comments
 (0)