Skip to content

Commit 807163c

Browse files
Approximation Framework Enhancement: Update the BKD traversal logic to improve the performance on skewed data (#18439)
* Initial commit for skewed datasets Signed-off-by: Prudhvi Godithi <[email protected]> * Approximation optimization Signed-off-by: Prudhvi Godithi <[email protected]> * Fix test Signed-off-by: Prudhvi Godithi <[email protected]> * Revert NumberFieldMapper changes Signed-off-by: Prudhvi Godithi <[email protected]> * Test nyc_taxis with updated NumberFieldMapper Signed-off-by: Prudhvi Godithi <[email protected]> * More optimization for asc sorts Signed-off-by: Prudhvi Godithi <[email protected]> * More optimization for asc sorts Signed-off-by: Prudhvi Godithi <[email protected]> * Revert NumberFieldMapper and add tests Signed-off-by: Prudhvi Godithi <[email protected]> * Updated CHANGELOG.md Signed-off-by: Prudhvi Godithi <[email protected]> * Fix flaky test in ApproximatePointRangeQueryTests Signed-off-by: Prudhvi Godithi <[email protected]> --------- Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent cf13a80 commit 807163c

File tree

3 files changed

+229
-71
lines changed

3 files changed

+229
-71
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
4545
- Add FIPS build tooling ([#4254](https://github.com/opensearch-project/security/issues/4254))
4646
- Support Nested Aggregations as part of Star-Tree ([#18048](https://github.com/opensearch-project/OpenSearch/pull/18048))
4747
- [Star-Tree] Support for date-range queries with star-tree supported aggregations ([#17855](https://github.com/opensearch-project/OpenSearch/pull/17855)
48+
- Approximation Framework Enhancement: Update the BKD traversal logic to improve the performance on skewed data ([#18439](https://github.com/opensearch-project/OpenSearch/issues/18439))
4849

4950
### Changed
5051
- Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269)))

server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java

Lines changed: 73 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
*/
4141
public class ApproximatePointRangeQuery extends ApproximateQuery {
4242
public static final Function<byte[], String> LONG_FORMAT = bytes -> Long.toString(LongPoint.decodeDimension(bytes, 0));
43+
4344
private int size;
4445

4546
private SortOrder sortOrder;
@@ -247,45 +248,44 @@ public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.Poin
247248
return;
248249
}
249250
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
250-
switch (r) {
251-
case CELL_OUTSIDE_QUERY:
252-
// This cell is fully outside the query shape: stop recursing
253-
break;
254-
case CELL_INSIDE_QUERY:
255-
// If the cell is fully inside, we keep moving to child until we reach a point where we can no longer move or when
256-
// we have sufficient doc count. We first move down and then move to the left child
257-
if (pointTree.moveToChild() && docCount[0] < size) {
258-
do {
259-
intersectLeft(visitor, pointTree, docCount);
260-
} while (pointTree.moveToSibling() && docCount[0] < size);
261-
pointTree.moveToParent();
262-
} else {
263-
// we're at the leaf node, if we're under the size, visit all the docIds in this node.
264-
if (docCount[0] < size) {
265-
pointTree.visitDocIDs(visitor);
266-
}
267-
}
268-
break;
269-
case CELL_CROSSES_QUERY:
270-
// The cell crosses the shape boundary, or the cell fully contains the query, so we fall
271-
// through and do full filtering:
272-
if (pointTree.moveToChild() && docCount[0] < size) {
273-
do {
274-
intersectLeft(visitor, pointTree, docCount);
275-
} while (pointTree.moveToSibling() && docCount[0] < size);
276-
pointTree.moveToParent();
277-
} else {
278-
// TODO: we can assert that the first value here in fact matches what the pointTree
279-
// claimed?
280-
// Leaf node; scan and filter all points in this block:
281-
if (docCount[0] < size) {
282-
pointTree.visitDocValues(visitor);
283-
}
284-
}
285-
break;
286-
default:
287-
throw new IllegalArgumentException("Unreachable code");
251+
if (r == PointValues.Relation.CELL_OUTSIDE_QUERY) {
252+
return;
253+
}
254+
// Handle leaf nodes
255+
if (pointTree.moveToChild() == false) {
256+
if (r == PointValues.Relation.CELL_INSIDE_QUERY) {
257+
pointTree.visitDocIDs(visitor);
258+
} else {
259+
// CELL_CROSSES_QUERY
260+
pointTree.visitDocValues(visitor);
261+
}
262+
return;
263+
}
264+
// For CELL_INSIDE_QUERY, check if we can skip right child
265+
if (r == PointValues.Relation.CELL_INSIDE_QUERY) {
266+
long leftSize = pointTree.size();
267+
long needed = size - docCount[0];
268+
269+
if (leftSize >= needed) {
270+
// Process only left child
271+
intersectLeft(visitor, pointTree, docCount);
272+
pointTree.moveToParent();
273+
return;
274+
}
275+
}
276+
// We need both children - now clone right
277+
PointValues.PointTree rightChild = null;
278+
if (pointTree.moveToSibling()) {
279+
rightChild = pointTree.clone();
280+
pointTree.moveToParent();
281+
pointTree.moveToChild();
282+
}
283+
// Process both children: left first, then right if needed
284+
intersectLeft(visitor, pointTree, docCount);
285+
if (docCount[0] < size && rightChild != null) {
286+
intersectLeft(visitor, rightChild, docCount);
288287
}
288+
pointTree.moveToParent();
289289
}
290290

291291
// custom intersect visitor to walk the right of tree (from rightmost leaf going left)
@@ -295,40 +295,42 @@ public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.Poi
295295
return;
296296
}
297297
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
298-
switch (r) {
299-
case CELL_INSIDE_QUERY:
300-
case CELL_CROSSES_QUERY:
301-
if (pointTree.moveToChild() && docCount[0] < size) {
302-
PointValues.PointTree leftChild = pointTree.clone();
303-
// BKD is binary today, so one moveToSibling() is enough to land on the right child.
304-
// If PointTree ever becomes n-ary, update the traversal below to visit all siblings or re-enable a full loop.
305-
if (pointTree.moveToSibling()) {
306-
// We have two children - visit right first
307-
intersectRight(visitor, pointTree, docCount);
308-
// Then visit left if we still need more docs
309-
if (docCount[0] < size) {
310-
intersectRight(visitor, leftChild, docCount);
311-
}
312-
} else {
313-
// Only one child - visit it
314-
intersectRight(visitor, leftChild, docCount);
315-
}
316-
pointTree.moveToParent();
317-
} else {
318-
if (docCount[0] < size) {
319-
if (r == PointValues.Relation.CELL_INSIDE_QUERY) {
320-
pointTree.visitDocIDs(visitor);
321-
} else {
322-
pointTree.visitDocValues(visitor);
323-
}
324-
}
325-
}
326-
break;
327-
case CELL_OUTSIDE_QUERY:
328-
break;
329-
default:
330-
throw new IllegalArgumentException("Unreachable code");
298+
if (r == PointValues.Relation.CELL_OUTSIDE_QUERY) {
299+
return;
300+
}
301+
// Handle leaf nodes
302+
if (pointTree.moveToChild() == false) {
303+
if (r == PointValues.Relation.CELL_INSIDE_QUERY) {
304+
pointTree.visitDocIDs(visitor);
305+
} else {
306+
// CELL_CROSSES_QUERY
307+
pointTree.visitDocValues(visitor);
308+
}
309+
return;
310+
}
311+
// Internal node - get left child reference (we're at left child initially)
312+
PointValues.PointTree leftChild = pointTree.clone();
313+
// Move to right child if it exists
314+
boolean hasRightChild = pointTree.moveToSibling();
315+
// For CELL_INSIDE_QUERY, check if we can skip left child
316+
if (r == PointValues.Relation.CELL_INSIDE_QUERY && hasRightChild) {
317+
long rightSize = pointTree.size();
318+
long needed = size - docCount[0];
319+
if (rightSize >= needed) {
320+
// Right child has all we need - only process right
321+
intersectRight(visitor, pointTree, docCount);
322+
pointTree.moveToParent();
323+
return;
324+
}
325+
}
326+
// Process both children: right first (for DESC), then left if needed
327+
if (hasRightChild) {
328+
intersectRight(visitor, pointTree, docCount);
329+
}
330+
if (docCount[0] < size) {
331+
intersectRight(visitor, leftChild, docCount);
331332
}
333+
pointTree.moveToParent();
332334
}
333335

334336
@Override

server/src/test/java/org/opensearch/search/approximate/ApproximatePointRangeQueryTests.java

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,4 +653,159 @@ public void testIntersectRightSingleChildNode() throws IOException {
653653
}
654654
}
655655
}
656+
657+
// Following test replicates the http_logs dataset
658+
public void testHttpLogTimestampDistribution() throws IOException {
659+
try (Directory directory = newDirectory()) {
660+
try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) {
661+
int dims = 1;
662+
// Sparse range: 100-199 (100 docs, one per value)
663+
for (int i = 100; i < 200; i++) {
664+
Document doc = new Document();
665+
doc.add(new LongPoint("timestamp", i));
666+
doc.add(new NumericDocValuesField("timestamp", i));
667+
iw.addDocument(doc);
668+
}
669+
// Dense range: 1000-1999 (5000 docs, 5 per value)
670+
for (int i = 0; i < 5000; i++) {
671+
long value = 1000 + (i / 5); // Creates 5 docs per value from 1000-1999
672+
Document doc = new Document();
673+
doc.add(new LongPoint("timestamp", value));
674+
doc.add(new NumericDocValuesField("timestamp", value));
675+
iw.addDocument(doc);
676+
}
677+
// 0-99 (100 docs)
678+
for (int i = 0; i < 100; i++) {
679+
Document doc = new Document();
680+
doc.add(new LongPoint("timestamp", i));
681+
doc.add(new NumericDocValuesField("timestamp", i));
682+
iw.addDocument(doc);
683+
}
684+
iw.flush();
685+
iw.forceMerge(1);
686+
try (IndexReader reader = iw.getReader()) {
687+
IndexSearcher searcher = new IndexSearcher(reader);
688+
// Test sparse region
689+
testApproximateVsExactQuery(searcher, "timestamp", 100, 199, 50, dims);
690+
// Test dense region
691+
testApproximateVsExactQuery(searcher, "timestamp", 1000, 1500, 100, dims);
692+
// Test across regions
693+
testApproximateVsExactQuery(searcher, "timestamp", 0, 2000, 200, dims);
694+
}
695+
}
696+
}
697+
}
698+
699+
// Following test replicates the nyx_taxis dataset
700+
public void testNycTaxiDataDistribution() throws IOException {
701+
try (Directory directory = newDirectory()) {
702+
try (RandomIndexWriter iw = new RandomIndexWriter(random(), directory, new WhitespaceAnalyzer())) {
703+
int dims = 1;
704+
// Create NYC taxi fare distribution with different ranges
705+
for (long fare = 250; fare <= 500; fare++) {
706+
iw.addDocument(asList(new LongPoint("fare_amount", fare), new NumericDocValuesField("fare_amount", fare)));
707+
}
708+
// Typical fares: 1000-3000 (dense, 5 docs per value)
709+
for (long fare = 1000; fare <= 3000; fare++) {
710+
for (int dup = 0; dup < 5; dup++) {
711+
iw.addDocument(asList(new LongPoint("fare_amount", fare), new NumericDocValuesField("fare_amount", fare)));
712+
}
713+
}
714+
// High fares: 10000-20000 (sparse, 1 doc every 100)
715+
for (long fare = 10000; fare <= 20000; fare += 100) {
716+
iw.addDocument(asList(new LongPoint("fare_amount", fare), new NumericDocValuesField("fare_amount", fare)));
717+
}
718+
iw.flush();
719+
iw.forceMerge(1);
720+
try (IndexReader reader = iw.getReader()) {
721+
IndexSearcher searcher = new IndexSearcher(reader);
722+
// Test 1: Query for typical fare range
723+
testApproximateVsExactQuery(searcher, "fare_amount", 1000, 3000, 100, dims);
724+
// Test 2: Query for high fare range
725+
testApproximateVsExactQuery(searcher, "fare_amount", 10000, 20000, 50, dims);
726+
// Test 3: Query for low fares
727+
testApproximateVsExactQuery(searcher, "fare_amount", 250, 500, 50, dims);
728+
}
729+
}
730+
}
731+
}
732+
733+
private void testApproximateVsExactQuery(IndexSearcher searcher, String field, long lower, long upper, int size, int dims)
734+
throws IOException {
735+
// Test with approximate query
736+
ApproximatePointRangeQuery approxQuery = new ApproximatePointRangeQuery(
737+
field,
738+
pack(lower).bytes,
739+
pack(upper).bytes,
740+
dims,
741+
size,
742+
null,
743+
ApproximatePointRangeQuery.LONG_FORMAT
744+
);
745+
// Test with exact query
746+
Query exactQuery = LongPoint.newRangeQuery(field, lower, upper);
747+
TopDocs approxDocs = searcher.search(approxQuery, size);
748+
TopDocs exactDocs = searcher.search(exactQuery, size);
749+
// Verify approximate query returns correct number of results
750+
assertTrue("Approximate query should return at most " + size + " docs", approxDocs.scoreDocs.length <= size);
751+
// If exact query returns fewer docs than size, approximate should match
752+
if (exactDocs.totalHits.value() <= size) {
753+
assertEquals(
754+
"When exact results fit in size, approximate should match exactly",
755+
exactDocs.totalHits.value(),
756+
approxDocs.totalHits.value()
757+
);
758+
}
759+
// Test with sorting (ASC and DESC)
760+
Sort ascSort = new Sort(new SortField(field, SortField.Type.LONG));
761+
Sort descSort = new Sort(new SortField(field, SortField.Type.LONG, true));
762+
// Test ASC sort
763+
ApproximatePointRangeQuery approxQueryAsc = new ApproximatePointRangeQuery(
764+
field,
765+
pack(lower).bytes,
766+
pack(upper).bytes,
767+
dims,
768+
size,
769+
SortOrder.ASC,
770+
ApproximatePointRangeQuery.LONG_FORMAT
771+
);
772+
TopDocs approxDocsAsc = searcher.search(approxQueryAsc, size, ascSort);
773+
TopDocs exactDocsAsc = searcher.search(exactQuery, size, ascSort);
774+
// Verify results match
775+
for (int i = 0; i < size; i++) {
776+
assertEquals("ASC sorted results should match at position " + i, exactDocsAsc.scoreDocs[i].doc, approxDocsAsc.scoreDocs[i].doc);
777+
}
778+
assertEquals("Should return exactly size value documents", size, approxDocsAsc.scoreDocs.length);
779+
assertEquals(
780+
"Should return exactly size value documents as regular query",
781+
exactDocsAsc.scoreDocs.length,
782+
approxDocsAsc.scoreDocs.length
783+
);
784+
// Test DESC sort
785+
ApproximatePointRangeQuery approxQueryDesc = new ApproximatePointRangeQuery(
786+
field,
787+
pack(lower).bytes,
788+
pack(upper).bytes,
789+
dims,
790+
size,
791+
SortOrder.DESC,
792+
ApproximatePointRangeQuery.LONG_FORMAT
793+
);
794+
TopDocs approxDocsDesc = searcher.search(approxQueryDesc, size, descSort);
795+
TopDocs exactDocsDesc = searcher.search(exactQuery, size, descSort);
796+
// Verify the results match
797+
for (int i = 0; i < size; i++) {
798+
assertEquals(
799+
"DESC sorted results should match at position " + i,
800+
exactDocsDesc.scoreDocs[i].doc,
801+
approxDocsDesc.scoreDocs[i].doc
802+
);
803+
}
804+
assertEquals("Should return exactly size value documents", size, approxDocsAsc.scoreDocs.length);
805+
assertEquals(
806+
"Should return exactly size value documents as regular query",
807+
exactDocsAsc.scoreDocs.length,
808+
approxDocsAsc.scoreDocs.length
809+
);
810+
}
656811
}

0 commit comments

Comments
 (0)