# Patch overview (text before the first "diff --git" header is not part of any hunk):
#   * docs/changelog/111644.yaml: new changelog entry for PR 111644, type "bug",
#     area "Aggregations", referencing issue 111065.
#   * MergingDigest.java: the default of useAlternatingSort changes from true to false,
#     and merge() now forces addThis = true once lastUsedCell == mean.length - 1,
#     i.e. when the last slot of the mean array is already in use.
#   * MergingDigestTests.java: adds testLargeInputSmallCompression, which feeds
#     10,000,000 values drawn via between(0, 3_600_000) into a MergingDigest(10) and
#     checks the centroid count and the 0.00001 / 0.99999 quantiles.
#   * TDigestTests.java: testQuantile's tolerance for the median is widened from 0.2 to 0.25.
# NOTE(review): the leading indentation of the Java lines and the position of blank
# context lines were reconstructed to match the @@ hunk line counts -- confirm against
# the target tree (e.g. with `git apply --check`) before applying.
diff --git a/docs/changelog/111644.yaml b/docs/changelog/111644.yaml
new file mode 100644
index 0000000000000..3705d697c95e3
--- /dev/null
+++ b/docs/changelog/111644.yaml
@@ -0,0 +1,6 @@
+pr: 111644
+summary: Force using the last centroid during merging
+area: Aggregations
+type: bug
+issues:
+ - 111065
diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java
index 0be2b68d76a21..172b0f24dfd99 100644
--- a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java
+++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java
@@ -92,7 +92,7 @@ public class MergingDigest extends AbstractTDigest {
     private final int[] order;
 
     // if true, alternate upward and downward merge passes
-    public boolean useAlternatingSort = true;
+    public boolean useAlternatingSort = false;
     // if true, use higher working value of compression during construction, then reduce on presentation
     public boolean useTwoLevelCompression = true;
 
@@ -302,9 +302,13 @@ private void merge(
                 addThis = projectedW <= wLimit;
             }
             if (i == 1 || i == incomingCount - 1) {
-                // force last centroid to never merge
+                // force first and last centroid to never merge
                 addThis = false;
             }
+            if (lastUsedCell == mean.length - 1) {
+                // use the last centroid, there's no more
+                addThis = true;
+            }
 
             if (addThis) {
                 // next point will fit
diff --git a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java
index 16a81bad50756..9fadf2218f203 100644
--- a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java
+++ b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java
@@ -151,4 +151,14 @@ public void testFill() {
             i++;
         }
     }
+
+    public void testLargeInputSmallCompression() {
+        MergingDigest td = new MergingDigest(10);
+        for (int i = 0; i < 10_000_000; i++) {
+            td.add(between(0, 3_600_000));
+        }
+        assertTrue(td.centroidCount() < 100);
+        assertTrue(td.quantile(0.00001) < 100_000);
+        assertTrue(td.quantile(0.99999) > 3_000_000);
+    }
 }
diff --git a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java
index 72b460da19da2..815346100532c 100644
--- a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java
+++ b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java
@@ -152,7 +152,7 @@ public void testQuantile() {
         hist2.compress();
         double x1 = hist1.quantile(0.5);
         double x2 = hist2.quantile(0.5);
-        assertEquals(Dist.quantile(0.5, data), x1, 0.2);
+        assertEquals(Dist.quantile(0.5, data), x1, 0.25);
         assertEquals(x1, x2, 0.01);
     }
 