From e456ae9ca01e0b121e007e216b1185a920e64b74 Mon Sep 17 00:00:00 2001 From: Lyublena Antova Date: Fri, 8 Dec 2023 15:27:50 -0800 Subject: [PATCH] Fix bug in tracking partial aggregation stats when the query is a group-by on the partitioning key During tracking of histories, we adjust the output byte count to account for hash variables introduced during planning. In certain cases, the reported byte count is 0 (for example when the grouping key is a partition key, its value does not contribute to the byte count). After accounting for hash variables, the 0 becomes a negative number, which raises a NaN exception that looks like this: java.lang.IllegalArgumentException: value is NaN at com.facebook.presto.spi.statistics.Estimate.of(Estimate.java:54) at com.facebook.presto.cost.HistoryBasedPlanStatisticsTracker.constructAggregationNodeStatistics(HistoryBasedPlanStatisticsTracker.java:250) at com.facebook.presto.cost.HistoryBasedPlanStatisticsTracker.getQueryStats(HistoryBasedPlanStatisticsTracker.java:164) This PR fixes that by turning NaN values into unknown like it is done in a few other places of the HBO tracker code. --- .../TestHiveHistoryBasedStatsTracking.java | 22 +++++++++++++++++++ .../HistoryBasedPlanStatisticsTracker.java | 5 +++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveHistoryBasedStatsTracking.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveHistoryBasedStatsTracking.java index 9c96e9fe9c0cf..5c25c2bca80c0 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveHistoryBasedStatsTracking.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveHistoryBasedStatsTracking.java @@ -185,6 +185,28 @@ public void testPartialAggStatistics() } } + @Test + public void testPartialAggStatisticsGroupByPartKey() + { + try { + // CBO Statistics + getQueryRunner().execute("CREATE TABLE test_orders WITH (partitioned_by = ARRAY['ds']) AS " + + "SELECT orderkey, orderpriority, comment, custkey, '2020-09-01' as ds FROM orders where orderkey < 2000 "); + + // collect HBO Statistics + String queryGBPartitionKey = "SELECT ds FROM test_orders group by ds"; + + Plan plan = plan(queryGBPartitionKey, createSession("always")); + + assertTrue(PlanNodeSearcher.searchFrom(plan.getRoot()) + .where(node -> node instanceof AggregationNode && ((AggregationNode) node).getStep() == AggregationNode.Step.PARTIAL).findFirst().isPresent()); + executeAndTrackHistory(queryGBPartitionKey, createSession("always")); + } + finally { + getQueryRunner().execute("DROP TABLE IF EXISTS test_orders"); + } + } + @Override protected void assertPlan(@Language("SQL") String query, PlanMatchPattern pattern) { diff --git a/presto-main/src/main/java/com/facebook/presto/cost/HistoryBasedPlanStatisticsTracker.java b/presto-main/src/main/java/com/facebook/presto/cost/HistoryBasedPlanStatisticsTracker.java index c6ad6f5019d9c..1fd4622919191 100644 --- a/presto-main/src/main/java/com/facebook/presto/cost/HistoryBasedPlanStatisticsTracker.java +++ b/presto-main/src/main/java/com/facebook/presto/cost/HistoryBasedPlanStatisticsTracker.java @@ -248,8 +248,9 @@ private PartialAggregationStatistics constructAggregationNodeStatistics(PlanNode PlanNodeStats childNodeStats = planNodeStatsMap.get(childNode.getId()); if (childNodeStats != null) { double partialAggregationInputBytes = adjustedOutputBytes(childNode, childNodeStats); - return new PartialAggregationStatistics(Estimate.of(partialAggregationInputBytes), - Estimate.of(outputBytes), + return new PartialAggregationStatistics( + Double.isNaN(partialAggregationInputBytes) ? Estimate.unknown() : Estimate.of(partialAggregationInputBytes), + Double.isNaN(outputBytes) ? Estimate.unknown() : Estimate.of(outputBytes), Estimate.of(childNodeStats.getPlanNodeOutputPositions()), Estimate.of(outputPositions)); }