trinodb · dain · Aug 13, 2022 · Aug 12, 2022 · sopel39 · Aug 12, 2022
diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/DistinctAccumulatorFactory.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/DistinctAccumulatorFactory.java
@@ -233,7 +233,7 @@ public void addInput(GroupByIdBlock groupIdsBlock, Page page, Optional<Block> ma
                 columnIndexes[i] = i + 1;
             }
             Page filtered = filteredWithGroup.getColumns(columnIndexes);
-
+            // NOTE: the accumulator must be called even if the filtered page is empty to inform the accumulator about the group count
             accumulator.addInput(groupIds, filtered, Optional.of(distinctMask));
         }
 

diff --git a/core/trino-main/src/main/java/io/trino/operator/aggregation/OrderedAccumulatorFactory.java b/core/trino-main/src/main/java/io/trino/operator/aggregation/OrderedAccumulatorFactory.java
@@ -211,7 +211,16 @@ public void addInput(GroupByIdBlock groupIdsBlock, Page page, Optional<Block> ma
             if (mask.isPresent()) {
                 page = filter(page, mask.orElseThrow());
             }
-            pagesIndex.addPage(page);
+            if (page.getPositionCount() == 0) {
+                // page was entirely filtered out, but we need to inform the accumulator of the new group count
+                accumulator.addInput(
+                        new GroupByIdBlock(groupCount, page.getBlock(page.getChannelCount() - 1)),
+                        page.getColumns(argumentChannels),
+                        Optional.empty());
+            }
+            else {
+                pagesIndex.addPage(page);
+            }
         }
 
         @Override

diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctAggregations.java b/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctAggregations.java
@@ -297,4 +297,31 @@ public void testIpAddressDistinct()
                         "             (IPADDRESS'2001:db8:0:0:1::1')) AS t (ipaddress_col)"))
                 .matches("VALUES IPADDRESS'2001:db8:0:0:1::1'");
     }
+
+    @Test
+    public void testCompletelyFilteredGroup()
+    {
+        // This query filters out all values to a most groups, which results in an accumulator with no pages to sort.
+        // This can cause a failure if the ordering code does not inform the accumulator of the max row group.
+        assertThat(assertions.query("" +
+                "SELECT count(id) > 15000, sum(cardinality(v)) " +
+                "FROM ( " +
+                "    SELECT " +
+                "        id, " +
+                "        array_agg(DISTINCT v) filter (WHERE v IS NOT NULL) AS v " +
+                "    from ( " +
+                "        ( " +
+                "            SELECT 'filtered' AS id, cast('value' AS varchar) AS v " +
+                "            FROM (VALUES 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) " +
+                "        ) " +
+                "        UNION ALL " +
+                "        ( " +
+                "            SELECT cast(uuid() AS varchar) AS id, cast(null AS varchar) AS v " +
+                "            FROM UNNEST(combinations(ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], 5)) " +
+                "        ) " +
+                "    ) " +
+                "    GROUP BY id " +
+                ")"))
+                .matches("VALUES (TRUE, BIGINT '1')");
+    }
 }
diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestOrderedAggregation.java b/core/trino-main/src/test/java/io/trino/sql/query/TestOrderedAggregation.java
@@ -167,4 +167,31 @@ public void testRepeatedSortItems()
         assertThat(assertions.query("SELECT count(x ORDER BY y, y) FROM (VALUES ('a', 2)) t(x, y)"))
                 .matches("VALUES BIGINT '1'");
     }
+
+    @Test
+    public void testCompletelyFilteredGroup()
+    {
+        // This query filters out all values to a most groups, which results in an accumulator with no pages to sort.
+        // This can cause a failure if the ordering code does not inform the accumulator of the max row group.
+        assertThat(assertions.query("" +
+                "SELECT count(id) > 15000, sum(cardinality(v)) " +
+                "FROM ( " +
+                "    SELECT " +
+                "        id, " +
+                "        array_agg( v ORDER BY t DESC) filter (WHERE v IS NOT NULL) AS v " +
+                "    FROM ( " +
+                "        ( " +
+                "            SELECT 'filtered' AS id, cast('value' AS varchar) AS v, 'sort' AS t " +
+                "            FROM (VALUES 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) " +
+                "        ) " +
+                "        UNION ALL " +
+                "        ( " +
+                "            SELECT cast(uuid() AS varchar) AS id, cast(null AS varchar) AS v, 'sort' AS t " +
+                "            FROM UNNEST(combinations(ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], 5)) " +
+                "        ) " +
+                "    ) " +
+                "    GROUP BY id " +
+                ")"))
+                .matches("VALUES (TRUE, BIGINT '10')");
+    }
 }