Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
-- Test data: six (a, b) rows, every combination of a in {1, 2, 3} and b in {1, 2}.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)
AS testData(a, b);

-- CUBE on overlapping columns
-- Column b is a grouping key on its own and is also part of the grouping expression a + b.
SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE;

-- Plain-column variant: b is both a grouping key and the aggregate input.
SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE;

-- ROLLUP on overlapping columns
-- Same two query shapes as the CUBE cases, using the WITH ROLLUP suffix instead.
SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP;

SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP;

-- Test data: five (course, year, earnings) rows. ("dotNET", 2012) appears twice,
-- so that (course, year) group contains more than one row.
CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES
("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000)
AS courseSales(course, year, earnings);

-- ROLLUP
-- Function-style ROLLUP(...) with three argument shapes: plain columns, plain columns plus a
-- nested (course, year) tuple, and the same plus an explicit empty set ().
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year;

-- CUBE
-- Function-style CUBE(...) with the same three argument shapes.
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year;

-- GROUPING SETS
-- Uses the form with an explicit GROUP BY column list followed by GROUPING SETS(...).
-- The sets are: both columns, both columns plus the empty set, course only, and year only.
-- None of these queries has an ORDER BY.
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year);
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ());
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course);
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year);

-- Partial ROLLUP/CUBE/GROUPING SETS
-- Each GROUP BY combines several comma-separated items: a plain column with CUBE,
-- CUBE with ROLLUP, and CUBE with ROLLUP and GROUPING SETS.
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year;
SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year;

-- GROUPING SETS with aggregate functions containing groupBy columns
-- earnings is both the aggregate input and one of the grouping columns.
-- The aggregate result is aliased as sum and used as the secondary sort key.
SELECT course, udaf(earnings) AS sum FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;
-- Same query with GROUPING_ID(course, earnings) added to the select list.
SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales
GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum;

-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS
-- k1, k2 and k are select-list aliases (for a + b or b) referenced from the GROUP BY clause.
SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2);
SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b);
-- Mixes the unaliased expression a + b with the alias k in the GROUP BY column list.
SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k);

-- GROUP BY uses mixed separate columns and CUBE/ROLLUP/GROUPING SETS
-- Plain columns a, b followed by a single CUBE or ROLLUP over the same columns.
SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b);
SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b);
-- Two analytics clauses, without and then with a leading plain column.
SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b);
SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b);
-- Plain column a with GROUPING SETS, then with CUBE and ROLLUP progressively added.
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ());
SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ());
SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ());

-- Support nested CUBE/ROLLUP/GROUPING SETS in GROUPING SETS
-- The statements are laid out in three blank-line-separated groups. Within each group the
-- GROUPING SETS arguments use the same columns, written once with nested ROLLUP/CUBE/GROUPING SETS
-- and once as explicit tuples.
-- NOTE(review): presumably the statements in a group are expected to produce the same result - confirm
-- against the golden output.
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b));
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()));

-- An explicit tuple next to a doubly nested GROUPING SETS(ROLLUP(...)), followed by tuples
-- that repeat columns.
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b)));
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b));
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b)));

-- ROLLUP and CUBE side by side inside one GROUPING SETS, then nested and flat tuple spellings.
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b));
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ()));
SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ());
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
-- group by ordinal positions

-- Test data: six (a, b) rows, every combination of a in {1, 2, 3} and b in {1, 2}.
create temporary view data as select * from values
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 2)
as data(a, b);

-- basic case
-- Ordinal 1 refers to the first select-list item, a.
select a, udaf(b) from data group by 1;

-- constant case
-- Both ordinals point at literal select-list items (1 and 2).
select 1, 2, udaf(b) from data group by 1, 2;

-- duplicate group by column
-- First by name plus the ordinal 1, then with both keys given as ordinals
-- (position 1 is a, position 2 is the literal 1).
select a, 1, udaf(b) from data group by a, 1;
select a, 1, udaf(b) from data group by 1, 2;

-- group by a non-aggregate expression's ordinal
-- Ordinal 2 points at the expression b + 2.
select a, b + 2, udaf(2) from data group by a, 2;

-- with alias
-- Same grouping keys as the previous case, but the select-list items carry aliases.
select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2;

-- foldable non-literal: this should be the same as no grouping.
select udaf(b) from data group by 1 + 0;

-- negative case: position is an aggregate expression
-- Position 3 in both select lists is built from udaf(b).
select a, b, udaf(b) from data group by 3;
select a, b, udaf(b) + 2 from data group by 3;

-- negative case: nondeterministic expression
-- Ordinal 2 points at rand(0). The input comes from a subquery carrying a REPARTITION(1) hint.
select a, rand(0), udaf(b)
from
(select /*+ REPARTITION(1) */ a, b from data) group by a, 2;

-- group by ordinal followed by order by
-- The ordinal 1 appears in both GROUP BY and ORDER BY, over a single-row inline subquery.
select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1;

-- group by ordinal followed by having
-- Ordinal 2 points at a, which HAVING then references by name.
select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0;

-- mixed cases: group-by ordinals and aliases
-- The alias k and the ordinal 1 both refer to column a.
select a, a AS k, udaf(b) from data group by k, 1;

-- can use ordinal in CUBE
select a, b, udaf(1) from data group by cube(1, 2);

-- mixed cases: can use ordinal in CUBE
-- One argument is an ordinal and the other a column name.
select a, b, udaf(1) from data group by cube(1, b);

-- can use ordinal with cube
-- Ordinals in the GROUP BY list combined with the WITH CUBE suffix.
select a, b, udaf(1) from data group by 1, 2 with cube;

-- can use ordinal in ROLLUP
select a, b, udaf(1) from data group by rollup(1, 2);

-- mixed cases: can use ordinal in ROLLUP
-- One argument is an ordinal and the other a column name.
select a, b, udaf(1) from data group by rollup(1, b);

-- can use ordinal with rollup
-- Ordinals in the GROUP BY list combined with the WITH ROLLUP suffix.
select a, b, udaf(1) from data group by 1, 2 with rollup;

-- can use ordinal in GROUPING SETS
select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2));

-- mixed cases: can use ordinal in GROUPING SETS
-- Ordinals and column names are mixed, including inside a single tuple (a, 2).
select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2));

-- Same grouping sets, preceded by an explicit GROUP BY list that also mixes a name and an ordinal.
select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2));

-- range error
-- The select list has three items (a, b, udaf(1)). The ordinals used are -1 and 3.
-- NOTE(review): ordinal 3 is within the three-item select list and points at udaf(1), so the
-- "3" queries look like the "position is an aggregate expression" case rather than an
-- out-of-range ordinal - confirm against the golden output.
select a, b, udaf(1) from data group by a, -1;

select a, b, udaf(1) from data group by a, 3;

select a, b, udaf(1) from data group by cube(-1, 2);

select a, b, udaf(1) from data group by cube(1, 3);

-- turn off group by ordinal
-- This setting applies to every statement after it in this file.
set spark.sql.groupByOrdinal=false;

-- can now group by negative literal
-- With ordinals disabled, -1 is no longer read as a select-list position.
select udaf(b) from data group by -1;
110 changes: 110 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
-- Test aggregate operator with codegen on and off.
-- The three CONFIG_DIM1 lines below are test-harness directives, not ordinary comments.
-- They must keep the exact "--CONFIG_DIM1" prefix with no space after the dashes.
--CONFIG_DIM1 spark.sql.codegen.wholeStage=true
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY
--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN

-- Test data.
-- The six non-null (a, b) combinations plus three rows with a null a, a null b, or both.
CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null)
AS testData(a, b);

-- Aggregate with empty GroupBy expressions.
-- NOTE(review): in the first query a is neither grouped nor aggregated, so it is presumably an
-- expected analysis error - confirm against the golden output.
SELECT a, udaf(b) FROM testData;
SELECT udaf(a), udaf(b) FROM testData;

-- Aggregate with non-empty GroupBy expressions.
-- NOTE(review): the second query selects a while grouping by b, presumably another expected
-- analysis error - confirm against the golden output.
SELECT a, udaf(b) FROM testData GROUP BY a;
SELECT a, udaf(b) FROM testData GROUP BY b;
SELECT udaf(a), udaf(b) FROM testData GROUP BY a;

-- Aggregate grouped by literals.
-- GROUP BY 1 is an ordinal pointing at the literal 'foo'.
SELECT 'foo', udaf(a) FROM testData GROUP BY 1;

-- Aggregate grouped by literals (hash aggregate).
-- No testData row has a = 0, so the aggregate input is empty.
SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1;

-- Aggregate grouped by literals (sort aggregate).
-- Same empty input as the previous case, with a STRUCT as the aggregate argument.
SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1;

-- Aggregate with complex GroupBy expressions.
-- The select expressions are: identical to the grouping expression (a + b), different from it
-- (a + 2 vs a + 1), and built on top of it (a + 1 + 1 vs a + 1).
-- NOTE(review): the a + 2 query presumably fails analysis because a + 2 is not a grouping expression - confirm.
SELECT a + b, udaf(b) FROM testData GROUP BY a + b;
SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1;
SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1;

-- Aggregate with nulls.
-- Mixes udaf(a) with SKEWNESS, KURTOSIS, AVG, VARIANCE, STDDEV and SUM in one global aggregate
-- over column a, which contains nulls in testData.
SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a)
FROM testData;

-- Aggregate with foldable input and multiple distinct groups.
-- The input is a single row of constants. udaf is called with DISTINCT over one and over two arguments.
SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a;

-- Aliases in SELECT could be used in GROUP BY
-- The second query also references the alias k from HAVING.
SELECT a AS k, udaf(b) FROM testData GROUP BY k;
SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1;

-- GROUP BY alias with invalid col in SELECT list
-- non_existing is not a column of testData.
SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k;

-- Test data.
-- Two (k, a, v) rows. The view has a real column named a.
CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES
(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v);
-- The alias a (for k) has the same name as the view's own column a, so GROUP BY a could refer
-- to either one. Which one the analyzer picks is not visible from this file.
SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a;

-- turn off group by aliases
-- This setting applies to every statement after it in this file.
set spark.sql.groupByAliases=false;

-- Check analysis exceptions
-- Same statement as the earlier "Aliases in SELECT could be used in GROUP BY" case, now with aliases disabled.
SELECT a AS k, udaf(b) FROM testData GROUP BY k;

-- Aggregate with empty input and non-empty GroupBy expressions.
-- WHERE false removes every input row.
SELECT a, udaf(1) FROM testData WHERE false GROUP BY a;

-- Aggregate with empty input and empty GroupBy expressions.
-- The second query wraps the global aggregate in a subquery and selects only a constant from it.
SELECT udaf(1) FROM testData WHERE false;
SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t;

-- Aggregate with empty GroupBy expressions and filter on top
-- The inner query is a global aggregate over empty input. The outer predicate b.z != b.z
-- compares a column with itself.
SELECT 1 from (
SELECT 1 AS z,
udaf(a.x)
FROM (select 1 as x) a
WHERE false
) b
where b.z != b.z;

-- SPARK-25708 HAVING without GROUP BY means global aggregate
SELECT 1 FROM range(10) HAVING udaf(id) > 0;

-- Test data
-- (k, v) rows with a boolean-or-null v. Per key: k = 1 true and false, k = 2 true only,
-- k = 3 false and null, k = 4 nulls only, k = 5 null, true and false.
CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES
(1, true), (1, false),
(2, true),
(3, false), (3, null),
(4, null), (4, null),
(5, null), (5, true), (5, false) AS test_agg(k, v);

-- empty table
-- WHERE 1 = 0 removes every row, so each aggregate sees no input.
SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0;

-- all null values
-- Both test_agg rows with k = 4 have a null v.
SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4;

-- aggregates are null-filtering (k = 5 mixes a null with true and false)
-- Input rows for k = 5 are (5, null), (5, true) and (5, false).
SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5;

-- group by
-- Runs the same aggregates once per key k over the whole view.
SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k;

-- having
-- Filters groups on the udaf result: equal to false, then IS NULL.
SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false;
SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL;

-- basic subquery path to make sure rewrite happens in both parent and child plans.
-- The outer query aggregates with udaf over k = 2. The IN subquery aggregates with Any over k = 1.
-- The udaf result is aliased as count.
SELECT k,
udaf(v) AS count
FROM test_agg
WHERE k = 2
AND v IN (SELECT Any(v)
FROM test_agg
WHERE k = 1)
GROUP BY k;
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Test data: three rows with string columns a, b, c and an integer column d that is 1 in every row.
-- The view is named grouping, the same identifier as the grouping(...) function used later in this file.
CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES
("1", "2", "3", 1),
("4", "5", "6", 1),
("7", "8", "9", 1)
as grouping(a, b, c, d);

-- SPARK-17849: grouping set throws NPE #1
-- Only the empty grouping set, although a, b and c are listed in GROUP BY and selected.
SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (());

-- SPARK-17849: grouping set throws NPE #2
-- A single grouping set on the first GROUP BY column.
SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a));

-- SPARK-17849: grouping set throws NPE #3
-- A single grouping set on the last GROUP BY column.
SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c));

-- Group sets without explicit group by
-- GROUPING SETS follows GROUP BY directly, with no column list in between.
SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);

-- Group sets without group by and with grouping
-- Same query with grouping(c1) added to the select list.
SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1);

-- Multiple groupings within a grouping set
-- grouping__id is referenced in lower case in the select list and in upper case in HAVING.
-- HAVING keeps only rows whose grouping id is greater than 1.
SELECT c1, c2, udaf(c3), grouping__id
FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3)
GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) )
HAVING GROUPING__ID > 1;

-- complex expression in grouping sets
-- One grouping set is the expression a + b, which is also selected.
SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b));

-- complex expression in grouping sets
-- Adds (b + a), the same sum as a + b with the operands swapped, as a separate grouping set.
SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b));

-- negative tests - must have at least one grouping expression
-- GROUP BY is followed directly by WITH ROLLUP / WITH CUBE, with no expressions in between.
SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP;

SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE;

-- duplicate entries in grouping sets
-- (k1, k2) and (k2, k1) contain the same columns in a different order. The three queries differ
-- only in the first select-list item: none, grouping__id, or grouping(k1).
SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));

SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));

SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1));

-- grouping_id function
-- GROUP BY lists the columns as k1, k2 while the first grouping set lists them as (k2, k1).
-- The second grouping set is the bare column k1.
SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1);
Loading