diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql new file mode 100644 index 0000000000000..0249d98b6be8a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-analytics.sql @@ -0,0 +1,70 @@ +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) +AS testData(a, b); + +-- CUBE on overlapping columns +SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE; + +SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE; + +-- ROLLUP on overlapping columns +SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP; + +SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP; + +CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES +("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) +AS courseSales(course, year, earnings); + +-- ROLLUP +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year; + +-- CUBE +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year; + +-- GROUPING SETS +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year); +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ()); +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course); +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year); + +-- Partial ROLLUP/CUBE/GROUPING SETS +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year; +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year; + +-- GROUPING SETS with aggregate functions containing groupBy columns +SELECT course, udaf(earnings) AS sum FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; +SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum; + +-- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS +SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2); +SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b); +SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k); + +-- GROUP BY with a mix of individual columns and CUBE/ROLLUP/GROUPING SETS +SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b); +SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b); +SELECT a, 
b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b); +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()); +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()); +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()); + +-- Support nested CUBE/ROLLUP/GROUPING SETS in GROUPING SETS +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())); + +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b))); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b)); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b))); + +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b)); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ())); +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ()); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql new file mode 100644 index 0000000000000..ded3e74d20a7e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by-ordinal.sql @@ -0,0 +1,88 @@ +-- group by ordinal positions + +create temporary view data as select * from values + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 1), + (3, 2) + as data(a, b); + +-- basic case +select a, udaf(b) from data group by 1; + +-- constant case +select 1, 2, udaf(b) from data group by 1, 2; + +-- duplicate group by column +select a, 1, udaf(b) from data group by a, 1; +select a, 1, udaf(b) from data group by 1, 2; + +-- group by a non-aggregate expression's ordinal +select a, b + 2, udaf(2) from data group by a, 2; + +-- with alias +select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2; + +-- foldable non-literal: this should be the same as no grouping. 
+select udaf(b) from data group by 1 + 0; + +-- negative case: position is an aggregate expression +select a, b, udaf(b) from data group by 3; +select a, b, udaf(b) + 2 from data group by 3; + +-- negative case: nondeterministic expression +select a, rand(0), udaf(b) +from +(select /*+ REPARTITION(1) */ a, b from data) group by a, 2; + +-- group by ordinal followed by order by +select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1; + +-- group by ordinal followed by having +select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0; + +-- mixed cases: group-by ordinals and aliases +select a, a AS k, udaf(b) from data group by k, 1; + +-- can use ordinal in CUBE +select a, b, udaf(1) from data group by cube(1, 2); + +-- mixed cases: can use ordinal in CUBE +select a, b, udaf(1) from data group by cube(1, b); + +-- can use ordinal with cube +select a, b, udaf(1) from data group by 1, 2 with cube; + +-- can use ordinal in ROLLUP +select a, b, udaf(1) from data group by rollup(1, 2); + +-- mixed cases: can use ordinal in ROLLUP +select a, b, udaf(1) from data group by rollup(1, b); + +-- can use ordinal with rollup +select a, b, udaf(1) from data group by 1, 2 with rollup; + +-- can use ordinal in GROUPING SETS +select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2)); + +-- mixed cases: can use ordinal in GROUPING SETS +select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2)); + +select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2)); + +-- range error +select a, b, udaf(1) from data group by a, -1; + +select a, b, udaf(1) from data group by a, 3; + +select a, b, udaf(1) from data group by cube(-1, 2); + +select a, b, udaf(1) from data group by cube(1, 3); + +-- turn off group by ordinal +set spark.sql.groupByOrdinal=false; + +-- can now group by negative literal +select udaf(b) from data group by -1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql new file mode 100644 index 0000000000000..eaac13bcf6a93 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-group-by.sql @@ -0,0 +1,110 @@ +-- Test aggregate operator with codegen on and off. +--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN + +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b); + +-- Aggregate with empty GroupBy expressions. +SELECT a, udaf(b) FROM testData; +SELECT udaf(a), udaf(b) FROM testData; + +-- Aggregate with non-empty GroupBy expressions. +SELECT a, udaf(b) FROM testData GROUP BY a; +SELECT a, udaf(b) FROM testData GROUP BY b; +SELECT udaf(a), udaf(b) FROM testData GROUP BY a; + +-- Aggregate grouped by literals. +SELECT 'foo', udaf(a) FROM testData GROUP BY 1; + +-- Aggregate grouped by literals (hash aggregate). +SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate grouped by literals (sort aggregate). +SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1; + +-- Aggregate with complex GroupBy expressions. 
+SELECT a + b, udaf(b) FROM testData GROUP BY a + b; +SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1; +SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1; + +-- Aggregate with nulls. +SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a) +FROM testData; + +-- Aggregate with foldable input and multiple distinct groups. +SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a; + +-- Aliases in SELECT could be used in GROUP BY +SELECT a AS k, udaf(b) FROM testData GROUP BY k; +SELECT a AS k, udaf(b) FROM testData GROUP BY k HAVING k > 1; + +-- GROUP BY alias with invalid col in SELECT list +SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k; + +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES +(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v); +SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a; + +-- turn off group by aliases +set spark.sql.groupByAliases=false; + +-- Check analysis exceptions +SELECT a AS k, udaf(b) FROM testData GROUP BY k; + +-- Aggregate with empty input and non-empty GroupBy expressions. +SELECT a, udaf(1) FROM testData WHERE false GROUP BY a; + +-- Aggregate with empty input and empty GroupBy expressions. +SELECT udaf(1) FROM testData WHERE false; +SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t; + +-- Aggregate with empty GroupBy expressions and filter on top +SELECT 1 from ( + SELECT 1 AS z, + udaf(a.x) + FROM (select 1 as x) a + WHERE false +) b +where b.z != b.z; + +-- SPARK-25708 HAVING without GROUP BY means global aggregate +SELECT 1 FROM range(10) HAVING udaf(id) > 0; + +-- Test data +CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES + (1, true), (1, false), + (2, true), + (3, false), (3, null), + (4, null), (4, null), + (5, null), (5, true), (5, false) AS test_agg(k, v); + +-- empty table +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0; + +-- all null values +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4; + +-- aggregates are null Filtering +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5; + +-- group by +SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k; + +-- having +SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false; +SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL; + +-- basic subquery path to make sure rewrite happens in both parent and child plans. 
+SELECT k, + udaf(v) AS count +FROM test_agg +WHERE k = 2 + AND v IN (SELECT Any(v) + FROM test_agg + WHERE k = 1) +GROUP BY k; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql new file mode 100644 index 0000000000000..1217b9e5b09db --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf-grouping-set.sql @@ -0,0 +1,47 @@ +CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES + ("1", "2", "3", 1), + ("4", "5", "6", 1), + ("7", "8", "9", 1) + as grouping(a, b, c, d); + +-- SPARK-17849: grouping set throws NPE #1 +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (()); + +-- SPARK-17849: grouping set throws NPE #2 +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)); + +-- SPARK-17849: grouping set throws NPE #3 +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)); + +-- Group sets without explicit group by +SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1); + +-- Group sets without group by and with grouping +SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1); + +-- Multiple grouping within a grouping set +SELECT c1, c2, udaf(c3), grouping__id +FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3) +GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) ) +HAVING GROUPING__ID > 1; + +-- complex expression in grouping sets +SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b)); + +-- complex expression in grouping sets +SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b)); + +-- negative tests - must have at least one grouping expression +SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP; + +SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE; + +-- duplicate entries in grouping sets +SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +-- grouping_id function +SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udaf.sql b/sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql similarity index 100% rename from sql/core/src/test/resources/sql-tests/inputs/udaf.sql rename to sql/core/src/test/resources/sql-tests/inputs/udaf/udaf.sql diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out new file mode 100644 index 0000000000000..4b16407e1a0c0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-analytics.sql.out @@ -0,0 +1,1150 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH CUBE +-- 
!query schema +struct<(a + b):int,b:int,udaf((a - b)):int> +-- !query output +2 1 1 +2 NULL 0 +3 1 1 +3 2 1 +3 NULL 0 +4 1 1 +4 2 1 +4 NULL 0 +5 2 1 +5 NULL 0 +NULL 1 3 +NULL 2 3 +NULL NULL 0 + + +-- !query +SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH CUBE +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 0 +2 1 1 +2 2 1 +2 NULL 0 +3 1 1 +3 2 1 +3 NULL 0 +NULL 1 3 +NULL 2 3 +NULL NULL 0 + + +-- !query +SELECT a + b, b, udaf(a - b) FROM testData GROUP BY a + b, b WITH ROLLUP +-- !query schema +struct<(a + b):int,b:int,udaf((a - b)):int> +-- !query output +2 1 1 +2 NULL 0 +3 1 1 +3 2 1 +3 NULL 0 +4 1 1 +4 2 1 +4 NULL 0 +5 2 1 +5 NULL 0 +NULL NULL 0 + + +-- !query +SELECT a, b, udaf(b) FROM testData GROUP BY a, b WITH ROLLUP +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 0 +2 1 1 +2 2 1 +2 NULL 0 +3 1 1 +3 2 1 +3 NULL 0 +NULL NULL 0 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW courseSales AS SELECT * FROM VALUES +("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) +AS courseSales(course, year, earnings) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 5 +Java NULL 2 +Java 2012 1 +Java 2013 1 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year)) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 5 +Java NULL 2 +Java 2012 1 +Java 2012 1 +Java 2013 1 +Java 2013 1 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2013 1 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Empty set in ROLLUP grouping sets is not supported.(line 1, pos 62) + +== SQL == +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY ROLLUP(course, year, (course, year), ()) ORDER BY course, year +--------------------------------------------------------------^^^ + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 5 +NULL 2012 3 +NULL 2013 2 +Java NULL 2 +Java 2012 1 +Java 2013 1 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year)) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 5 +NULL 2012 3 +NULL 2013 2 +Java NULL 2 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Empty set in CUBE grouping sets is not supported.(line 1, pos 62) + +== SQL == +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year, (course, year), ()) ORDER BY course, year 
+--------------------------------------------------------------^^^ + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year) +-- !query schema +struct +-- !query output +Java NULL 2 +NULL 2012 3 +NULL 2013 2 +dotNET NULL 3 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course, year, ()) +-- !query schema +struct +-- !query output +Java NULL 2 +NULL 2012 3 +NULL 2013 2 +NULL NULL 5 +dotNET NULL 3 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(course) +-- !query schema +struct +-- !query output +Java NULL 2 +dotNET NULL 3 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, year GROUPING SETS(year) +-- !query schema +struct +-- !query output +NULL 2012 3 +NULL 2013 2 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY course, CUBE(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +Java NULL 2 +Java NULL 2 +Java 2012 1 +Java 2012 1 +Java 2013 1 +Java 2013 1 +dotNET NULL 3 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2013 1 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL NULL 5 +NULL 2012 3 +NULL 2013 2 +Java NULL 2 +Java NULL 2 +Java NULL 2 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +dotNET NULL 3 +dotNET NULL 3 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 + + +-- !query +SELECT course, year, udaf(earnings) FROM courseSales GROUP BY CUBE(course, year), ROLLUP(course, year), GROUPING SETS(course, year) ORDER BY course, year +-- !query schema +struct +-- !query output +NULL 2012 3 +NULL 2012 3 +NULL 2013 2 +NULL 2013 2 +Java NULL 2 +Java NULL 2 +Java NULL 2 +Java NULL 2 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2012 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +Java 2013 1 +dotNET NULL 3 +dotNET NULL 3 +dotNET NULL 3 +dotNET NULL 3 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2012 2 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 +dotNET 2013 1 + + +-- !query +SELECT course, udaf(earnings) AS sum FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query schema +struct +-- !query output +NULL 0 +Java 0 +Java 1 +Java 1 
+dotNET 0 +dotNET 1 +dotNET 1 +dotNET 1 + + +-- !query +SELECT course, udaf(earnings) AS sum, GROUPING_ID(course, earnings) FROM courseSales +GROUP BY course, earnings GROUPING SETS((), (course), (course, earnings)) ORDER BY course, sum +-- !query schema +struct +-- !query output +NULL 0 3 +Java 0 1 +Java 1 0 +Java 1 0 +dotNET 0 1 +dotNET 1 0 +dotNET 1 0 +dotNET 1 0 + + +-- !query +SELECT a + b AS k1, b AS k2, udaf(a - b) FROM testData GROUP BY CUBE(k1, k2) +-- !query schema +struct +-- !query output +2 1 1 +2 NULL 1 +3 1 1 +3 2 1 +3 NULL 2 +4 1 1 +4 2 1 +4 NULL 2 +5 2 1 +5 NULL 1 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +SELECT a + b AS k, b, udaf(a - b) FROM testData GROUP BY ROLLUP(k, b) +-- !query schema +struct +-- !query output +2 1 1 +2 NULL 0 +3 1 1 +3 2 1 +3 NULL 0 +4 1 1 +4 2 1 +4 NULL 0 +5 2 1 +5 NULL 0 +NULL NULL 0 + + +-- !query +SELECT a + b, b AS k, udaf(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) +-- !query schema +struct<(a + b):int,k:int,udaf((a - b)):int> +-- !query output +NULL 1 3 +NULL 2 3 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, b, CUBE(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, b, ROLLUP(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()) +-- !query schema 
+struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ())) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), GROUPING SETS(ROLLUP(a, b))) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b, a, b), (a, b, a), (a, b)) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b, a, b), (a, b, a), (a, b))) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(ROLLUP(a, b), CUBE(a, b)) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS(GROUPING SETS((a, b), (a), ()), GROUPING SETS((a, b), (a), (b), ())) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 
NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 + + +-- !query +SELECT a, b, udaf(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ()) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 1 1 +1 2 1 +1 2 1 +1 2 1 +1 NULL 2 +1 NULL 2 +1 NULL 2 +1 NULL 2 +2 1 1 +2 1 1 +2 1 1 +2 2 1 +2 2 1 +2 2 1 +2 NULL 2 +2 NULL 2 +2 NULL 2 +2 NULL 2 +3 1 1 +3 1 1 +3 1 1 +3 2 1 +3 2 1 +3 2 1 +3 NULL 2 +3 NULL 2 +3 NULL 2 +3 NULL 2 diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out new file mode 100644 index 0000000000000..d6e3f111001af --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out @@ -0,0 +1,405 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view data as select * from values + (1, 1), + (1, 2), + (2, 1), + (2, 2), + (3, 1), + (3, 2) + as data(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +select a, udaf(b) from data group by 1 +-- !query schema +struct +-- !query output +1 2 +2 2 +3 2 + + +-- !query +select 1, 2, udaf(b) from data group by 1, 2 +-- !query schema +struct<1:int,2:int,udaf(b):int> +-- !query output +1 2 6 + + +-- !query +select a, 1, udaf(b) from data group by a, 1 +-- !query schema +struct +-- !query output +1 1 2 +2 1 2 +3 1 2 + + +-- !query +select a, 1, udaf(b) from data group by 1, 2 +-- !query schema +struct +-- !query output +1 1 2 +2 1 2 +3 1 2 + + +-- !query +select a, b + 2, udaf(2) from data group by a, 2 +-- !query schema +struct +-- !query output +1 3 1 +1 4 1 +2 3 1 +2 4 1 +3 3 1 +3 4 1 + + +-- !query +select a as aa, b + 2 as bb, udaf(2) from data group by 1, 2 +-- !query schema +struct +-- !query output +1 3 1 +1 4 1 +2 3 1 +2 4 1 +3 3 1 +3 4 1 + + +-- !query +select udaf(b) from data group by 1 + 0 +-- !query schema +struct +-- !query output +6 + + +-- !query +select a, b, udaf(b) from data group by 3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"a\"" + } +} + + +-- !query +select a, b, udaf(b) + 2 from data group by 3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"a\"" + } +} + + +-- !query +select a, rand(0), udaf(b) +from +(select /*+ REPARTITION(1) */ a, b from data) group by a, 2 +-- !query schema +struct +-- !query output +1 0.5234194256885571 1 +1 0.7604953758285915 1 +2 0.0953472826424725 1 +2 0.3163249920547614 1 +3 0.2710259815484829 1 +3 0.7141011170991605 1 + + +-- !query +select a, udaf(a) from (select 1 as a) tmp group by 1 order by 1 +-- !query schema +struct +-- !query output +1 1 + + +-- !query +select udaf(a), a from (select 1 as a) tmp group by 2 having a > 0 +-- !query schema +struct +-- !query output +1 1 + + +-- !query +select a, a AS k, udaf(b) from data group by k, 1 +-- !query schema +struct +-- !query output +1 1 2 +2 2 2 +3 3 2 + + +-- !query +select a, b, udaf(1) from data group by cube(1, 2) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by cube(1, b) +-- !query 
schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by 1, 2 with cube +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by rollup(1, 2) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by rollup(1, b) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by 1, 2 with rollup +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL NULL 6 + + +-- !query +select a, b, udaf(1) from data group by grouping sets((1), (2), (1, 2)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 + + +-- !query +select a, b, udaf(1) from data group by grouping sets((1), (b), (a, 2)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 + + +-- !query +select a, b, udaf(1) from data group by a, 2 grouping sets((1), (b), (a, 2)) +-- !query schema +struct +-- !query output +1 1 1 +1 2 1 +1 NULL 2 +2 1 1 +2 2 1 +2 NULL 2 +3 1 1 +3 2 1 +3 NULL 2 +NULL 1 3 +NULL 2 3 + + +-- !query +select a, b, udaf(1) from data group by a, -1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE", + "sqlState" : "42000", + "messageParameters" : { + "index" : "-1", + "size" : "3" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 44, + "stopIndex" : 45, + "fragment" : "-1" + } ] +} + + +-- !query +select a, b, udaf(1) from data group by a, 3 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"b\"" + } +} + + +-- !query +select a, b, udaf(1) from data group by cube(-1, 2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "GROUP_BY_POS_OUT_OF_RANGE", + "sqlState" : "42000", + "messageParameters" : { + "index" : "-1", + "size" : "3" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 46, + "stopIndex" : 47, + "fragment" : "-1" + } ] +} + + +-- !query +select a, b, udaf(1) from data group by cube(1, 3) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping expressions sequence is empty, and 'data.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'data.a' in first() (or first_value) if you don't care which value you get. 
+ + +-- !query +set spark.sql.groupByOrdinal=false +-- !query schema +struct +-- !query output +spark.sql.groupByOrdinal false + + +-- !query +select udaf(b) from data group by -1 +-- !query schema +struct +-- !query output +6 diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out new file mode 100644 index 0000000000000..bb98d244faa12 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-group-by.sql.out @@ -0,0 +1,411 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES +(1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) +AS testData(a, b) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a, udaf(b) FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +grouping expressions sequence is empty, and 'testdata.a' is not an aggregate function. Wrap '()' in windowing function(s) or wrap 'testdata.a' in first() (or first_value) if you don't care which value you get. + + +-- !query +SELECT udaf(a), udaf(b) FROM testData +-- !query schema +struct +-- !query output +7 7 + + +-- !query +SELECT a, udaf(b) FROM testData GROUP BY a +-- !query schema +struct +-- !query output +1 2 +2 2 +3 2 +NULL 1 + + +-- !query +SELECT a, udaf(b) FROM testData GROUP BY b +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"a\"" + } +} + + +-- !query +SELECT udaf(a), udaf(b) FROM testData GROUP BY a +-- !query schema +struct +-- !query output +0 1 +2 2 +2 2 +3 2 + + +-- !query +SELECT 'foo', udaf(a) FROM testData GROUP BY 1 +-- !query schema +struct +-- !query output +foo 7 + + +-- !query +SELECT 'foo', udaf(a) FROM testData WHERE a = 0 GROUP BY 1 +-- !query schema +struct +-- !query output + + + +-- !query +SELECT 'foo', udaf(STRUCT(a)) FROM testData WHERE a = 0 GROUP BY 1 +-- !query schema +struct +-- !query output + + + +-- !query +SELECT a + b, udaf(b) FROM testData GROUP BY a + b +-- !query schema +struct<(a + b):int,udaf(b):int> +-- !query output +2 1 +3 2 +4 2 +5 1 +NULL 1 + + +-- !query +SELECT a + 2, udaf(b) FROM testData GROUP BY a + 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"a\"" + } +} + + +-- !query +SELECT a + 1 + 1, udaf(b) FROM testData GROUP BY a + 1 +-- !query schema +struct<((a + 1) + 1):int,udaf(b):int> +-- !query output +3 2 +4 2 +5 2 +NULL 1 + + +-- !query +SELECT SKEWNESS(a), KURTOSIS(a), udaf(a), udaf(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), udaf(a) +FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT", + "messageParameters" : { + "functionList" : "`udaf`" + } +} + + +-- !query +SELECT udaf(DISTINCT b), udaf(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS c) GROUP BY a +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Function pythonudf does not support DISTINCT; line 1 pos 7 + + +-- !query +SELECT a AS k, udaf(b) FROM testData GROUP BY k +-- !query schema +struct +-- !query output +1 2 +2 2 +3 2 +NULL 1 + + +-- !query +SELECT a AS k, udaf(b) 
FROM testData GROUP BY k HAVING k > 1 +-- !query schema +struct +-- !query output +2 2 +3 2 + + +-- !query +SELECT a AS k, udaf(non_existing) FROM testData GROUP BY k +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN", + "errorSubClass" : "WITH_SUGGESTION", + "sqlState" : "42000", + "messageParameters" : { + "objectName" : "`non_existing`", + "proposal" : "`testdata`.`a`, `testdata`.`b`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 32, + "fragment" : "non_existing" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM VALUES +(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT k AS a, udaf(v) FROM testDataHasSameNameWithAlias GROUP BY a +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "COLUMN_NOT_IN_GROUP_BY_CLAUSE", + "sqlState" : "42000", + "messageParameters" : { + "expression" : "\"k\"" + } +} + + +-- !query +set spark.sql.groupByAliases=false +-- !query schema +struct +-- !query output +spark.sql.groupByAliases false + + +-- !query +SELECT a AS k, udaf(b) FROM testData GROUP BY k +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN", + "errorSubClass" : "WITH_SUGGESTION", + "sqlState" : "42000", + "messageParameters" : { + "objectName" : "`k`", + "proposal" : "`testdata`.`a`, `testdata`.`b`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 47, + "stopIndex" : 47, + "fragment" : "k" + } ] +} + + +-- !query +SELECT a, udaf(1) FROM testData WHERE false GROUP BY a +-- !query schema +struct +-- !query output + + + +-- !query +SELECT udaf(1) FROM testData WHERE false +-- !query schema +struct +-- !query output + + + +-- !query +SELECT 1 FROM (SELECT udaf(1) FROM testData WHERE false) t +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +SELECT 1 from ( + SELECT 1 AS z, + udaf(a.x) + FROM (select 1 as x) a + WHERE false +) b +where b.z != b.z +-- !query schema +struct<1:int> +-- !query output + + + +-- !query +SELECT 1 FROM range(10) HAVING udaf(id) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_COLUMN", + "errorSubClass" : "WITH_SUGGESTION", + "sqlState" : "42000", + "messageParameters" : { + "objectName" : "`id`", + "proposal" : "`1`" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 37, + "stopIndex" : 38, + "fragment" : "id" + } ] +} + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES + (1, true), (1, false), + (2, true), + (3, false), (3, null), + (4, null), (4, null), + (5, null), (5, true), (5, false) AS test_agg(k, v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE 1 = 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT", + "messageParameters" : { + "functionList" : "`udaf`" + } +} + + +-- !query +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 4 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT", + 
"messageParameters" : { + "functionList" : "`udaf`" + } +} + + +-- !query +SELECT udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg WHERE k = 5 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT", + "messageParameters" : { + "functionList" : "`udaf`" + } +} + + +-- !query +SELECT k, udaf(v), some(v), any(v), bool_and(v), bool_or(v) FROM test_agg GROUP BY k +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_PANDAS_UDF_PLACEMENT", + "messageParameters" : { + "functionList" : "`udaf`" + } +} + + +-- !query +SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) = false +-- !query schema +struct +-- !query output +4 0 + + +-- !query +SELECT k, udaf(v) FROM test_agg GROUP BY k HAVING udaf(v) IS NULL +-- !query schema +struct +-- !query output + + + +-- !query +SELECT k, + udaf(v) AS count +FROM test_agg +WHERE k = 2 + AND v IN (SELECT Any(v) + FROM test_agg + WHERE k = 1) +GROUP BY k +-- !query schema +struct +-- !query output +2 1 diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out new file mode 100644 index 0000000000000..c9b27dca0c03c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf-grouping-set.sql.out @@ -0,0 +1,175 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE TEMPORARY VIEW grouping AS SELECT * FROM VALUES + ("1", "2", "3", 1), + ("4", "5", "6", 1), + ("7", "8", "9", 1) + as grouping(a, b, c, d) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS (()) +-- !query schema +struct +-- !query output +NULL NULL NULL 3 + + +-- !query +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((a)) +-- !query schema +struct +-- !query output +1 NULL NULL 1 +4 NULL NULL 1 +7 NULL NULL 1 + + +-- !query +SELECT a, b, c, udaf(d) FROM grouping GROUP BY a, b, c GROUPING SETS ((c)) +-- !query schema +struct +-- !query output +NULL NULL 3 1 +NULL NULL 6 1 +NULL NULL 9 1 + + +-- !query +SELECT c1, udaf(c2) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) +-- !query schema +struct +-- !query output +x 1 +y 1 + + +-- !query +SELECT c1, udaf(c2), grouping(c1) FROM (VALUES ('x', 10, 0), ('y', 20, 0)) AS t (c1, c2, c3) GROUP BY GROUPING SETS (c1) +-- !query schema +struct +-- !query output +x 1 0 +y 1 0 + + +-- !query +SELECT c1, c2, udaf(c3), grouping__id +FROM (VALUES ('x', 'a', 10), ('y', 'b', 20) ) AS t (c1, c2, c3) +GROUP BY GROUPING SETS ( ( c1 ), ( c2 ) ) +HAVING GROUPING__ID > 1 +-- !query schema +struct +-- !query output +NULL a 1 2 +NULL b 1 2 + + +-- !query +SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b)) +-- !query schema +struct<(a + b):int,b:int,udaf(c):int> +-- !query output +2 NULL 1 +4 NULL 1 +NULL 1 1 +NULL 2 1 + + +-- !query +SELECT a + b, b, udaf(c) FROM (VALUES (1,1,1),(2,2,2)) AS t(a,b,c) GROUP BY GROUPING SETS ( (a + b), (b + a), (b)) +-- !query schema +struct<(a + b):int,b:int,udaf(c):int> +-- !query output +2 NULL 1 +2 NULL 1 +4 NULL 1 +4 NULL 1 +NULL 1 1 +NULL 2 1 + + +-- !query +SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH ROLLUP +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : 
"PARSE_SYNTAX_ERROR", + "sqlState" : "42000", + "messageParameters" : { + "error" : "'ROLLUP'", + "hint" : ": extra input 'ROLLUP'" + } +} + + +-- !query +SELECT a, b, c, udaf(d) FROM grouping GROUP BY WITH CUBE +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42000", + "messageParameters" : { + "error" : "'CUBE'", + "hint" : ": extra input 'CUBE'" + } +} + + +-- !query +SELECT k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +1 1 1 +1 1 1 +1 NULL 1 +2 2 1 +2 2 1 +2 NULL 1 + + +-- !query +SELECT grouping__id, k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +0 1 1 1 +0 1 1 1 +0 2 2 1 +0 2 2 1 +1 1 NULL 1 +1 2 NULL 1 + + +-- !query +SELECT grouping(k1), k1, k2, udaf(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)) +-- !query schema +struct +-- !query output +0 1 1 1 +0 1 1 1 +0 1 NULL 1 +0 2 2 1 +0 2 2 1 +0 2 NULL 1 + + +-- !query +SELECT grouping_id(k1, k2), udaf(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1) +-- !query schema +struct +-- !query output +0 1 +0 1 +1 1 +1 1 diff --git a/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out new file mode 100644 index 0000000000000..0605af1c808db --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udaf/udaf.sql.out @@ -0,0 +1,67 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES +(1), (2), (3), (4) +as t1(int_col1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE FUNCTION myDoubleAvg AS 'test.org.apache.spark.sql.MyDoubleAvg' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT default.myDoubleAvg(int_col1) as my_avg from t1 +-- !query schema +struct +-- !query output +102.5 + + +-- !query +SELECT default.myDoubleAvg(int_col1, 3) as my_avg from t1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function spark_catalog.default.mydoubleavg. Expected: 1; Found: 2; line 1 pos 7 + + +-- !query +CREATE FUNCTION udaf1 AS 'test.non.existent.udaf' +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT default.udaf1(int_col1) as udaf1 from t1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Can not load class 'test.non.existent.udaf' when registering the function 'spark_catalog.default.udaf1', please make sure it is on the classpath; line 1 pos 7 + + +-- !query +DROP FUNCTION myDoubleAvg +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP FUNCTION udaf1 +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index cca9bb6741f68..5af7fa3898d90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -216,6 +216,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper resultFile: String, udf: TestUDF) extends TestCase with UDFTest + /** A UDAF test case. 
*/ + protected case class UDAFTestCase( + name: String, + inputFile: String, + resultFile: String, + udf: TestUDF) extends TestCase with UDFTest + /** A UDF PostgreSQL test case. */ protected case class UDFPgSQLTestCase( name: String, @@ -436,6 +443,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper if udfTestCase.udf.isInstanceOf[TestScalarPandasUDF] && shouldTestScalarPandasUDFs => s"${testCase.name}${System.lineSeparator()}" + s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}" + case udfTestCase: UDFTest + if udfTestCase.udf.isInstanceOf[TestGroupedAggPandasUDF] && + shouldTestGroupedAggPandasUDFs => + s"${testCase.name}${System.lineSeparator()}" + + s"Python: $pythonVer Pandas: $pandasVer PyArrow: $pyarrowVer${System.lineSeparator()}" case _ => s"${testCase.name}${System.lineSeparator()}" } @@ -495,6 +507,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper UDFTestCase( s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf) } + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) { + Seq(TestGroupedAggPandasUDF("udaf")).map { udf => + UDAFTestCase( + s"$testCaseName - ${udf.prettyName}", absPath, resultFile, udf) + } } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) { PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 69e01cef5ab1e..b850ffccd4ee3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -65,6 +65,7 @@ import org.apache.spark.sql.types._ * 1. Support UDF testing. * 2. Support DESC command. * 3. Support SHOW command. + * 4. Support UDAF testing. */ // scalastyle:on line.size.limit class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServer { @@ -247,6 +248,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) { Seq.empty + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udaf")) { + Seq.empty } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) { PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}ansi")) {