apache · shivusondur · Jul 15, 2019 · Jul 15, 2019 · Jul 19, 2019 · Jul 21, 2019
diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/pgSQL/udf-select_having.sql
@@ -0,0 +1,62 @@
+--
+-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+--
+--
+-- SELECT_HAVING
+-- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/select_having.sql
+--
+-- This test file was converted from inputs/pgSQL/select_having.sql
+
+-- load test data: 10 rows; a is unique (0-9) while b and c repeat, so GROUP BY yields groups of varying size
+CREATE TABLE test_having (a int, b int, c string, d string) USING parquet;
+INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A');
+INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b');
+INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c');
+INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D');
+INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e');
+INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F');
+INSERT INTO test_having VALUES (6, 4, 'cccc', 'g');
+INSERT INTO test_having VALUES (7, 4, 'cccc', 'h');
+INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I');
+INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j');
+
+SELECT udf(b), udf(c) FROM test_having
+	GROUP BY b, c HAVING udf(count(*)) = 1 ORDER BY b, c;
+
+-- HAVING references only the grouping column b, so it is effectively equivalent to WHERE in this case
+SELECT udf(b), udf(c) FROM test_having
+	GROUP BY b, c HAVING udf(b) = 3 ORDER BY b, c;
+
+-- [SPARK-28386] Cannot resolve ORDER BY columns with GROUP BY and HAVING
+-- SELECT lower(c), count(c) FROM test_having
+-- 	GROUP BY lower(c) HAVING count(*) > 2 OR min(a) = max(a)
+-- 	ORDER BY lower(c);
+
+SELECT udf(c), max(udf(a)) FROM test_having
+	GROUP BY c HAVING udf(count(*)) > 2 OR udf(min(a)) = udf(max(a))
+	ORDER BY c;
+
+-- test degenerate cases involving HAVING without GROUP BY
+-- Per SQL spec, these should generate 0 or 1 row, even without aggregates
+
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) = udf(max(a));
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) < udf(max(a));
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(udf(min(a))) < udf(max(a));
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(udf(min(a))) < udf(udf(max(a)));
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) < udf(udf(max(a)));
+
+
+-- errors: ungrouped column references (a is used outside an aggregate with no GROUP BY)
+SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a));
+SELECT 1 AS one FROM test_having HAVING udf(a) > 1;
+
+-- the really degenerate case: HAVING references no column, so the table need not be scanned at all
+SELECT 1 AS one FROM test_having HAVING udf(1 > 2);
+SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2));
+SELECT 1 AS one FROM test_having HAVING udf(1 < 2);
+SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2));
+
+-- and just to prove that we aren't scanning the table: the a = 0 row would divide by zero in WHERE (NOTE(review): confirm Spark raises on 1/0 rather than returning NULL)
+SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2;
+
+DROP TABLE test_having;
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/pgSQL/udf-select_having.sql.out
@@ -0,0 +1,227 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 27
+
+
+-- !query 0
+CREATE TABLE test_having (a int, b int, c string, d string) USING parquet
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+INSERT INTO test_having VALUES (0, 1, 'XXXX', 'A')
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+INSERT INTO test_having VALUES (1, 2, 'AAAA', 'b')
+-- !query 2 schema
+struct<>
+-- !query 2 output
+
+
+
+-- !query 3
+INSERT INTO test_having VALUES (2, 2, 'AAAA', 'c')
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+INSERT INTO test_having VALUES (3, 3, 'BBBB', 'D')
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+INSERT INTO test_having VALUES (4, 3, 'BBBB', 'e')
+-- !query 5 schema
+struct<>
+-- !query 5 output
+
+
+
+-- !query 6
+INSERT INTO test_having VALUES (5, 3, 'bbbb', 'F')
+-- !query 6 schema
+struct<>
+-- !query 6 output
+
+
+
+-- !query 7
+INSERT INTO test_having VALUES (6, 4, 'cccc', 'g')
+-- !query 7 schema
+struct<>
+-- !query 7 output
+
+
+
+-- !query 8
+INSERT INTO test_having VALUES (7, 4, 'cccc', 'h')
+-- !query 8 schema
+struct<>
+-- !query 8 output
+
+
+
+-- !query 9
+INSERT INTO test_having VALUES (8, 4, 'CCCC', 'I')
+-- !query 9 schema
+struct<>
+-- !query 9 output
+
+
+
+-- !query 10
+INSERT INTO test_having VALUES (9, 4, 'CCCC', 'j')
+-- !query 10 schema
+struct<>
+-- !query 10 output
+
+
+
+-- !query 11
+SELECT udf(b), udf(c) FROM test_having
+	GROUP BY b, c HAVING udf(count(*)) = 1 ORDER BY b, c
+-- !query 11 schema
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(c as string)) AS STRING):string>
+-- !query 11 output
+1	XXXX
+3	bbbb
+
+
+-- !query 12
+SELECT udf(b), udf(c) FROM test_having
+	GROUP BY b, c HAVING udf(b) = 3 ORDER BY b, c
+-- !query 12 schema
+struct<CAST(udf(cast(b as string)) AS INT):int,CAST(udf(cast(c as string)) AS STRING):string>
+-- !query 12 output
+3	BBBB
+3	bbbb
+
+
+-- !query 13
+SELECT udf(c), max(udf(a)) FROM test_having
+	GROUP BY c HAVING udf(count(*)) > 2 OR udf(min(a)) = udf(max(a))
+	ORDER BY c
+-- !query 13 schema
+struct<CAST(udf(cast(c as string)) AS STRING):string,max(CAST(udf(cast(a as string)) AS INT)):int>
+-- !query 13 output
+XXXX	0
+bbbb	5
+
+
+-- !query 14
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) = udf(max(a))
+-- !query 14 schema
+struct<CAST(udf(cast(min(a) as string)) AS INT):int,CAST(udf(cast(max(a) as string)) AS INT):int>
+-- !query 14 output
+
+
+
+-- !query 15
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) < udf(max(a))
+-- !query 15 schema
+struct<CAST(udf(cast(min(a) as string)) AS INT):int,CAST(udf(cast(max(a) as string)) AS INT):int>
+-- !query 15 output
+0	9
+
+
+-- !query 16
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(udf(min(a))) < udf(max(a))
+-- !query 16 schema
+struct<CAST(udf(cast(min(a) as string)) AS INT):int,CAST(udf(cast(max(a) as string)) AS INT):int>
+-- !query 16 output
+0	9
+
+
+-- !query 17
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(udf(min(a))) < udf(udf(max(a)))
+-- !query 17 schema
+struct<CAST(udf(cast(min(a) as string)) AS INT):int,CAST(udf(cast(max(a) as string)) AS INT):int>
+-- !query 17 output
+0	9
+
+
+-- !query 18
+SELECT udf(min(a)), udf(max(a)) FROM test_having HAVING udf(min(a)) < udf(udf(max(a)))
+-- !query 18 schema
+struct<CAST(udf(cast(min(a) as string)) AS INT):int,CAST(udf(cast(max(a) as string)) AS INT):int>
+-- !query 18 output
+0	9
+
+
+-- !query 19
+SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a))
+-- !query 19 schema
+struct<>
+-- !query 19 output
+org.apache.spark.sql.AnalysisException
+grouping expressions sequence is empty, and 'default.test_having.`a`' is not an aggregate function. Wrap '(min(default.test_having.`a`) AS `min(a#x)`, max(default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'default.test_having.`a`' in first() (or first_value) if you don't care which value you get.;
+
+
+-- !query 20
+SELECT 1 AS one FROM test_having HAVING udf(a) > 1
+-- !query 20 schema
+struct<>
+-- !query 20 output
+org.apache.spark.sql.AnalysisException
+cannot resolve '`a`' given input columns: [one]; line 1 pos 44
+
+
+-- !query 21
+SELECT 1 AS one FROM test_having HAVING udf(1 > 2)
+-- !query 21 schema
+struct<one:int>
+-- !query 21 output
+
+
+
+-- !query 22
+SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2))
+-- !query 22 schema
+struct<one:int>
+-- !query 22 output
+
+
+
+-- !query 23
+SELECT 1 AS one FROM test_having HAVING udf(1 < 2)
+-- !query 23 schema
+struct<one:int>
+-- !query 23 output
+1
+
+
+-- !query 24
+SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2))
+-- !query 24 schema
+struct<one:int>
+-- !query 24 output
+1
+
+
+-- !query 25
+SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2
+-- !query 25 schema
+struct<one:int>
+-- !query 25 output
+1
+
+
+-- !query 26
+DROP TABLE test_having
+-- !query 26 schema
+struct<>
+-- !query 26 output
+