From aefdf4fccf6f6ae6ac524dd00da6c6430907d2ab Mon Sep 17 00:00:00 2001
From: Huaxin Gao <huaxing@us.ibm.com>
Date: Wed, 10 Jul 2019 17:41:08 -0700
Subject: [PATCH 1/2] [SPARK-28285][SQL][PYTHON][TESTS] Convert and port
 'outer-join.sql' into UDF test base

---
 .../sql-tests/inputs/udf/udf-outer-join.sql   | 45 ++++++++++
 .../results/udf/udf-outer-join.sql.out        | 88 +++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
 create mode 100644 sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
new file mode 100644
index 0000000000000..0fe27ac906970
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
@@ -0,0 +1,45 @@
+-- This test file was converted from outer-join.sql.
+-- List of configurations the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
+
+-- SPARK-17099: Incorrect result when a HAVING clause is added to a GROUP BY query
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
+(-234), (145), (367), (975), (298)
+as t1(int_col1);
+
+CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES
+(-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158)
+as t2(int_col0, int_col1);
+
+SELECT
+  (udf(SUM(COALESCE(t1.int_col1, t2.int_col0)))),
+     (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
+FROM t1
+RIGHT JOIN t2
+  ON (t2.int_col0) = (t1.int_col1)
+GROUP BY udf(GREATEST(COALESCE(t2.int_col1, 109), COALESCE(t1.int_col1, -449))),
+         COALESCE(t1.int_col1, t2.int_col0)
+HAVING (udf(SUM(COALESCE(t1.int_col1, t2.int_col0))))
+            > (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2);
+
+
+-- SPARK-17120: Analyzer incorrectly optimizes plan to empty LocalRelation
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1);
+
+CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1);
+
+-- Enable cross joins for the LEFT JOIN test below: its only join condition is the constant false.
+-- Ultimately the join should be optimized away; the flag is reset to false after the query.
+set spark.sql.crossJoin.enabled = true;
+SELECT *
+FROM (
+SELECT
+    udf(COALESCE(t2.int_col1, udf(t1.int_col1))) AS int_col
+    FROM t1
+    LEFT JOIN t2 ON false
+) t where (udf(t.int_col)) is not null;
+set spark.sql.crossJoin.enabled = false;
+
+
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out
new file mode 100644
index 0000000000000..6394dad0f4acc
--- /dev/null
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out
@@ -0,0 +1,88 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 8
+
+
+-- !query 0
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
+(-234), (145), (367), (975), (298)
+as t1(int_col1)
+-- !query 0 schema
+struct<>
+-- !query 0 output
+
+
+
+-- !query 1
+CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES
+(-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158)
+as t2(int_col0, int_col1)
+-- !query 1 schema
+struct<>
+-- !query 1 output
+
+
+
+-- !query 2
+SELECT
+  (udf(SUM(COALESCE(t1.int_col1, t2.int_col0)))),
+     (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
+FROM t1
+RIGHT JOIN t2
+  ON (t2.int_col0) = (t1.int_col1)
+GROUP BY udf(GREATEST(COALESCE(t2.int_col1, 109), COALESCE(t1.int_col1, -449))),
+         COALESCE(t1.int_col1, t2.int_col0)
+HAVING (udf(SUM(COALESCE(t1.int_col1, t2.int_col0))))
+            > (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
+-- !query 2 schema
+struct<udf(sum(cast(coalesce(int_col1, int_col0) as bigint))):string,(CAST(udf(coalesce(int_col1, int_col0)) AS DOUBLE) * CAST(2 AS DOUBLE)):double>
+-- !query 2 output
+-367	-734.0
+-507	-1014.0
+-769	-1538.0
+-800	-1600.0
+
+
+-- !query 3
+CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1)
+-- !query 3 schema
+struct<>
+-- !query 3 output
+
+
+
+-- !query 4
+CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1)
+-- !query 4 schema
+struct<>
+-- !query 4 output
+
+
+
+-- !query 5
+set spark.sql.crossJoin.enabled = true
+-- !query 5 schema
+struct<key:string,value:string>
+-- !query 5 output
+spark.sql.crossJoin.enabled	true
+
+
+-- !query 6
+SELECT *
+FROM (
+SELECT
+    udf(COALESCE(t2.int_col1, udf(t1.int_col1))) AS int_col
+    FROM t1
+    LEFT JOIN t2 ON false
+) t where (udf(t.int_col)) is not null
+-- !query 6 schema
+struct<int_col:string>
+-- !query 6 output
+97
+
+
+-- !query 7
+set spark.sql.crossJoin.enabled = false
+-- !query 7 schema
+struct<key:string,value:string>
+-- !query 7 output
+spark.sql.crossJoin.enabled	false

From 5955d46daddcb8b875dffbf645af1c0f5e63a986 Mon Sep 17 00:00:00 2001
From: Huaxin Gao <huaxing@us.ibm.com>
Date: Wed, 17 Jul 2019 14:48:51 -0700
Subject: [PATCH 2/2] add a few more udf

---
 .../sql-tests/inputs/udf/udf-outer-join.sql   | 10 ++++-----
 .../results/udf/udf-outer-join.sql.out        | 22 +++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
index 0fe27ac906970..4eb0805c9cc67 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-outer-join.sql
@@ -14,14 +14,14 @@ CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES
 as t2(int_col0, int_col1);
 
 SELECT
-  (udf(SUM(COALESCE(t1.int_col1, t2.int_col0)))),
+  (udf(SUM(udf(COALESCE(t1.int_col1, t2.int_col0))))),
      (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
 FROM t1
 RIGHT JOIN t2
-  ON (t2.int_col0) = (t1.int_col1)
-GROUP BY udf(GREATEST(COALESCE(t2.int_col1, 109), COALESCE(t1.int_col1, -449))),
+  ON udf(t2.int_col0) = udf(t1.int_col1)
+GROUP BY udf(GREATEST(COALESCE(udf(t2.int_col1), 109), COALESCE(t1.int_col1, udf(-449)))),
          COALESCE(t1.int_col1, t2.int_col0)
-HAVING (udf(SUM(COALESCE(t1.int_col1, t2.int_col0))))
+HAVING (udf(SUM(COALESCE(udf(t1.int_col1), udf(t2.int_col0)))))
             > (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2);
 
 
@@ -36,7 +36,7 @@ set spark.sql.crossJoin.enabled = true;
 SELECT *
 FROM (
 SELECT
-    udf(COALESCE(t2.int_col1, udf(t1.int_col1))) AS int_col
+    udf(COALESCE(udf(t2.int_col1), udf(t1.int_col1))) AS int_col
     FROM t1
     LEFT JOIN t2 ON false
 ) t where (udf(t.int_col)) is not null;
diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out
index 6394dad0f4acc..819f786070882 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-outer-join.sql.out
@@ -24,22 +24,22 @@ struct<>
 
 -- !query 2
 SELECT
-  (udf(SUM(COALESCE(t1.int_col1, t2.int_col0)))),
+  (udf(SUM(udf(COALESCE(t1.int_col1, t2.int_col0))))),
      (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
 FROM t1
 RIGHT JOIN t2
-  ON (t2.int_col0) = (t1.int_col1)
-GROUP BY udf(GREATEST(COALESCE(t2.int_col1, 109), COALESCE(t1.int_col1, -449))),
+  ON udf(t2.int_col0) = udf(t1.int_col1)
+GROUP BY udf(GREATEST(COALESCE(udf(t2.int_col1), 109), COALESCE(t1.int_col1, udf(-449)))),
          COALESCE(t1.int_col1, t2.int_col0)
-HAVING (udf(SUM(COALESCE(t1.int_col1, t2.int_col0))))
+HAVING (udf(SUM(COALESCE(udf(t1.int_col1), udf(t2.int_col0)))))
             > (udf(COALESCE(t1.int_col1, t2.int_col0)) * 2)
 -- !query 2 schema
-struct<udf(sum(cast(coalesce(int_col1, int_col0) as bigint))):string,(CAST(udf(coalesce(int_col1, int_col0)) AS DOUBLE) * CAST(2 AS DOUBLE)):double>
+struct<CAST(udf(cast(sum(cast(cast(udf(cast(coalesce(int_col1, int_col0) as string)) as int) as bigint)) as string)) AS BIGINT):bigint,(CAST(udf(cast(coalesce(int_col1, int_col0) as string)) AS INT) * 2):int>
 -- !query 2 output
--367	-734.0
--507	-1014.0
--769	-1538.0
--800	-1600.0
+-367	-734
+-507	-1014
+-769	-1538
+-800	-1600
 
 
 -- !query 3
@@ -70,12 +70,12 @@ spark.sql.crossJoin.enabled	true
 SELECT *
 FROM (
 SELECT
-    udf(COALESCE(t2.int_col1, udf(t1.int_col1))) AS int_col
+    udf(COALESCE(udf(t2.int_col1), udf(t1.int_col1))) AS int_col
     FROM t1
     LEFT JOIN t2 ON false
 ) t where (udf(t.int_col)) is not null
 -- !query 6 schema
-struct<int_col:string>
+struct<int_col:int>
 -- !query 6 output
 97