From af80ac7c2f468d00fb2ffd040abe3f4fa0bed762 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 16 Jul 2019 09:08:58 +0800 Subject: [PATCH 1/3] Add converted sql test file and output. --- .../sql-tests/inputs/udf/udf-cross-join.sql | 37 +++++ .../results/udf/udf-cross-join.sql.out | 146 ++++++++++++++++++ 2 files changed, 183 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/udf/udf-cross-join.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/udf-cross-join.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-cross-join.sql new file mode 100644 index 000000000000..9aa6de7e92b8 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/udf-cross-join.sql @@ -0,0 +1,37 @@ +-- Cross join detection and error checking is done in JoinSuite since explain output is +-- used in the error message and the ids are not stable. Only positive cases are checked here. +-- This test file was converted from cross-join.sql. 
+ +create temporary view nt1 as select * from values + ("one", 1), + ("two", 2), + ("three", 3) + as nt1(k, v1); + +create temporary view nt2 as select * from values + ("one", 1), + ("two", 22), + ("one", 5) + as nt2(k, v2); + +-- Cross joins with and without predicates +SELECT * FROM nt1 cross join nt2; +SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k); +SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k)); +SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22"; + +SELECT udf(a.key), udf(b.key) FROM +(SELECT udf(k) key FROM nt1 WHERE v1 < 2) a +CROSS JOIN +(SELECT udf(k) key FROM nt2 WHERE v2 = 22) b; + +-- Join reordering +create temporary view A(a, va) as select * from nt1; +create temporary view B(b, vb) as select * from nt1; +create temporary view C(c, vc) as select * from nt1; +create temporary view D(d, vd) as select * from nt1; + +-- Allowed since cross join with C is explicit +select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d)); +-- Cross joins with non-equal predicates +SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k)); diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out new file mode 100644 index 000000000000..11c1e01d5408 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out @@ -0,0 +1,146 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 13 + + +-- !query 0 +create temporary view nt1 as select * from values + ("one", 1), + ("two", 2), + ("three", 3) + as nt1(k, v1) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +create temporary view nt2 as select * from values + ("one", 1), + ("two", 22), + ("one", 5) + as nt2(k, v2) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +SELECT * FROM nt1 cross join nt2 +-- !query 2 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 2 
output +one 1 one 1 +one 1 one 5 +one 1 two 22 +three 3 one 1 +three 3 one 5 +three 3 two 22 +two 2 one 1 +two 2 one 5 +two 2 two 22 + + +-- !query 3 +SELECT * FROM nt1 cross join nt2 where udf(nt1.k) = udf(nt2.k) +-- !query 3 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 3 output +one 1 one 1 +one 1 one 5 +two 2 two 22 + + +-- !query 4 +SELECT * FROM nt1 cross join nt2 on (udf(nt1.k) = udf(nt2.k)) +-- !query 4 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 4 output +one 1 one 1 +one 1 one 5 +two 2 two 22 + + +-- !query 5 +SELECT * FROM nt1 cross join nt2 where udf(nt1.v1) = "1" and udf(nt2.v2) = "22" +-- !query 5 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 5 output +one 1 two 22 + + +-- !query 6 +SELECT udf(a.key), udf(b.key) FROM +(SELECT udf(k) key FROM nt1 WHERE v1 < 2) a +CROSS JOIN +(SELECT udf(k) key FROM nt2 WHERE v2 = 22) b +-- !query 6 schema +struct +-- !query 6 output +one two + + +-- !query 7 +create temporary view A(a, va) as select * from nt1 +-- !query 7 schema +struct<> +-- !query 7 output + + + +-- !query 8 +create temporary view B(b, vb) as select * from nt1 +-- !query 8 schema +struct<> +-- !query 8 output + + + +-- !query 9 +create temporary view C(c, vc) as select * from nt1 +-- !query 9 schema +struct<> +-- !query 9 output + + + +-- !query 10 +create temporary view D(d, vd) as select * from nt1 +-- !query 10 schema +struct<> +-- !query 10 output + + + +-- !query 11 +select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d)) +-- !query 11 schema +struct<> +-- !query 11 output +org.apache.spark.sql.AnalysisException +Detected implicit cartesian product for INNER join between logical plans +Filter (udf(a#x) = udf(b#x)) ++- Join Inner + :- Project [k#x AS a#x, v1#x AS va#x] + : +- LocalRelation [k#x, v1#x] + +- Project [k#x AS b#x, v1#x AS vb#x] + +- LocalRelation [k#x, v1#x] +and +Project [k#x AS d#x, v1#x AS vd#x] ++- LocalRelation [k#x, v1#x] +Join condition is missing or trivial. 
+Either: use the CROSS JOIN syntax to allow cartesian products between these +relations, or: enable implicit cartesian products by setting the configuration +variable spark.sql.crossJoin.enabled=true; + + +-- !query 12 +SELECT * FROM nt1 CROSS JOIN nt2 ON (udf(nt1.k) > udf(nt2.k)) +-- !query 12 schema +struct<k:string,v1:int,k:string,v2:int> +-- !query 12 output +three 3 one 1 +three 3 one 5 +two 2 one 1 +two 2 one 5 From ac20743bf09d6a976f632c586da683220ff8bdf5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 16 Jul 2019 15:27:15 +0800 Subject: [PATCH 2/3] Update test result. --- .../results/udf/udf-cross-join.sql.out | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out index 11c1e01d5408..9de1af534b81 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out @@ -116,23 +116,17 @@ struct<> -- !query 11 select * from ((A join B on (udf(a) = udf(b))) cross join C) join D on (udf(a) = udf(d)) -- !query 11 schema -struct<> +struct<a:string,va:int,b:string,vb:int,c:string,vc:int,d:string,vd:int> -- !query 11 output -org.apache.spark.sql.AnalysisException -Detected implicit cartesian product for INNER join between logical plans -Filter (udf(a#x) = udf(b#x)) -+- Join Inner - :- Project [k#x AS a#x, v1#x AS va#x] - : +- LocalRelation [k#x, v1#x] - +- Project [k#x AS b#x, v1#x AS vb#x] - +- LocalRelation [k#x, v1#x] -and -Project [k#x AS d#x, v1#x AS vd#x] -+- LocalRelation [k#x, v1#x] -Join condition is missing or trivial. 
-Either: use the CROSS JOIN syntax to allow cartesian products between these -relations, or: enable implicit cartesian products by setting the configuration -variable spark.sql.crossJoin.enabled=true; +one 1 one 1 one 1 one 1 +one 1 one 1 three 3 one 1 +one 1 one 1 two 2 one 1 +three 3 three 3 one 1 three 3 +three 3 three 3 three 3 three 3 +three 3 three 3 two 2 three 3 +two 2 two 2 one 1 two 2 +two 2 two 2 three 3 two 2 +two 2 two 2 two 2 two 2 -- !query 12 From 0dbc98598ce5760c7f801434497a09a0f134fdbd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 18 Jul 2019 14:58:18 +0800 Subject: [PATCH 3/3] Updated output file. --- .../test/resources/sql-tests/results/udf/udf-cross-join.sql.out | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out index 9de1af534b81..98d3ad37a8df 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-cross-join.sql.out @@ -76,7 +76,7 @@ SELECT udf(a.key), udf(b.key) FROM CROSS JOIN (SELECT udf(k) key FROM nt2 WHERE v2 = 22) b -- !query 6 schema -struct +struct -- !query 6 output one two