From 0f55d25acae704f1816da1acee7fd88e6fc61284 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Fri, 25 Feb 2022 14:34:55 +0100 Subject: [PATCH 1/2] Fix typo in SQL statement --- .../src/test/java/com/linkedin/coral/spark/TestUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java b/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java index ab5e86191..574e3366a 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java @@ -73,7 +73,7 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS foo_view", "AS", "SELECT b AS bcol, sum(c) AS sum_c", "FROM foo", "GROUP BY b")); - run(driver, "DROP VIEW IF EXITS foo_v1"); + run(driver, "DROP VIEW IF EXISTS foo_v1"); run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS foo_v1 ", "AS ", "SELECT DATE '2013-01-01', '2017-08-22 01:02:03', CAST(123 AS SMALLINT), CAST(123 AS TINYINT) ", "FROM foo", From 6ca047fa98517e3dd0b088faa7aceb280dbfab91 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Fri, 25 Feb 2022 15:47:18 +0100 Subject: [PATCH 2/2] Set the schema case sensitivity to false for Hive In the translation of views, when joining tables that have namesake column names, irrespective of their case, make sure that the resulting relation uses unique column names, in order to avoid the situation where the generated SQL statement contains ambiguous column names. 
--- .../com/linkedin/coral/common/HiveTypeSystem.java | 5 +++++ .../hive/hive2rel/HiveToRelConverterTest.java | 15 +++++++++++++++ .../linkedin/coral/hive/hive2rel/TestUtils.java | 10 +++++++++- .../com/linkedin/coral/spark/CoralSparkTest.java | 12 ++++++++++++ .../java/com/linkedin/coral/spark/TestUtils.java | 6 ++++++ .../trino/rel2trino/HiveToTrinoConverterTest.java | 9 ++++++++- .../linkedin/coral/trino/rel2trino/TestUtils.java | 5 +++++ 7 files changed, 60 insertions(+), 2 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java index 004796fc6..5f91ba462 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java @@ -156,6 +156,11 @@ public boolean shouldConvertRaggedUnionTypesToVarying() { return true; } + @Override + public boolean isSchemaCaseSensitive() { + return false; + } + private RelDataType nullableType(RelDataTypeFactory typeFactory, SqlTypeName typeName) { return typeFactory.createTypeWithNullability(typeFactory.createSqlType(typeName), true); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java index 870fb1e51..778dbfef4 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java @@ -16,6 +16,7 @@ import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.runtime.CalciteContextException; +import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.sql.type.SqlTypeName; @@ -618,6 +619,20 @@ public void 
testCastToDecimalDefault() { assertEquals(generated, expected); } + @Test + public void testNameSakeColumnNamesShouldGetUniqueIdentifiers() { + String expected = "SELECT \"some_id\"\n" + + "FROM (SELECT \"duplicate_column_name_a\".\"some_id\", \"t\".\"SOME_ID\" AS \"SOME_ID0\"\n" + + "FROM \"hive\".\"default\".\"duplicate_column_name_a\"\n" + + "LEFT JOIN (SELECT TRIM(\"some_id\") AS \"SOME_ID\", CAST(TRIM(\"some_id\") AS VARCHAR(10485760)) AS \"$f1\"\n" + + "FROM \"hive\".\"default\".\"duplicate_column_name_b\") AS \"t\" ON \"duplicate_column_name_a\".\"some_id\" = \"t\".\"$f1\") AS \"t0\"\n" + + "WHERE \"t0\".\"some_id\" <> ''"; + SqlNode node = viewToSqlNode("default", "view_namesake_column_names"); + converter.getSqlValidator().validate(node); + String generated = nodeToStr(node); + assertEquals(generated, expected); + } + private String relToString(String sql) { return RelOptUtil.toString(converter.convertSql(sql)); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java index 4b9f82cc2..4c788c276 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java @@ -197,11 +197,19 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException { driver.run( "CREATE TABLE IF NOT EXISTS nested_union(foo uniontype>>)"); + driver.run("CREATE TABLE IF NOT EXISTS duplicate_column_name_a (some_id string)"); + driver.run("CREATE TABLE IF NOT EXISTS duplicate_column_name_b (some_id string)"); + driver.run("CREATE VIEW IF NOT EXISTS view_namesake_column_names AS\n" + + " SELECT a.some_id FROM duplicate_column_name_a a\n" + + " LEFT JOIN ( SELECT trim(some_id) AS SOME_ID FROM duplicate_column_name_b) b ON a.some_id = b.some_id\n" + + " WHERE a.some_id != ''"); + testHive.databases = ImmutableList.of( new TestHive.DB("test", ImmutableList.of("tableOne", 
"tableTwo", "tableOneView")), new TestHive.DB("default", ImmutableList.of("bar", "complex", "foo", "foo_view", "null_check_view", "null_check_wrapper", - "schema_evolve", "view_schema_evolve", "view_schema_evolve_wrapper", "union_table", "nested_union")), + "schema_evolve", "view_schema_evolve", "view_schema_evolve_wrapper", "union_table", "nested_union", + "duplicate_column_name_a", "duplicate_column_name_b", "view_namesake_column_names")), new TestHive.DB("fuzzy_union", ImmutableList.of("tableA", "tableB", "tableC", "union_view", "union_view_with_more_than_two_tables", "union_view_with_alias", "union_view_single_branch_evolved", diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java index a82353d8e..6922680ff 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java @@ -727,4 +727,16 @@ public void testDeduplicateUdf() { List udfJars = coralSpark.getSparkUDFInfoList(); assertEquals(1, udfJars.size()); } + + @Test + public void testNameSakeColumnNamesShouldGetUniqueIdentifiers() { + String targetSql = String.join("\n", "SELECT some_id", "FROM (SELECT tablea.some_id, t.SOME_ID SOME_ID0", + "FROM duplicate_column_name.tablea", + "LEFT JOIN (SELECT TRIM(some_id) SOME_ID, CAST(TRIM(some_id) AS STRING) $f1", + "FROM duplicate_column_name.tableb) t ON tablea.some_id = t.$f1) t0", "WHERE t0.some_id <> ''"); + RelNode relNode = TestUtils.toRelNode("duplicate_column_name", "view_namesake_column_names"); + CoralSpark coralSpark = CoralSpark.create(relNode); + String expandedSql = coralSpark.getSparkSql(); + assertEquals(expandedSql, targetSql); + } } diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java b/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java index 574e3366a..29c7479e1 100644 --- 
a/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/TestUtils.java @@ -114,6 +114,12 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS named_struct_view", "AS", "SELECT named_struct('abc', 123, 'def', 'xyz') AS named_struc", "FROM bar")); + run(driver, String.join("\n", "", "CREATE DATABASE IF NOT EXISTS duplicate_column_name")); + run(driver, "CREATE TABLE duplicate_column_name.tableA (some_id string)"); + run(driver, "CREATE TABLE duplicate_column_name.tableB (some_id string)"); + run(driver, "CREATE VIEW IF NOT EXISTS duplicate_column_name.view_namesake_column_names AS " + + "SELECT a.some_id FROM duplicate_column_name.tableA a LEFT JOIN (SELECT trim(some_id) AS SOME_ID FROM duplicate_column_name.tableB) b ON a.some_id = b.some_id WHERE a.some_id != ''"); + // Views and tables used in FuzzyUnionViewTest run(driver, String.join("\n", "", "CREATE DATABASE IF NOT EXISTS fuzzy_union")); diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index fe9317129..fad34e611 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -196,7 +196,14 @@ public Object[][] viewTestCasesProvider() { + "FROM \"test\".\"table_ints_strings\"" }, { "test", "cast_decimal_view", "SELECT CAST(\"a\" AS DECIMAL(6, 2)) AS \"casted_decimal\"\n" - + "FROM \"test\".\"table_ints_strings\"" } }; + + "FROM \"test\".\"table_ints_strings\"" }, + + { "test", "view_namesake_column_names", "SELECT \"some_id\"\n" + + "FROM (SELECT \"duplicate_column_name_a\".\"some_id\" AS \"some_id\", \"t\".\"SOME_ID\" AS \"SOME_ID0\"\n" + + "FROM 
\"test\".\"duplicate_column_name_a\"\n" + + "LEFT JOIN (SELECT TRIM(\"some_id\") AS \"SOME_ID\", CAST(TRIM(\"some_id\") AS VARCHAR(65536)) AS \"$f1\"\n" + + "FROM \"test\".\"duplicate_column_name_b\") AS \"t\" ON \"duplicate_column_name_a\".\"some_id\" = \"t\".\"$f1\") AS \"t0\"\n" + + "WHERE \"t0\".\"some_id\" <> ''" } }; } @Test diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java index 094277915..8cd401104 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java @@ -359,6 +359,11 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce run(driver, "CREATE VIEW IF NOT EXISTS test.view_with_transform_column_name_reset AS SELECT struct_col AS structCol FROM (SELECT * FROM test.viewA UNION ALL SELECT * FROM test.viewB) X"); run(driver, "ALTER TABLE test.tableT CHANGE COLUMN structCol structCol struct"); + + run(driver, "CREATE TABLE test.duplicate_column_name_a (some_id string)"); + run(driver, "CREATE TABLE test.duplicate_column_name_b (some_id string)"); + run(driver, "CREATE VIEW IF NOT EXISTS test.view_namesake_column_names AS \n" + + "SELECT a.some_id FROM test.duplicate_column_name_a a LEFT JOIN ( SELECT trim(some_id) AS SOME_ID FROM test.duplicate_column_name_b) b ON a.some_id = b.some_id WHERE a.some_id != ''"); } public static RelNode convertView(String db, String view) {