diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java
index 416c0299f..f1c011946 100644
--- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java
+++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java
@@ -12,13 +12,19 @@
 import java.util.List;
 import java.util.Map;
 
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.linq4j.tree.Expressions;
 import org.apache.calcite.rel.BiRel;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.RelShuttleImpl;
 import org.apache.calcite.rel.core.*;
+import org.apache.calcite.rel.logical.LogicalProject;
 import org.apache.calcite.rel.logical.LogicalTableFunctionScan;
 import org.apache.calcite.rel.rel2sql.RelToSqlConverter;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexFieldAccess;
 import org.apache.calcite.rex.RexLiteral;
@@ -453,4 +459,97 @@ public SqlNode toSql(RexProgram program, RexNode rex) {
       }
     };
   }
+
+  /**
+   * Converts a set operation (e.g. {@code UNION}, {@code INTERSECT}, {@code MINUS}) to SQL.
+   *
+   * <p>Before an input is converted, it is passed through a {@link SetProjectAdapterShuttle} built
+   * from the row type of the set operation, which adds explicit casts to {@code char} projections
+   * feeding a {@code varchar} output column.
+   *
+   * @param operator SQL set operator used to combine the converted inputs
+   * @param rel set operation whose inputs are converted in order and chained left to right
+   * @return result wrapping the combined SQL node, tagged with the {@code SET_OP} clause
+   */
+  @Override
+  public Result setOpToSql(SqlSetOperator operator, RelNode rel) {
+    // The shuttle only depends on the row type of the set operation, so one instance serves all inputs.
+    final SetProjectAdapterShuttle projectAdapter = new SetProjectAdapterShuttle(rel.getRowType());
+    SqlNode node = null;
+    for (Ord<RelNode> input : Ord.zip(rel.getInputs())) {
+      final RelNode adapted = input.e.accept(projectAdapter);
+      final Result result = visitChild(input.i, adapted);
+      if (node == null) {
+        node = result.asSelect();
+      } else {
+        node = operator.createCall(POS, node, result.asSelect());
+      }
+    }
+    final List<Clause> clauses = Expressions.list(Clause.SET_OP);
+    return result(node, clauses, rel, null);
+  }
+
+  /**
+   * Adapts {@code LogicalProject} nodes feeding a set operation (e.g. {@code UNION}, {@code INTERSECT},
+   * {@code MINUS}) whose branches produce character columns of different types.
+   *
+   * <p>Every {@code char} projection whose corresponding set output column is {@code varchar} is wrapped
+   * in an explicit {@code CAST} to that {@code varchar} type. Projects that need no cast, or whose field
+   * count differs from the set operation's, are returned unchanged. The input below a visited project
+   * is not traversed.
+   *
+   * @see <a href="https://github.com/trinodb/trino/issues/9031">https://github.com/trinodb/trino/issues/9031</a>
+   */
+  private static class SetProjectAdapterShuttle extends RelShuttleImpl {
+    private final RelDataType setRowType;
+
+    public SetProjectAdapterShuttle(RelDataType setRowType) {
+      this.setRowType = setRowType;
+    }
+
+    @Override
+    public RelNode visit(LogicalProject project) {
+      final List<RelDataTypeField> setFields = setRowType.getFieldList();
+      final RelDataType projectRowType = project.getRowType();
+      final List<RelDataTypeField> projectFields = projectRowType.getFieldList();
+      if (setFields.size() != projectFields.size()) {
+        return project;
+      }
+
+      final RexBuilder rexBuilder = project.getCluster().getRexBuilder();
+      final List<RexNode> projects = new ArrayList<>(projectFields.size());
+      final List<RelDataTypeField> adaptedFields = new ArrayList<>(projectFields.size());
+      boolean castAdded = false;
+      for (int fieldIndex = 0; fieldIndex < projectFields.size(); fieldIndex++) {
+        final RexNode expression = project.getProjects().get(fieldIndex);
+        final RelDataTypeField setField = setFields.get(fieldIndex);
+        final RelDataTypeField projectField = projectFields.get(fieldIndex);
+        if (requiresVarcharCast(setField.getType(), projectField.getType())) {
+          // Work-around for the Trino limitation in dealing with set statements between `char` and
+          // `varchar` columns. See https://github.com/trinodb/trino/issues/9031
+          projects.add(rexBuilder.makeCast(setField.getType(), expression));
+          adaptedFields.add(setField);
+          castAdded = true;
+        } else {
+          projects.add(expression);
+          adaptedFields.add(projectField);
+        }
+      }
+
+      if (!castAdded) {
+        // Nothing to adapt: keep the original node instead of building an identical copy.
+        return project;
+      }
+      final RelDataType adaptedRowType =
+          new RelRecordType(projectRowType.getStructKind(), adaptedFields, projectRowType.isNullable());
+      return LogicalProject.create(project.getInput(), projects, adaptedRowType);
+    }
+
+    /** Returns whether a {@code char} projection feeds a {@code varchar} set output column. */
+    private static boolean requiresVarcharCast(RelDataType setFieldType, RelDataType projectFieldType) {
+      return setFieldType != null && projectFieldType != null
+          && SqlTypeName.VARCHAR == setFieldType.getSqlTypeName()
+          && SqlTypeName.CHAR == projectFieldType.getSqlTypeName();
+    }
+  }
 }
diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java
index 754ccecfc..0e2037d84 100644
--- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java
+++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java
@@ -193,7 +193,55 @@ public Object[][] viewTestCasesProvider() {
             + "FROM \"test\".\"duplicate_column_name_a\" AS \"duplicate_column_name_a\"\n"
             + "LEFT JOIN (SELECT TRIM(\"duplicate_column_name_b\".\"some_id\") AS \"SOME_ID\", CAST(TRIM(\"duplicate_column_name_b\".\"some_id\") AS VARCHAR(65536)) AS \"$f1\"\n"
             + "FROM \"test\".\"duplicate_column_name_b\" AS \"duplicate_column_name_b\") AS \"t\" ON \"duplicate_column_name_a\".\"some_id\" = \"t\".\"$f1\") AS \"t0\"\n"
-            + "WHERE \"t0\".\"some_id\" <> ''" } };
+            + "WHERE \"t0\".\"some_id\" <> ''" },
+
+        { "test", "view_char_different_size_in_union", "SELECT CAST(\"table_with_mixed_columns\".\"a_char1\" AS VARCHAR(255)) AS \"col\"\n"
+            + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n"
+            + "SELECT CAST(\"table_with_mixed_columns0\".\"a_char255\" AS VARCHAR(255)) AS \"col\"\n"
+            +
"FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "view_cast_char_to_varchar", "SELECT CAST(\"table_with_mixed_columns\".\"a_char1\" AS VARCHAR(65535)) AS \"col\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"" }, + + { "test", "view_cast_char_to_varchar_in_union", "SELECT CAST(\"table_with_mixed_columns\".\"a_char1\" AS VARCHAR(65535)) AS \"col\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT CAST(CASE WHEN \"table_with_mixed_columns0\".\"a_char1\" IS NOT NULL THEN \"table_with_mixed_columns0\".\"a_char1\" ELSE 'N' END AS VARCHAR(65535)) AS \"col\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "view_cast_char_to_varchar_in_union_flipped", "SELECT CAST(CASE WHEN \"table_with_mixed_columns\".\"a_char1\" IS NOT NULL THEN \"table_with_mixed_columns\".\"a_char1\" ELSE 'N' END AS VARCHAR(65535)) AS \"col\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT CAST(\"table_with_mixed_columns0\".\"a_char1\" AS VARCHAR(65535)) AS \"col\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "view_cast_char_to_varchar_with_other_fields_in_union", "SELECT CAST(\"table_with_mixed_columns\".\"a_char1\" AS VARCHAR(65535)) AS \"text\", \"table_with_mixed_columns\".\"a_boolean\" AS \"a_boolean\", \"table_with_mixed_columns\".\"a_smallint\" AS \"a_number\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT CAST(CASE WHEN \"table_with_mixed_columns0\".\"a_char1\" IS NOT NULL THEN \"table_with_mixed_columns0\".\"a_char1\" ELSE 'N' END AS VARCHAR(65535)) AS \"text\", \"table_with_mixed_columns0\".\"a_boolean\" AS \"a_boolean\", \"table_with_mixed_columns0\".\"a_integer\" AS \"a_number\"\n" + + "FROM 
\"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "view_char_and_null_in_union", "SELECT \"table_with_mixed_columns\".\"a_char1\" AS \"text\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT NULL AS \"text\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "view_different_numerical_types_in_union", "SELECT *\n" + "FROM (SELECT *\n" + + "FROM (SELECT \"table_with_mixed_columns\".\"a_tinyint\" AS \"a_number\", \"table_with_mixed_columns\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT \"table_with_mixed_columns0\".\"a_smallint\" AS \"a_number\", \"table_with_mixed_columns0\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\") AS \"t1\"\n" + "UNION ALL\n" + + "SELECT \"table_with_mixed_columns1\".\"a_integer\" AS \"a_number\", \"table_with_mixed_columns1\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns1\") AS \"t3\"\n" + "UNION ALL\n" + + "SELECT \"table_with_mixed_columns2\".\"a_bigint\" AS \"a_number\", \"table_with_mixed_columns2\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns2\"" }, + + { "test", "view_union_no_casting", "SELECT \"table_with_mixed_columns\".\"a_tinyint\" AS \"a_tinyint\", \"table_with_mixed_columns\".\"a_smallint\" AS \"a_smallint\", \"table_with_mixed_columns\".\"a_integer\" AS \"a_integer\", \"table_with_mixed_columns\".\"a_bigint\" AS \"a_bigint\", \"table_with_mixed_columns\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns\"\n" + "UNION ALL\n" + + "SELECT \"table_with_mixed_columns0\".\"a_tinyint\" AS \"a_tinyint\", 
\"table_with_mixed_columns0\".\"a_smallint\" AS \"a_smallint\", \"table_with_mixed_columns0\".\"a_integer\" AS \"a_integer\", \"table_with_mixed_columns0\".\"a_bigint\" AS \"a_bigint\", \"table_with_mixed_columns0\".\"a_float\" AS \"a_float\"\n" + + "FROM \"test\".\"table_with_mixed_columns\" AS \"table_with_mixed_columns0\"" }, + + { "test", "fuzzy_union_view_char_casting", "SELECT *\n" + + "FROM \"test\".\"table_with_string_column\" AS \"table_with_string_column\"\n" + "UNION ALL\n" + + "SELECT CAST(\"table_with_char32_column\".\"a\" AS VARCHAR(65536)) AS \"a\"\n" + + "FROM \"test\".\"table_with_char32_column\" AS \"table_with_char32_column\"" } }; } @Test diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java index 2c6bb0f97..e94c423af 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java @@ -378,6 +378,47 @@ public static void initializeTablesAndViews(HiveConf conf) throws HiveException, run(driver, "CREATE TABLE test.table_with_binary_column (b binary)"); + run(driver, + "CREATE TABLE test.table_with_mixed_columns (a_char1 char(1), a_char255 char(255), a_string string, a_tinyint tinyint, a_smallint smallint, a_integer int, a_bigint bigint, a_float float, a_double double, a_boolean boolean)"); + run(driver, "CREATE VIEW IF NOT EXISTS test.view_cast_char_to_varchar AS \n" + + "SELECT CAST(a_char1 AS VARCHAR(65535)) AS col FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_char_different_size_in_union AS \n" + + "SELECT a_char1 AS col FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT a_char255 AS col FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_cast_char_to_varchar_in_union AS \n" + + "SELECT CAST(a_char1 AS VARCHAR(65535)) AS col FROM 
test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT COALESCE(a_char1, 'N') AS col FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_cast_char_to_varchar_in_union_flipped AS \n" + + "SELECT COALESCE(a_char1, 'N') as col FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT CAST(a_char1 AS VARCHAR(65535)) AS col FROM test.table_with_mixed_columns"); + run(driver, "CREATE VIEW IF NOT EXISTS test.view_cast_char_to_varchar_with_other_fields_in_union AS \n" + + "SELECT CAST(a_char1 AS VARCHAR(65535)) AS text , a_boolean, a_smallint as a_number FROM test.table_with_mixed_columns \n" + + "UNION ALL\n" + + "SELECT COALESCE(a_char1, 'N') as text, a_boolean, a_integer as a_number FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_char_and_null_in_union AS \n" + + "SELECT a_char1 as text FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT NULL text FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_different_numerical_types_in_union AS \n" + + "SELECT a_tinyint AS a_number, a_float FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT a_smallint AS a_number, a_float FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT a_integer AS a_number, a_float FROM test.table_with_mixed_columns \n" + "UNION ALL\n" + + "SELECT a_bigint AS a_number, a_float FROM test.table_with_mixed_columns"); + run(driver, + "CREATE VIEW IF NOT EXISTS test.view_union_no_casting AS \n" + + "SELECT a_tinyint, a_smallint, a_integer, a_bigint, a_float FROM test.table_with_mixed_columns \n" + + "UNION ALL\n" + + "SELECT a_tinyint, a_smallint, a_integer, a_bigint, a_float FROM test.table_with_mixed_columns"); + + run(driver, "CREATE TABLE IF NOT EXISTS test.table_with_string_column(a string)"); + run(driver, "CREATE TABLE IF NOT EXISTS test.table_with_char32_column(a char(32))"); + run(driver, "CREATE VIEW IF NOT EXISTS 
test.fuzzy_union_view_char_casting AS \n" + + "SELECT * from test.table_with_string_column UNION ALL SELECT * from test.table_with_char32_column"); + // Tables used in RelToTrinoConverterTest run(driver, "CREATE TABLE IF NOT EXISTS test.tableOne(icol int, dcol double, scol string, tcol timestamp, acol array)");