diff --git a/docs/src/main/sphinx/connector/hive.md b/docs/src/main/sphinx/connector/hive.md index cc715695b1f2..e6cc99c80cb6 100644 --- a/docs/src/main/sphinx/connector/hive.md +++ b/docs/src/main/sphinx/connector/hive.md @@ -656,6 +656,8 @@ type conversions. * - `DECIMAL` - `DOUBLE`, `REAL`, `VARCHAR`, `TINYINT`, `SMALLINT`, `INTEGER`, `BIGINT`, as well as narrowing and widening conversions for `DECIMAL` +* - `DATE` + - `VARCHAR` * - `TIMESTAMP` - `VARCHAR` ::: diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java index a0aa9435c00e..824ace9b9c5a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/CoercionUtils.java @@ -17,6 +17,7 @@ import io.trino.plugin.hive.HiveTimestampPrecision; import io.trino.plugin.hive.HiveType; import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer; +import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer; import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToDateCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; @@ -199,6 +200,9 @@ public static Type createTypeFromCoercer(TypeManager typeManager, HiveType fromH } return Optional.empty(); } + if (fromType instanceof DateType && toType instanceof VarcharType toVarcharType) { + return Optional.of(new DateToVarcharCoercer(toVarcharType)); + } if (fromType == DOUBLE && toType instanceof VarcharType toVarcharType) { return Optional.of(new DoubleToVarcharCoercer(toVarcharType, coercionContext.treatNaNAsNull())); } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java index 
3b9398a9e925..752f72ac7ceb 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/DateCoercer.java
@@ -13,16 +13,23 @@
  */
 package io.trino.plugin.hive.coercions;
 
+import io.airlift.slice.Slice;
 import io.trino.spi.TrinoException;
 import io.trino.spi.block.Block;
 import io.trino.spi.block.BlockBuilder;
 import io.trino.spi.type.DateType;
 import io.trino.spi.type.VarcharType;
 
+import java.time.DateTimeException;
 import java.time.LocalDate;
 import java.time.format.DateTimeParseException;
 
+import static io.airlift.slice.SliceUtf8.countCodePoints;
+import static io.airlift.slice.Slices.utf8Slice;
 import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION;
+import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS;
+import static io.trino.spi.type.DateType.DATE;
+import static java.lang.String.format;
 import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
 
 public final class DateCoercer
@@ -55,4 +62,39 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
             }
         }
     }
+
+    /**
+     * Coerces {@code DATE} values to their {@code ISO_LOCAL_DATE} text form, e.g. {@code 2023-01-10}.
+     * Dates before {@code START_OF_MODERN_ERA_DAYS} and text that does not fit a bounded target type are rejected.
+     */
+    public static class DateToVarcharCoercer
+            extends TypeCoercer<DateType, VarcharType>
+    {
+        public DateToVarcharCoercer(VarcharType toType)
+        {
+            super(DATE, toType);
+        }
+
+        @Override
+        protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
+        {
+            int value = fromType.getInt(block, position);
+            // Checked before formatting so this failure is never routed through the DateTimeException handler below
+            if (value < START_OF_MODERN_ERA_DAYS) {
+                throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported");
+            }
+            Slice converted;
+            try {
+                converted = utf8Slice(ISO_LOCAL_DATE.format(LocalDate.ofEpochDay(value)));
+            }
+            catch (DateTimeException e) {
+                // Keep the original failure as the cause so the root problem stays visible in the stack trace
+                throw new IllegalArgumentException("Invalid date value: " + value + " is exceeding supported date range", e);
+            }
+            if (!toType.isUnbounded() && countCodePoints(converted) > toType.getBoundedLength()) {
+                throw new TrinoException(INVALID_ARGUMENTS, format("Varchar representation of '%s' exceeds %s bounds", converted.toStringUtf8(), toType));
+            }
+            toType.writeSlice(blockBuilder, converted);
+        }
+    }
 }
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
index 00f050fa97b4..8e1b77f559d1 100644
--- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java
@@ -15,6 +15,7 @@
 
 import io.trino.orc.metadata.OrcType.OrcTypeKind;
 import io.trino.plugin.hive.coercions.BooleanCoercer.BooleanToVarcharCoercer;
+import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer;
 import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer;
 import io.trino.plugin.hive.coercions.DoubleToVarcharCoercer;
 import io.trino.plugin.hive.coercions.IntegerNumberToDoubleCoercer;
@@ -34,6 +35,7 @@
 
 import static io.trino.orc.metadata.OrcType.OrcTypeKind.BOOLEAN;
 import static io.trino.orc.metadata.OrcType.OrcTypeKind.BYTE;
+import static io.trino.orc.metadata.OrcType.OrcTypeKind.DATE;
 import static io.trino.orc.metadata.OrcType.OrcTypeKind.DOUBLE;
 import static io.trino.orc.metadata.OrcType.OrcTypeKind.INT;
 import static io.trino.orc.metadata.OrcType.OrcTypeKind.LONG;
@@ -63,6 +65,9 @@ private OrcTypeTranslator() {}
             }
             return Optional.empty();
         }
+        if (fromOrcType == DATE && toTrinoType instanceof VarcharType varcharType) {
+            return Optional.of(new DateToVarcharCoercer(varcharType));
+        }
         if (isVarcharType(fromOrcType)) {
             if (toTrinoType instanceof TimestampType timestampType) {
                 if (timestampType.isShort()) {
diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java
index d424fcda56ee..a7e9311304c8 100644
--- 
a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java
+++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java
@@ -81,6 +81,7 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest
                     fromHiveType.equals(HIVE_LONG) ||
                     fromHiveType.equals(HIVE_TIMESTAMP) ||
                     fromHiveType.equals(HIVE_DOUBLE) ||
+                    fromHiveType.equals(HIVE_DATE) ||
                     fromType instanceof DecimalType;
         }
         if (toHiveType.equals(HIVE_DATE)) {
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java
index 581fb2dbb259..37a86dad7803 100644
--- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestDateCoercer.java
@@ -14,8 +14,8 @@
 package io.trino.plugin.hive.coercions;
 
 import io.trino.plugin.hive.coercions.CoercionUtils.CoercionContext;
+import io.trino.spi.TrinoException;
 import io.trino.spi.block.Block;
-import io.trino.spi.type.DateType;
 import io.trino.spi.type.Type;
 import org.junit.jupiter.api.Test;
 
@@ -27,7 +27,10 @@
 import static io.trino.plugin.hive.coercions.CoercionUtils.createCoercer;
 import static io.trino.spi.predicate.Utils.blockToNativeValue;
 import static io.trino.spi.predicate.Utils.nativeValueToBlock;
+import static io.trino.spi.type.DateType.DATE;
+import static io.trino.spi.type.VarcharType.VARCHAR;
 import static io.trino.spi.type.VarcharType.createUnboundedVarcharType;
+import static io.trino.spi.type.VarcharType.createVarcharType;
 import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -66,6 +69,32 @@ public void testThrowsExceptionWhenDateIsTooOld()
                 .hasMessageMatching(".*Coercion on historical dates is not supported.*");
     }
 
+    @Test
+    public void testDateToVarchar()
+    {
+        assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("2023-01-10"), "2023-01-10");
+        // Years beyond 9999 keep the leading sign in their text form
+        assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("+10000-04-25"), "+10000-04-25");
+    }
+
+    @Test
+    public void testDateToLowerBoundedVarchar()
+    {
+        // A 10-character date does not fit varchar(8); the coercion must fail instead of truncating
+        assertThatThrownBy(() -> assertDateToVarcharCoercion(createVarcharType(8), LocalDate.parse("2023-10-23"), "2023-10-23"))
+                .isInstanceOf(TrinoException.class)
+                .hasMessageContaining("Varchar representation of '2023-10-23' exceeds varchar(8) bounds");
+    }
+
+    @Test
+    public void testHistoricalDateToVarchar()
+    {
+        // The expected text is never compared because the coercion is rejected first
+        assertThatThrownBy(() -> assertDateToVarcharCoercion(createUnboundedVarcharType(), LocalDate.parse("1899-12-31"), "1899-12-31"))
+                .isInstanceOf(TrinoException.class)
+                .hasMessageMatching(".*Coercion on historical dates is not supported.*");
+    }
+
     private void assertVarcharToDateCoercion(Type fromType, String date)
     {
         assertVarcharToDateCoercion(fromType, date, fromDateToEpochDate(date));
@@ -73,12 +102,23 @@ private void assertVarcharToDateCoercion(Type fromType, String date)
 
     private void assertVarcharToDateCoercion(Type fromType, String date, Long expected)
     {
-        Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DateType.DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow()
+        Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(DATE), new CoercionContext(NANOSECONDS, false)).orElseThrow()
                 .apply(nativeValueToBlock(fromType, utf8Slice(date)));
-        assertThat(blockToNativeValue(DateType.DATE, coercedValue))
+        assertThat(blockToNativeValue(DATE, coercedValue))
                 .isEqualTo(expected);
     }
 
+    /**
+     * Coerces a single DATE value to {@code toType} through the Hive coercer and compares the produced text with {@code expected}.
+     */
+    private void assertDateToVarcharCoercion(Type toType, LocalDate date, String expected)
+    {
+        Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(DATE), toHiveType(toType), new CoercionContext(NANOSECONDS, false)).orElseThrow()
+                .apply(nativeValueToBlock(DATE, date.toEpochDay()));
+        assertThat(blockToNativeValue(VARCHAR, coercedValue))
+                .isEqualTo(utf8Slice(expected));
+    }
+
     private long fromDateToEpochDate(String dateString)
     {
         LocalDate date = LocalDate.parse(dateString);
diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
index f02df83c37c1..9ac4cd6d8751 100644
--- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
+++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java
@@ -147,6 +147,8 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition)
                 "string_to_double",
                 "varchar_to_double_infinity",
                 "varchar_to_special_double",
+                "date_to_string",
+                "date_to_bounded_varchar",
                 "char_to_bigger_char",
                 "char_to_smaller_char",
                 "timestamp_millis_to_date",
@@ -237,6 +239,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
                         " '1234.01234', " +
                         " 'Infinity'," +
                         " 'NaN'," +
+                        " DATE '2023-09-28', " +
+                        " DATE '2000-04-13', " +
                         " 'abc', " +
                         " 'abc', " +
                         " TIMESTAMP '2022-12-31 23:59:59.999', " +
@@ -299,6 +303,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType)
                         " '0', " +
                         " '-Infinity'," +
                         " 'Invalid Double'," +
+                        " DATE '2123-09-27', " +
+                        " DATE '1900-01-01', " +
                         " '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
                         " '\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0', " +
                         " TIMESTAMP '1970-01-01 00:00:00.123', " +
@@ -519,6 +525,12 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) {
                 .put("varchar_to_special_double", Arrays.asList(
                         coercedNaN == null ? 
null : Double.NaN, null)) + .put("date_to_string", ImmutableList.of( + "2023-09-28", + "2123-09-27")) + .put("date_to_bounded_varchar", ImmutableList.of( + "2000-04-13", + "1900-01-01")) .put("char_to_bigger_char", ImmutableList.of( "abc ", "\uD83D\uDCB0\uD83D\uDCB0\uD83D\uDCB0 ")) @@ -977,6 +989,8 @@ private void assertProperAlteredTableSchema(String tableName) row("string_to_double", "double"), row("varchar_to_double_infinity", "double"), row("varchar_to_special_double", "double"), + row("date_to_string", "varchar"), + row("date_to_bounded_varchar", "varchar(12)"), row("char_to_bigger_char", "char(4)"), row("char_to_smaller_char", "char(2)"), row("timestamp_millis_to_date", "date"), @@ -1055,6 +1069,8 @@ private void assertColumnTypes( .put("string_to_double", DOUBLE) .put("varchar_to_double_infinity", DOUBLE) .put("varchar_to_special_double", DOUBLE) + .put("date_to_string", VARCHAR) + .put("date_to_bounded_varchar", VARCHAR) .put("char_to_bigger_char", CHAR) .put("char_to_smaller_char", CHAR) .put("id", BIGINT) @@ -1128,6 +1144,8 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_bigger_varchar varchar_to_bigger_varchar varchar(4)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_smaller_varchar varchar_to_smaller_varchar varchar(2)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_date varchar_to_date date", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_string date_to_string string", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN date_to_bounded_varchar date_to_bounded_varchar varchar(12)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_distant_date varchar_to_distant_date date", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_double varchar_to_double double", tableName)); 
onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN string_to_double string_to_double double", tableName)); diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java index 8960585ad40a..d7fb66af5fb9 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java @@ -150,6 +150,8 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui " string_to_double STRING," + " varchar_to_double_infinity VARCHAR(40)," + " varchar_to_special_double VARCHAR(40)," + + " date_to_string DATE," + + " date_to_bounded_varchar DATE," + " char_to_bigger_char CHAR(3)," + " char_to_smaller_char CHAR(3)," + " timestamp_millis_to_date TIMESTAMP," + diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java index f11ceb4a1520..ab99a9316ac9 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java @@ -99,6 +99,8 @@ varchar_to_double VARCHAR(40), string_to_double STRING, varchar_to_double_infinity VARCHAR(40), varchar_to_special_double VARCHAR(40), + date_to_string DATE, + date_to_bounded_varchar DATE, char_to_bigger_char CHAR(3), char_to_smaller_char CHAR(3), timestamp_millis_to_date TIMESTAMP,