diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveErrorCode.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveErrorCode.java index c628d970b120..2018867e6381 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveErrorCode.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HiveErrorCode.java @@ -67,6 +67,7 @@ public enum HiveErrorCode HIVE_TABLE_LOCK_NOT_ACQUIRED(40, EXTERNAL), HIVE_VIEW_TRANSLATION_ERROR(41, EXTERNAL), HIVE_PARTITION_NOT_FOUND(42, USER_ERROR), + HIVE_INVALID_TIMESTAMP_COERCION(43, EXTERNAL), /**/; private final ErrorCode errorCode; diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java index 0300c6d9b2a8..eca796d64cbd 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSource.java @@ -24,6 +24,8 @@ import io.trino.plugin.hive.coercions.IntegerNumberUpscaleCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.ShortTimestampToVarcharCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer; import io.trino.plugin.hive.coercions.VarcharCoercer; import io.trino.plugin.hive.coercions.VarcharToIntegerNumberCoercer; import io.trino.plugin.hive.type.Category; @@ -314,6 +316,12 @@ public static Optional> createCoercer(TypeManager typeMan if (fromType instanceof VarcharType fromVarcharType && (toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || toHiveType.equals(HIVE_LONG))) { return Optional.of(new VarcharToIntegerNumberCoercer<>(fromVarcharType, toType)); } + if (fromType instanceof VarcharType varcharType && toType instanceof 
TimestampType timestampType) { + if (timestampType.isShort()) { + return Optional.of(new VarcharToShortTimestampCoercer(varcharType, timestampType)); + } + return Optional.of(new VarcharToLongTimestampCoercer(varcharType, timestampType)); + } if (fromType instanceof VarcharType fromVarcharType && toType instanceof VarcharType toVarcharType) { if (narrowerThan(toVarcharType, fromVarcharType)) { return Optional.of(new VarcharCoercer(fromVarcharType, toVarcharType)); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index 4b12227bf59a..e7bffbcaa494 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -211,7 +211,7 @@ public static Optional createHivePageSource( Optional bucketValidator = createBucketValidator(path, bucketValidation, tableBucketNumber, regularAndInterimColumnMappings); for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) { - List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager); + List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, true, typeManager, getTimestampPrecision(session)); Optional readerWithProjections = pageSourceFactory.createPageSource( configuration, @@ -249,7 +249,7 @@ public static Optional createHivePageSource( } for (HiveRecordCursorProvider provider : cursorProviders) { - List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, false, typeManager); + List desiredColumns = toColumnHandles(regularAndInterimColumnMappings, false, typeManager, getTimestampPrecision(session)); Optional readerWithProjections = provider.createRecordCursor( configuration, session, @@ -544,7 +544,7 @@ public static List extractRegularAndInterimColumnMappings(List toColumnHandles(List regularColumnMappings, boolean 
doCoercion, TypeManager typeManager) + public static List toColumnHandles(List regularColumnMappings, boolean doCoercion, TypeManager typeManager, HiveTimestampPrecision timestampPrecision) { return regularColumnMappings.stream() .map(columnMapping -> { @@ -560,14 +560,14 @@ public static List toColumnHandles(List regular projectedColumn.getDereferenceIndices(), projectedColumn.getDereferenceNames(), fromHiveType, - fromHiveType.getType(typeManager)); + fromHiveType.getType(typeManager, timestampPrecision)); }); return new HiveColumnHandle( columnHandle.getBaseColumnName(), columnHandle.getBaseHiveColumnIndex(), fromHiveTypeBase, - fromHiveTypeBase.getType(typeManager), + fromHiveTypeBase.getType(typeManager, timestampPrecision), newColumnProjectionInfo, columnHandle.getColumnType(), columnHandle.getComment()); diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java index 2e15793365e5..a47ea28d8303 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/coercions/TimestampCoercer.java @@ -13,7 +13,9 @@ */ package io.trino.plugin.hive.coercions; +import io.airlift.slice.Slice; import io.airlift.slice.Slices; +import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.block.BlockBuilder; import io.trino.spi.type.LongTimestamp; @@ -24,10 +26,14 @@ import java.time.chrono.IsoChronology; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION; import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND; import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND; 
+import static io.trino.spi.type.Timestamps.round; +import static io.trino.spi.type.Timestamps.roundDiv; import static io.trino.spi.type.Varchars.truncateToLength; import static java.lang.Math.floorDiv; import static java.lang.Math.floorMod; @@ -35,6 +41,8 @@ import static java.time.ZoneOffset.UTC; import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE; import static java.time.format.DateTimeFormatter.ISO_LOCAL_TIME; +import static java.time.format.ResolverStyle.LENIENT; +import static org.joda.time.DateTimeConstants.SECONDS_PER_DAY; public final class TimestampCoercer { @@ -44,8 +52,12 @@ public final class TimestampCoercer .appendLiteral(' ') .append(ISO_LOCAL_TIME) .toFormatter() + .withResolverStyle(LENIENT) .withChronology(IsoChronology.INSTANCE); + // Before 1900, Java Time and Joda Time are not consistent with java.sql.Date and java.util.Calendar + private static final long START_OF_MODERN_ERA_SECONDS = java.time.LocalDate.of(1900, 1, 1).toEpochDay() * SECONDS_PER_DAY; + private TimestampCoercer() {} public static class ShortTimestampToVarcharCoercer @@ -62,6 +74,9 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos long epochMicros = fromType.getLong(block, position); long epochSecond = floorDiv(epochMicros, MICROSECONDS_PER_SECOND); int nanoFraction = floorMod(epochMicros, MICROSECONDS_PER_SECOND) * NANOSECONDS_PER_MICROSECOND; + if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } toType.writeSlice( blockBuilder, truncateToLength( @@ -88,6 +103,9 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos long microsFraction = floorMod(timestamp.getEpochMicros(), MICROSECONDS_PER_SECOND); // Hive timestamp has nanoseconds precision, so no truncation here long nanosFraction = (microsFraction * NANOSECONDS_PER_MICROSECOND) + (timestamp.getPicosOfMicro() / PICOSECONDS_PER_NANOSECOND); + 
if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } toType.writeSlice( blockBuilder, @@ -97,4 +115,63 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos toType)); } } + + public static class VarcharToShortTimestampCoercer + extends TypeCoercer + { + public VarcharToShortTimestampCoercer(VarcharType fromType, TimestampType toType) + { + super(fromType, toType); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + try { + Slice value = fromType.getSlice(block, position); + LocalDateTime dateTime = LOCAL_DATE_TIME.parse(value.toStringUtf8(), LocalDateTime::from); + long epochSecond = dateTime.toEpochSecond(UTC); + if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } + long epochMicros = epochSecond * MICROSECONDS_PER_SECOND + roundDiv(dateTime.getNano(), NANOSECONDS_PER_MICROSECOND); + toType.writeLong(blockBuilder, round(epochMicros, 6 - toType.getPrecision())); + } + catch (DateTimeParseException exception) { + // Hive treats invalid String as null instead of propagating exception + // In case of bigger tables with all values being invalid, log output will be huge so avoiding log here. 
+ blockBuilder.appendNull(); + } + } + } + + public static class VarcharToLongTimestampCoercer + extends TypeCoercer + { + public VarcharToLongTimestampCoercer(VarcharType fromType, TimestampType toType) + { + super(fromType, toType); + } + + @Override + protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position) + { + try { + Slice value = fromType.getSlice(block, position); + LocalDateTime dateTime = LOCAL_DATE_TIME.parse(value.toStringUtf8(), LocalDateTime::from); + long epochSecond = dateTime.toEpochSecond(UTC); + if (epochSecond < START_OF_MODERN_ERA_SECONDS) { + throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported"); + } + long epochMicros = epochSecond * MICROSECONDS_PER_SECOND + dateTime.getNano() / NANOSECONDS_PER_MICROSECOND; + int picosOfMicro = (dateTime.getNano() % NANOSECONDS_PER_MICROSECOND) * PICOSECONDS_PER_NANOSECOND; + toType.writeObject(blockBuilder, new LongTimestamp(epochMicros, picosOfMicro)); + } + catch (DateTimeParseException exception) { + // Hive treats invalid String as null instead of propagating exception + // In case of bigger tables with all values being invalid, log output will be huge so avoiding log here. 
+ blockBuilder.appendNull(); + } + } + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java index 700fa9499b88..f531dc867191 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcTypeTranslator.java @@ -17,6 +17,8 @@ import io.trino.plugin.hive.HiveTimestampPrecision; import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer; import io.trino.plugin.hive.coercions.TimestampCoercer.ShortTimestampToVarcharCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer; +import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToShortTimestampCoercer; import io.trino.plugin.hive.coercions.TypeCoercer; import io.trino.spi.type.TimestampType; import io.trino.spi.type.Type; @@ -24,7 +26,11 @@ import java.util.Optional; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.STRING; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP; +import static io.trino.orc.metadata.OrcType.OrcTypeKind.VARCHAR; import static io.trino.spi.type.TimestampType.createTimestampType; +import static io.trino.spi.type.VarcharType.createUnboundedVarcharType; public final class OrcTypeTranslator { @@ -32,13 +38,24 @@ private OrcTypeTranslator() {} public static Optional> createCoercer(OrcTypeKind fromOrcType, Type toTrinoType, HiveTimestampPrecision timestampPrecision) { - if (fromOrcType.equals(OrcTypeKind.TIMESTAMP) && toTrinoType instanceof VarcharType varcharType) { + if (fromOrcType == TIMESTAMP && toTrinoType instanceof VarcharType varcharType) { TimestampType timestampType = createTimestampType(timestampPrecision.getPrecision()); if (timestampType.isShort()) { return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType)); } return Optional.of(new 
LongTimestampToVarcharCoercer(timestampType, varcharType)); } + if (isVarcharType(fromOrcType) && toTrinoType instanceof TimestampType timestampType) { + if (timestampType.isShort()) { + return Optional.of(new VarcharToShortTimestampCoercer(createUnboundedVarcharType(), timestampType)); + } + return Optional.of(new VarcharToLongTimestampCoercer(createUnboundedVarcharType(), timestampType)); + } return Optional.empty(); } + + private static boolean isVarcharType(OrcTypeKind orcTypeKind) + { + return orcTypeKind == STRING || orcTypeKind == VARCHAR; + } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java index ffe29c344785..927c1b899e19 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/HiveCoercionPolicy.java @@ -63,7 +63,8 @@ private boolean canCoerce(HiveType fromHiveType, HiveType toHiveType, HiveTimest toHiveType.equals(HIVE_BYTE) || toHiveType.equals(HIVE_SHORT) || toHiveType.equals(HIVE_INT) || - toHiveType.equals(HIVE_LONG); + toHiveType.equals(HIVE_LONG) || + toHiveType.equals(HIVE_TIMESTAMP); } if (fromType instanceof CharType) { return toType instanceof CharType; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java index 91bf91fff433..8547c4a36ceb 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/coercions/TestTimestampCoercer.java @@ -15,6 +15,7 @@ import io.airlift.slice.Slices; import io.trino.plugin.hive.HiveTimestampPrecision; +import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.type.LongTimestamp; import io.trino.spi.type.SqlTimestamp; 
@@ -40,6 +41,7 @@ import static java.time.ZoneOffset.UTC; import static java.time.temporal.ChronoField.NANO_OF_SECOND; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; public class TestTimestampCoercer { @@ -59,6 +61,22 @@ public void testLongTimestampToVarchar(String timestampValue, String hiveTimesta assertLongTimestampToVarcharCoercions(TIMESTAMP_PICOS, new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), createUnboundedVarcharType(), hiveTimestampValue); } + @Test(dataProvider = "timestampValuesProvider") + public void testVarcharToShortTimestamp(String timestampValue, String hiveTimestampValue) + { + LocalDateTime localDateTime = LocalDateTime.parse(timestampValue); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice(hiveTimestampValue), TIMESTAMP_MICROS, timestamp.getEpochMicros()); + } + + @Test(dataProvider = "timestampValuesProvider") + public void testVarcharToLongTimestamp(String timestampValue, String hiveTimestampValue) + { + LocalDateTime localDateTime = LocalDateTime.parse(timestampValue); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertVarcharToLongTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice(hiveTimestampValue), TIMESTAMP_PICOS, new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros())); + } + @Test public void testShortTimestampToSmallerVarchar() { @@ -103,11 +121,92 @@ public void testLongTimestampToSmallerVarchar() assertLongTimestampToVarcharCoercions(TIMESTAMP_PICOS, new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), createVarcharType(29), "2023-04-11 05:16:12.345678876"); } + @Test 
+ public void testHistoricalShortTimestampToVarchar() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> assertShortTimestampToVarcharCoercions(TIMESTAMP_MICROS, timestamp.getEpochMicros(), createUnboundedVarcharType(), "1899-12-31 23:59:59.999999")) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + + @Test + public void testHistoricalLongTimestampToVarchar() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> assertLongTimestampToVarcharCoercions( + TIMESTAMP_PICOS, + new LongTimestamp(timestamp.getEpochMicros(), timestamp.getPicosOfMicros()), + createUnboundedVarcharType(), + "1899-12-31 23:59:59.999999999")) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + + @Test + public void testVarcharToShortTimestampWithExtendedRange() + { + LocalDateTime localDateTime = LocalDateTime.parse("2019-03-03T23:59:59.123"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice("2019-02-31 23:59:59.123"), TIMESTAMP_MICROS, timestamp.getEpochMicros()); + } + + @Test + public void testVarcharToLongTimestampWithInvalidRange() + { + LocalDateTime localDateTime = LocalDateTime.parse("2019-03-03T23:59:59.1234567"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), 
localDateTime.get(NANO_OF_SECOND)); + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice("2019-02-31 23:59:59.1234567"), TIMESTAMP_MICROS, timestamp.getEpochMicros()); + } + + @Test(dataProvider = "invalidValue") + public void testInvalidVarcharToShortTimestamp(String invalidValue) + { + assertVarcharToShortTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice(invalidValue), TIMESTAMP_MICROS, null); + } + + @Test(dataProvider = "invalidValue") + public void testInvalidVarcharLongTimestamp(String invalidValue) + { + assertVarcharToLongTimestampCoercions(createUnboundedVarcharType(), Slices.utf8Slice(invalidValue), TIMESTAMP_MICROS, null); + } + + @Test + public void testHistoricalVarcharToShortTimestamp() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_MICROS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> assertVarcharToShortTimestampCoercions( + createUnboundedVarcharType(), + Slices.utf8Slice("1899-12-31 23:59:59.999999"), + TIMESTAMP_MICROS, + timestamp.getEpochMicros())) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + + @Test + public void testHistoricalVarcharToLongTimestamp() + { + LocalDateTime localDateTime = LocalDateTime.parse("1899-12-31T23:59:59.999999"); + SqlTimestamp timestamp = SqlTimestamp.fromSeconds(TIMESTAMP_PICOS.getPrecision(), localDateTime.toEpochSecond(UTC), localDateTime.get(NANO_OF_SECOND)); + assertThatThrownBy(() -> assertVarcharToShortTimestampCoercions( + createUnboundedVarcharType(), + Slices.utf8Slice("1899-12-31 23:59:59.999999"), + TIMESTAMP_PICOS, + timestamp.getEpochMicros())) + .isInstanceOf(TrinoException.class) + .hasMessageContaining("Coercion on historical dates is not supported"); + } + @DataProvider public Object[][] timestampValuesProvider() { 
return new Object[][] { // before epoch + {"1900-01-01T00:00:00.000", "1900-01-01 00:00:00"}, {"1958-01-01T13:18:03.123", "1958-01-01 13:18:03.123"}, // after epoch {"2019-03-18T10:01:17.987", "2019-03-18 10:01:17.987"}, @@ -129,6 +228,16 @@ public Object[][] timestampValuesProvider() }; } + @DataProvider + public Object[][] invalidValue() + { + return new Object[][] { + {"Invalid timestamp"}, + {"2022"}, + {"2001-04-01T00:13:42.000"}, + }; + } + public static void assertShortTimestampToVarcharCoercions(TimestampType fromType, Long valueToBeCoerced, VarcharType toType, String expectedValue) { assertCoercions(fromType, valueToBeCoerced, toType, Slices.utf8Slice(expectedValue), MICROSECONDS); @@ -139,6 +248,16 @@ public static void assertLongTimestampToVarcharCoercions(TimestampType fromType, assertCoercions(fromType, valueToBeCoerced, toType, Slices.utf8Slice(expectedValue), NANOSECONDS); } + public static void assertVarcharToShortTimestampCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) + { + assertCoercions(fromType, valueToBeCoerced, toType, expectedValue, MICROSECONDS); + } + + public static void assertVarcharToLongTimestampCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue) + { + assertCoercions(fromType, valueToBeCoerced, toType, expectedValue, NANOSECONDS); + } + public static void assertCoercions(Type fromType, Object valueToBeCoerced, Type toType, Object expectedValue, HiveTimestampPrecision timestampPrecision) { Block coercedValue = createCoercer(TESTING_TYPE_MANAGER, toHiveType(fromType), toHiveType(toType), timestampPrecision).orElseThrow() diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java index 606c3c9c1b24..758651742f28 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java +++ 
b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/BaseTestHiveCoercion.java @@ -16,7 +16,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; import io.trino.jdbc.TrinoArray; +import io.trino.plugin.hive.HiveTimestampPrecision; import io.trino.tempto.assertions.QueryAssert.Row; import io.trino.tempto.fulfillment.table.MutableTablesState; import io.trino.tempto.fulfillment.table.TableDefinition; @@ -28,6 +30,8 @@ import java.math.BigDecimal; import java.sql.JDBCType; +import java.sql.SQLException; +import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -41,10 +45,13 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.airlift.testing.Assertions.assertEqualsIgnoreOrder; +import static io.trino.plugin.hive.HiveTimestampPrecision.NANOSECONDS; import static io.trino.tempto.assertions.QueryAssert.Row.row; +import static io.trino.tempto.assertions.QueryAssert.assertQueryFailure; import static io.trino.tempto.assertions.QueryAssert.assertThat; import static io.trino.tempto.context.ThreadLocalTestContextHolder.testContext; import static io.trino.tempto.fulfillment.table.TableHandle.tableHandle; +import static io.trino.tests.product.utils.JdbcDriverUtils.setSessionProperty; import static io.trino.tests.product.utils.QueryExecutors.onHive; import static io.trino.tests.product.utils.QueryExecutors.onTrino; import static java.lang.String.format; @@ -59,7 +66,9 @@ import static java.sql.JDBCType.REAL; import static java.sql.JDBCType.SMALLINT; import static java.sql.JDBCType.STRUCT; +import static java.sql.JDBCType.TIMESTAMP; import static java.sql.JDBCType.VARCHAR; +import static java.util.Collections.nCopies; import static java.util.Locale.ENGLISH; import static 
java.util.Objects.requireNonNull; import static java.util.stream.Collectors.toList; @@ -114,6 +123,8 @@ protected void doTestHiveCoercion(HiveTableDefinition tableDefinition) "timestamp_to_string", "timestamp_to_bounded_varchar", "timestamp_to_smaller_varchar", + "smaller_varchar_to_timestamp", + "varchar_to_timestamp", "id"); Function>> expected = engine -> expectedValuesForEngineProvider(engine, tableName, decimalToFloatVal, floatToDecimalVal); @@ -173,6 +184,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TIMESTAMP '2121-07-15 15:30:12.123', " + " TIMESTAMP '2121-07-15 15:30:12.123', " + " TIMESTAMP '2121-07-15 15:30:12.123', " + + " '2121', " + + " '2019-02-29 23:59:59.123', " + " 1), " + "(" + " CAST(ROW (NULL, 1, -100, -2323, -12345, 2) AS ROW(keep VARCHAR, ti2si TINYINT, si2int SMALLINT, int2bi INTEGER, bi2vc BIGINT, lower2uppercase BIGINT)), " + @@ -206,6 +219,8 @@ protected void insertTableRows(String tableName, String floatToDoubleType) " TIMESTAMP '1970-01-01 00:00:00.123', " + " TIMESTAMP '1970-01-01 00:00:00.123', " + " TIMESTAMP '1970-01-01 00:00:00.123', " + + " '1970', " + + " '1970-01-01 00:00:00.123', " + " 1)", tableName, floatToDoubleType)); @@ -358,12 +373,244 @@ else if (getHiveVersionMajor() == 3 && isFormat.test("orc")) { .put("timestamp_to_smaller_varchar", ImmutableList.of( "2121", "1970")) + .put("smaller_varchar_to_timestamp", Arrays.asList( + null, + null)) + .put("varchar_to_timestamp", ImmutableList.of( + Timestamp.valueOf("2019-03-01 23:59:59.123"), + Timestamp.valueOf("1970-01-01 00:00:00.123"))) .put("id", ImmutableList.of( 1, 1)) .buildOrThrow(); } + protected void doTestHiveCoercionWithDifferentTimestampPrecision(HiveTableDefinition tableDefinition) + { + String tableName = mutableTableInstanceOf(tableDefinition).getNameInDatabase(); + + // Insert all the data with nanoseconds precision + setHiveTimestampPrecision(NANOSECONDS); + onTrino().executeQuery( + """ + INSERT INTO %s VALUES + (TIMESTAMP 
'2121-07-15 15:30:12.123499', TIMESTAMP '2121-07-15 15:30:12.123499', TIMESTAMP '0000-01-01 00:00:00.123499', '2120-14-41 15:30:12.123499', '0000-01-01 00:00:00.123499', 1), + (TIMESTAMP '2121-07-15 15:30:12.123500', TIMESTAMP '2121-07-15 15:30:12.123500', TIMESTAMP '0000-01-01 00:00:00.123500', '2019-02-31 15:30:12.123500', '0000-01-01 00:00:00.123500', 1), + (TIMESTAMP '2121-07-15 15:30:12.123501', TIMESTAMP '2121-07-15 15:30:12.123501', TIMESTAMP '0000-01-01 00:00:00.123501', '2120-14-41 75:30:12.123501', '0000-01-01 00:00:00.123501', 1), + (TIMESTAMP '2121-07-15 15:30:12.123499999', TIMESTAMP '2121-07-15 15:30:12.123499999', TIMESTAMP '0000-01-01 00:00:00.123499999', '2120-14-41 15:30:12.123499999', '0000-01-01 00:00:00.123499999', 1), + (TIMESTAMP '2121-07-15 15:30:12.123500000', TIMESTAMP '2121-07-15 15:30:12.123500000', TIMESTAMP '0000-01-01 00:00:00.123500000', '2019-02-31 15:30:12.123500000', '0000-01-01 00:00:00.123500000', 1), + (TIMESTAMP '2121-07-15 15:30:12.123500001', TIMESTAMP '2121-07-15 15:30:12.123500001', TIMESTAMP '0000-01-01 00:00:00.123500001', '2120-14-41 75:30:12.123500001', '0000-01-01 00:00:00.123500001', 1) + """.formatted(tableName)); + + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_varchar timestamp_to_varchar STRING", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN historical_timestamp_to_varchar historical_timestamp_to_varchar STRING", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_timestamp varchar_to_timestamp TIMESTAMP", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN historical_varchar_to_timestamp historical_varchar_to_timestamp TIMESTAMP", tableName)); + + for (HiveTimestampPrecision hiveTimestampPrecision : HiveTimestampPrecision.values()) { + String timestampType = "timestamp(%d)".formatted(hiveTimestampPrecision.getPrecision()); + setHiveTimestampPrecision(hiveTimestampPrecision); + 
assertThat(onTrino().executeQuery("SHOW COLUMNS FROM " + tableName).project(1, 2)).containsExactlyInOrder( + row("reference_timestamp", timestampType), + row("timestamp_to_varchar", "varchar"), + row("historical_timestamp_to_varchar", "varchar"), + row("varchar_to_timestamp", timestampType), + row("historical_varchar_to_timestamp", timestampType), + row("id", "bigint")); + + List allColumns = ImmutableList.of( + "reference_timestamp", + "timestamp_to_varchar", + "historical_timestamp_to_varchar", + "varchar_to_timestamp", + "historical_varchar_to_timestamp", + "id"); + + // For Trino, remove unsupported columns + List prestoReadColumns = removeUnsupportedColumnsForTrino(allColumns, tableName); + Map> expectedTinoResults = Maps.filterKeys( + expectedRowsForEngineProvider(Engine.TRINO, tableName, hiveTimestampPrecision), + prestoReadColumns::contains); + + String prestoSelectQuery = format("SELECT %s FROM %s", String.join(", ", prestoReadColumns), tableName); + assertQueryResults(Engine.TRINO, prestoSelectQuery, expectedTinoResults, prestoReadColumns, 6, tableName); + + assertQueryFailure(() -> onTrino().executeQuery("SELECT historical_timestamp_to_varchar FROM %s".formatted(tableName))) + .hasMessageContaining("Coercion on historical dates is not supported"); + + List hiveReadColumns = removeUnsupportedColumnsForHive(allColumns, tableName); + Map> expectedHiveResults = Maps.filterKeys( + expectedRowsForEngineProvider(Engine.HIVE, tableName, hiveTimestampPrecision), + hiveReadColumns::contains); + + String hiveSelectQuery = format("SELECT %s FROM %s", String.join(", ", hiveReadColumns), tableName); + assertQueryResults(Engine.HIVE, hiveSelectQuery, expectedHiveResults, hiveReadColumns, 6, tableName); + } + } + + protected Map> expectedRowsForEngineProvider(Engine engine, String tableName, HiveTimestampPrecision hiveTimestampPrecision) + { + List timestampValues = expectedTimestampValuesForEngineProvider(engine, hiveTimestampPrecision); + ImmutableMap.Builder> 
rowBuilder = ImmutableMap.>builder() + .put("reference_timestamp", timestampValues) + .put("timestamp_to_varchar", timestampValues.stream() + .map(Object::toString) + .collect(toImmutableList())) + .put("varchar_to_timestamp", coercedTimestampFromString(engine, tableName, hiveTimestampPrecision)) + .put("id", nCopies(6, 1)); + + if (engine == Engine.HIVE) { + rowBuilder.put("historical_timestamp_to_varchar", expectedHistoricalTimestampValuesProvider(tableName).stream() + .map(Object::toString) + .collect(toImmutableList())) + .put("historical_varchar_to_timestamp", expectedHistoricalTimestampValuesProviderForTimestamp(tableName)); + } + + return rowBuilder.buildOrThrow(); + } + + protected List expectedTimestampValuesForEngineProvider(Engine engine, HiveTimestampPrecision hiveTimestampPrecision) + { + if (engine == Engine.HIVE) { + return ImmutableList.of( + Timestamp.valueOf("2121-07-15 15:30:12.123499"), + Timestamp.valueOf("2121-07-15 15:30:12.123500"), + Timestamp.valueOf("2121-07-15 15:30:12.123501"), + Timestamp.valueOf("2121-07-15 15:30:12.123499999"), + Timestamp.valueOf("2121-07-15 15:30:12.123500000"), + Timestamp.valueOf("2121-07-15 15:30:12.123500001")); + } + + return switch (hiveTimestampPrecision) { + case MILLISECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-07-15 15:30:12.123"), + Timestamp.valueOf("2121-07-15 15:30:12.123"), + Timestamp.valueOf("2121-07-15 15:30:12.124"), + Timestamp.valueOf("2121-07-15 15:30:12.123"), + Timestamp.valueOf("2121-07-15 15:30:12.124"), + Timestamp.valueOf("2121-07-15 15:30:12.124")); + + case MICROSECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-07-15 15:30:12.123499"), + Timestamp.valueOf("2121-07-15 15:30:12.123500"), + Timestamp.valueOf("2121-07-15 15:30:12.123501"), + Timestamp.valueOf("2121-07-15 15:30:12.123500"), + Timestamp.valueOf("2121-07-15 15:30:12.123500"), + Timestamp.valueOf("2121-07-15 15:30:12.123500")); + + case NANOSECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-07-15 
15:30:12.123499"), + Timestamp.valueOf("2121-07-15 15:30:12.123500"), + Timestamp.valueOf("2121-07-15 15:30:12.123501"), + Timestamp.valueOf("2121-07-15 15:30:12.123499999"), + Timestamp.valueOf("2121-07-15 15:30:12.123500000"), + Timestamp.valueOf("2121-07-15 15:30:12.123500001")); + default -> throw new IllegalStateException("Unsupported timestamp precision"); + }; + } + + protected List coercedTimestampFromString(Engine engine, String tableName, HiveTimestampPrecision hiveTimestampPrecision) + { + Predicate isFormat = formatName -> tableName.toLowerCase(ENGLISH).contains(formatName); + + if (engine == Engine.HIVE) { + if (isFormat.test("orc")) { + return Arrays.asList( + null, + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + null, + null, + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + null); + } + return ImmutableList.of( + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + Timestamp.valueOf("2121-03-16 03:30:12.123501"), + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + Timestamp.valueOf("2121-03-16 03:30:12.123500001")); + } + + return switch (hiveTimestampPrecision) { + case MILLISECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-03-13 15:30:12.123"), + Timestamp.valueOf("2019-03-03 15:30:12.124"), + Timestamp.valueOf("2121-03-16 03:30:12.124"), + Timestamp.valueOf("2121-03-13 15:30:12.124"), + Timestamp.valueOf("2019-03-03 15:30:12.124"), + Timestamp.valueOf("2121-03-16 03:30:12.124")); + + case MICROSECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + Timestamp.valueOf("2121-03-16 03:30:12.123501"), + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + Timestamp.valueOf("2019-03-03 15:30:12.123500"), + Timestamp.valueOf("2121-03-16 03:30:12.123500")); + + case NANOSECONDS -> + ImmutableList.of( + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + 
Timestamp.valueOf("2019-03-03 15:30:12.1235"), + Timestamp.valueOf("2121-03-16 03:30:12.123501"), + Timestamp.valueOf("2121-03-13 15:30:12.123499"), + Timestamp.valueOf("2019-03-03 15:30:12.1235"), + Timestamp.valueOf("2121-03-16 03:30:12.123500001")); + default -> throw new IllegalStateException("Unsupported timestamp precision"); + }; + } + + protected List expectedHistoricalTimestampValuesProvider(String tableName) + { + Predicate isFormat = formatName -> tableName.toLowerCase(ENGLISH).contains(formatName); + // ORC tables render `0000-01-01` date as `0001-01-03` + if (isFormat.test("orc")) { + return ImmutableList.of( + Timestamp.valueOf("0001-01-03 00:00:00.123499"), + Timestamp.valueOf("0001-01-03 00:00:00.123500"), + Timestamp.valueOf("0001-01-03 00:00:00.123501"), + Timestamp.valueOf("0001-01-03 00:00:00.123499999"), + Timestamp.valueOf("0001-01-03 00:00:00.123500000"), + Timestamp.valueOf("0001-01-03 00:00:00.123500001")); + } + return ImmutableList.of( + Timestamp.valueOf("0001-01-01 00:00:00.123499"), + Timestamp.valueOf("0001-01-01 00:00:00.123500"), + Timestamp.valueOf("0001-01-01 00:00:00.123501"), + Timestamp.valueOf("0001-01-01 00:00:00.123499999"), + Timestamp.valueOf("0001-01-01 00:00:00.123500000"), + Timestamp.valueOf("0001-01-01 00:00:00.123500001")); + } + + protected List expectedHistoricalTimestampValuesProviderForTimestamp(String tableName) + { + Predicate isFormat = formatName -> tableName.toLowerCase(ENGLISH).contains(formatName); + // ORC tables render `0000-01-01` date as `0002-12-30` when read back as timestamp + if (isFormat.test("orc")) { + return ImmutableList.of( + Timestamp.valueOf("0002-12-30 00:00:00.123499"), + Timestamp.valueOf("0002-12-30 00:00:00.123500"), + Timestamp.valueOf("0002-12-30 00:00:00.123501"), + Timestamp.valueOf("0002-12-30 00:00:00.123499999"), + Timestamp.valueOf("0002-12-30 00:00:00.123500000"), + Timestamp.valueOf("0002-12-30 00:00:00.123500001")); + } + return ImmutableList.of( + Timestamp.valueOf("0001-01-01 00:00:00.123499"), + 
Timestamp.valueOf("0001-01-01 00:00:00.123500"), + Timestamp.valueOf("0001-01-01 00:00:00.123501"), + Timestamp.valueOf("0001-01-01 00:00:00.123499999"), + Timestamp.valueOf("0001-01-01 00:00:00.123500000"), + Timestamp.valueOf("0001-01-01 00:00:00.123500001")); + } + protected List removeUnsupportedColumnsForHive(List columns, String tableName) { // TODO: assert exceptions being thrown for each column @@ -495,7 +742,18 @@ protected Map expectedExceptionsWithHiveContext() protected Map expectedExceptionsWithTrinoContext() { - return ImmutableMap.of(); + return ImmutableMap.builder() + .put(columnContext("orc", "historical_timestamp_to_varchar"), "Coercion on historical dates is not supported") + .put(columnContext("parquet", "historical_timestamp_to_varchar"), "Coercion on historical dates is not supported") + .put(columnContext("rcbinary", "historical_timestamp_to_varchar"), "Coercion on historical dates is not supported") + .put(columnContext("rctext", "historical_timestamp_to_varchar"), "Coercion on historical dates is not supported") + .put(columnContext("textfile", "historical_timestamp_to_varchar"), "Coercion on historical dates is not supported") + .put(columnContext("orc", "historical_varchar_to_timestamp"), "Coercion on historical dates is not supported") + .put(columnContext("parquet", "historical_varchar_to_timestamp"), "Coercion on historical dates is not supported") + .put(columnContext("rcbinary", "historical_varchar_to_timestamp"), "Coercion on historical dates is not supported") + .put(columnContext("rctext", "historical_varchar_to_timestamp"), "Coercion on historical dates is not supported") + .put(columnContext("textfile", "historical_varchar_to_timestamp"), "Coercion on historical dates is not supported") + .buildOrThrow(); } private void assertQueryResults( @@ -585,6 +843,8 @@ private void assertProperAlteredTableSchema(String tableName) row("timestamp_to_string", "varchar"), row("timestamp_to_bounded_varchar", "varchar(30)"), 
row("timestamp_to_smaller_varchar", "varchar(4)"), + row("smaller_varchar_to_timestamp", "timestamp(3)"), + row("varchar_to_timestamp", "timestamp(3)"), row("id", "bigint")); } @@ -636,6 +896,12 @@ private void assertColumnTypes( .put("timestamp_to_string", VARCHAR) .put("timestamp_to_bounded_varchar", VARCHAR) .put("timestamp_to_smaller_varchar", VARCHAR) + .put("smaller_varchar_to_timestamp", TIMESTAMP) + .put("varchar_to_timestamp", TIMESTAMP) + .put("reference_timestamp", TIMESTAMP) + .put("timestamp_to_varchar", VARCHAR) + .put("historical_timestamp_to_varchar", VARCHAR) + .put("historical_varchar_to_timestamp", TIMESTAMP) .buildOrThrow(); assertThat(queryResult) @@ -677,6 +943,8 @@ private static void alterTableColumnTypes(String tableName) onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_string timestamp_to_string string", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_bounded_varchar timestamp_to_bounded_varchar varchar(30)", tableName)); onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN timestamp_to_smaller_varchar timestamp_to_smaller_varchar varchar(4)", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN smaller_varchar_to_timestamp smaller_varchar_to_timestamp timestamp", tableName)); + onHive().executeQuery(format("ALTER TABLE %s CHANGE COLUMN varchar_to_timestamp varchar_to_timestamp timestamp", tableName)); } protected static TableInstance mutableTableInstanceOf(TableDefinition tableDefinition) @@ -763,4 +1031,14 @@ private static QueryResult execute(Engine engine, String sql, QueryExecutor.Quer { return engine.queryExecutor().executeQuery(sql, params); } + + private static void setHiveTimestampPrecision(HiveTimestampPrecision hiveTimestampPrecision) + { + try { + setSessionProperty(onTrino().getConnection(), "hive.timestamp_precision", hiveTimestampPrecision.name()); + } + catch (SQLException e) { + throw new RuntimeException(e); + } + } } diff --git 
a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java index 2004b547219d..3679d644903e 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnPartitionedTable.java @@ -24,6 +24,7 @@ import java.util.Optional; +import static io.trino.tempto.Requirements.compose; import static io.trino.tempto.assertions.QueryAssert.Row.row; import static io.trino.tempto.assertions.QueryAssert.assertThat; import static io.trino.tempto.fulfillment.table.MutableTableRequirement.State.CREATED; @@ -43,10 +44,18 @@ public class TestHiveCoercionOnPartitionedTable .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_TEXTFILE = tableDefinitionForTimestampCoercionBuilder("TEXTFILE", Optional.empty(), Optional.of("DELIMITED FIELDS TERMINATED BY '|'")) + .setNoData() + .build(); + public static final HiveTableDefinition HIVE_COERCION_PARQUET = tableDefinitionBuilder("PARQUET", Optional.empty(), Optional.empty()) .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_PARQUET = tableDefinitionForTimestampCoercionBuilder("PARQUET", Optional.empty(), Optional.empty()) + .setNoData() + .build(); + public static final HiveTableDefinition HIVE_COERCION_AVRO = avroTableDefinitionBuilder() .setNoData() .build(); @@ -55,14 +64,26 @@ public class TestHiveCoercionOnPartitionedTable .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_ORC = tableDefinitionForTimestampCoercionBuilder("ORC", Optional.empty(), Optional.empty()) + .setNoData() + .build(); + public static final HiveTableDefinition HIVE_COERCION_RCTEXT = tableDefinitionBuilder("RCFILE", Optional.of("RCTEXT"), 
Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'")) .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_RCTEXT = tableDefinitionForTimestampCoercionBuilder("RCFILE", Optional.of("RCTEXT"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'")) + .setNoData() + .build(); + public static final HiveTableDefinition HIVE_COERCION_RCBINARY = tableDefinitionBuilder("RCFILE", Optional.of("RCBINARY"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'")) .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_RCBINARY = tableDefinitionForTimestampCoercionBuilder("RCFILE", Optional.of("RCBINARY"), Optional.of("SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'")) + .setNoData() + .build(); + private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBuilder(String fileFormat, Optional recommendTableName, Optional rowFormat) { String tableName = format("%s_hive_coercion", recommendTableName.orElse(fileFormat).toLowerCase(ENGLISH)); @@ -101,7 +122,26 @@ private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBui " char_to_smaller_char CHAR(3)," + " timestamp_to_string TIMESTAMP," + " timestamp_to_bounded_varchar TIMESTAMP," + - " timestamp_to_smaller_varchar TIMESTAMP" + + " timestamp_to_smaller_varchar TIMESTAMP," + + " smaller_varchar_to_timestamp VARCHAR(4)," + + " varchar_to_timestamp STRING" + + ") " + + "PARTITIONED BY (id BIGINT) " + + rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") + + "STORED AS " + fileFormat); + } + + private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionForTimestampCoercionBuilder(String fileFormat, Optional recommendTableName, Optional rowFormat) + { + String tableName = format("%s_hive_timestamp_coercion", recommendTableName.orElse(fileFormat).toLowerCase(ENGLISH)); + return 
HiveTableDefinition.builder(tableName) + .setCreateTableDDLTemplate("" + + "CREATE TABLE %NAME%(" + + " reference_timestamp TIMESTAMP," + + " timestamp_to_varchar TIMESTAMP," + + " historical_timestamp_to_varchar TIMESTAMP," + + " varchar_to_timestamp STRING," + + " historical_varchar_to_timestamp STRING" + ") " + "PARTITIONED BY (id BIGINT) " + rowFormat.map(s -> format("ROW FORMAT %s ", s)).orElse("") + @@ -126,7 +166,9 @@ public static final class TextRequirements @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_TEXTFILE).withState(CREATED).build(); + return compose( + MutableTableRequirement.builder(HIVE_COERCION_TEXTFILE).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_TEXTFILE).withState(CREATED).build()); } } @@ -136,7 +178,9 @@ public static final class OrcRequirements @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(); + return compose( + MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_ORC).withState(CREATED).build()); } } @@ -146,7 +190,9 @@ public static final class RcTextRequirements @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_RCTEXT).withState(CREATED).build(); + return compose( + MutableTableRequirement.builder(HIVE_COERCION_RCTEXT).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_RCTEXT).withState(CREATED).build()); } } @@ -156,7 +202,9 @@ public static final class RcBinaryRequirements @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_RCBINARY).withState(CREATED).build(); + return compose( + 
MutableTableRequirement.builder(HIVE_COERCION_RCBINARY).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_RCBINARY).withState(CREATED).build()); } } @@ -166,7 +214,9 @@ public static final class ParquetRequirements @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_PARQUET).withState(CREATED).build(); + return compose( + MutableTableRequirement.builder(HIVE_COERCION_PARQUET).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_PARQUET).withState(CREATED).build()); } } @@ -187,6 +237,13 @@ public void testHiveCoercionTextFile() doTestHiveCoercion(HIVE_COERCION_TEXTFILE); } + @Requires(TextRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercionTextFile() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_TEXTFILE); + } + @Requires(OrcRequirements.class) @Test(groups = {HIVE_COERCION, JDBC}) public void testHiveCoercionOrc() @@ -194,6 +251,13 @@ public void testHiveCoercionOrc() doTestHiveCoercion(HIVE_COERCION_ORC); } + @Requires(OrcRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercionOrc() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_ORC); + } + @Requires(RcTextRequirements.class) @Test(groups = {HIVE_COERCION, JDBC}) public void testHiveCoercionRcText() @@ -201,6 +265,13 @@ public void testHiveCoercionRcText() doTestHiveCoercion(HIVE_COERCION_RCTEXT); } + @Requires(RcTextRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercionRcText() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_RCTEXT); + } + @Requires(RcBinaryRequirements.class) @Test(groups = {HIVE_COERCION, JDBC}) public void testHiveCoercionRcBinary() @@ -208,6 +279,13 @@ public void testHiveCoercionRcBinary() 
doTestHiveCoercion(HIVE_COERCION_RCBINARY); } + @Requires(RcBinaryRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercionRcBinary() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_RCBINARY); + } + @Requires(ParquetRequirements.class) @Test(groups = {HIVE_COERCION, JDBC}) public void testHiveCoercionParquet() @@ -215,6 +293,13 @@ public void testHiveCoercionParquet() doTestHiveCoercion(HIVE_COERCION_PARQUET); } + @Requires(ParquetRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercionParquet() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_PARQUET); + } + @Requires(AvroRequirements.class) @Test(groups = {HIVE_COERCION, JDBC}) public void testHiveCoercionAvro() diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java index a430a1fd2ff6..0905e133ced9 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveCoercionOnUnpartitionedTable.java @@ -24,6 +24,7 @@ import java.util.Map; +import static io.trino.tempto.Requirements.compose; import static io.trino.tempto.fulfillment.table.MutableTableRequirement.State.CREATED; import static io.trino.tests.product.TestGroups.HIVE_COERCION; import static io.trino.tests.product.TestGroups.JDBC; @@ -37,6 +38,10 @@ public class TestHiveCoercionOnUnpartitionedTable .setNoData() .build(); + public static final HiveTableDefinition HIVE_TIMESTAMP_COERCION_ORC = tableDefinitionForTimestampCoercionBuilder("ORC") + .setNoData() + .build(); + private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionBuilder(String fileFormat) { String tableName 
= format("%s_hive_coercion_unpartitioned", fileFormat.toLowerCase(ENGLISH)); @@ -75,17 +80,36 @@ char_to_smaller_char CHAR(3), timestamp_to_string TIMESTAMP, timestamp_to_bounded_varchar TIMESTAMP, timestamp_to_smaller_varchar TIMESTAMP, + smaller_varchar_to_timestamp VARCHAR(4), + varchar_to_timestamp STRING, id BIGINT) STORED AS\s""" + fileFormat); } + private static HiveTableDefinition.HiveTableDefinitionBuilder tableDefinitionForTimestampCoercionBuilder(String fileFormat) + { + String tableName = format("%s_hive_timestamp_coercion_unpartitioned", fileFormat.toLowerCase(ENGLISH)); + return HiveTableDefinition.builder(tableName) + .setCreateTableDDLTemplate(""" + CREATE TABLE %NAME%( + reference_timestamp TIMESTAMP, + timestamp_to_varchar TIMESTAMP, + historical_timestamp_to_varchar TIMESTAMP, + varchar_to_timestamp STRING, + historical_varchar_to_timestamp STRING, + id BIGINT) + STORED AS\s""" + fileFormat); + } + public static final class OrcRequirements implements RequirementsProvider { @Override public Requirement getRequirements(Configuration configuration) { - return MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(); + return compose( + MutableTableRequirement.builder(HIVE_COERCION_ORC).withState(CREATED).build(), + MutableTableRequirement.builder(HIVE_TIMESTAMP_COERCION_ORC).withState(CREATED).build()); } } @@ -96,11 +120,20 @@ public void testHiveCoercionOrc() doTestHiveCoercion(HIVE_COERCION_ORC); } + @Requires(OrcRequirements.class) + @Test(groups = {HIVE_COERCION, JDBC}) + public void testHiveTimestampCoercion() + { + doTestHiveCoercionWithDifferentTimestampPrecision(HIVE_TIMESTAMP_COERCION_ORC); + } + @Override protected Map expectedExceptionsWithTrinoContext() { // TODO: These expected failures should be fixed. 
return ImmutableMap.builder() + // Expected failures from BaseTestHiveCoercion + .putAll(super.expectedExceptionsWithTrinoContext()) // ORC .put(columnContext("orc", "row_to_row"), "Cannot read SQL type 'smallint' from ORC stream '.row_to_row.ti2si' of type BYTE") .put(columnContext("orc", "list_to_list"), "Cannot read SQL type 'integer' from ORC stream '.list_to_list.item.ti2int' of type BYTE")