@@ -67,6 +67,7 @@ public enum HiveErrorCode
HIVE_TABLE_LOCK_NOT_ACQUIRED(40, EXTERNAL),
HIVE_VIEW_TRANSLATION_ERROR(41, EXTERNAL),
HIVE_PARTITION_NOT_FOUND(42, USER_ERROR),
HIVE_INVALID_TIMESTAMP_COERCION(43, EXTERNAL),
/**/;

private final ErrorCode errorCode;
@@ -23,7 +23,6 @@
import io.trino.plugin.hive.coercions.IntegerNumberToVarcharCoercer;
import io.trino.plugin.hive.coercions.IntegerNumberUpscaleCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.ShortTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.VarcharCoercer;
import io.trino.plugin.hive.coercions.VarcharToIntegerNumberCoercer;
import io.trino.plugin.hive.type.Category;
@@ -96,6 +95,7 @@
import static io.trino.spi.block.ColumnarRow.toColumnarRow;
import static io.trino.spi.type.DoubleType.DOUBLE;
import static io.trino.spi.type.RealType.REAL;
import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;

@@ -305,6 +305,8 @@ public static Optional<Function<Block, Block>> createCoercer(TypeManager typeMan
return Optional.empty();
}

// Hive treats TIMESTAMP as having NANOSECONDS precision, so when we coerce from a timestamp column,
// we read it as a TIMESTAMP(9) column and coerce accordingly.
Type fromType = fromHiveType.getType(typeManager, timestampPrecision);
Type toType = toHiveType.getType(typeManager, timestampPrecision);

@@ -359,11 +361,8 @@ public static Optional<Function<Block, Block>> createCoercer(TypeManager typeMan
if (fromType == REAL && toType instanceof DecimalType toDecimalType) {
return Optional.of(createRealToDecimalCoercer(toDecimalType));
}
if (fromType instanceof TimestampType timestampType && toType instanceof VarcharType varcharType) {
if (timestampType.isShort()) {
return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType));
}
return Optional.of(new LongTimestampToVarcharCoercer(timestampType, varcharType));
if (fromType instanceof TimestampType && toType instanceof VarcharType varcharType) {
return Optional.of(new LongTimestampToVarcharCoercer(TIMESTAMP_NANOS, varcharType));
}
if ((fromType instanceof ArrayType) && (toType instanceof ArrayType)) {
return Optional.of(new ListCoercer(typeManager, fromHiveType, toHiveType, timestampPrecision));
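Why pin the coercer to TIMESTAMP(9)? A minimal sketch of Trino's timestamp encoding, assuming the usual split between short and long timestamp types (precision up to 6 packs epoch micros into a single long; precision 7 to 9 carries a two-part LongTimestamp); the sample value is hypothetical:

```java
import io.trino.spi.type.LongTimestamp;

import static io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS;
import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS;

public final class TimestampEncodingSketch
{
    public static void main(String[] args)
    {
        // Short timestamps (precision <= 6) fit in one long, so blocks hold raw epoch micros.
        System.out.println(TIMESTAMP_MILLIS.isShort()); // true
        // TIMESTAMP(9) is a long type: blocks hold LongTimestamp values (micros + picos of micro).
        System.out.println(TIMESTAMP_NANOS.isShort()); // false
        // Hypothetical value: 2023-04-13 10:15:30.123456789 UTC
        LongTimestamp ts = new LongTimestamp(1_681_380_930_123_456L, 789_000);
        System.out.println(ts.getEpochMicros() + " micros, " + ts.getPicosOfMicro() + " picos of micro");
    }
}
```

This is why LongTimestampToVarcharCoercer can be reused for every precision once the read type is forced to TIMESTAMP_NANOS: the block is then guaranteed to contain LongTimestamp objects.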
@@ -560,14 +560,16 @@ public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regular
projectedColumn.getDereferenceIndices(),
projectedColumn.getDereferenceNames(),
fromHiveType,
fromHiveType.getType(typeManager));
// Hive treats TIMESTAMP as having NANOSECONDS precision, so when we coerce from a timestamp column,
// we read it as a TIMESTAMP(9) column and coerce accordingly.
fromHiveType.getType(typeManager, HiveTimestampPrecision.NANOSECONDS));
Member:
What if fromHiveType is a Row/Struct and the timestamp field isn't being coerced - can injecting NANOS leak here?

In fact, I don't know why this needs to change at all.

Member Author:
Changing it here allows the coerced column to be read as NANOS from the underlying file - is there any extension point we could use to force the column to be read as NANOS?

Member Author:
fromHiveType.getType(typeManager) doesn't consider the precision from the session or config - so aren't we injecting MILLIS, which is mapped to the DEFAULT precision here?

Member:
> fromHiveType.getType(typeManager) doesn't consider the precision from the session or config

That's why it was deprecated - the code before the changes wasn't good.

But I don't know why NANOSECONDS is always good here. Is this applied to coerced columns only?

Member Author:
If we didn't apply NANOSECONDS, the data from the underlying ConnectorPageSource would be read as MILLISECONDS and we couldn't get a LongTimestamp object from the block. IIUC this applies only when the coerced column is a timestamp column - I'll double-check for complex columns like STRUCT. But without this change we would be leaking MILLIS, and that is not intentional, right?
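A small sketch of the precision mapping discussed in this thread; the helper class is hypothetical, but the two getType overloads are the ones referenced in the diff:

```java
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.HiveType;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;

final class PrecisionMappingSketch
{
    private PrecisionMappingSketch() {}

    static void show(TypeManager typeManager)
    {
        // Deprecated overload: falls back to the default precision (MILLIS), i.e. TIMESTAMP(3).
        Type millis = HiveType.HIVE_TIMESTAMP.getType(typeManager);
        // Explicit precision: TIMESTAMP(9), so the page source produces LongTimestamp values.
        Type nanos = HiveType.HIVE_TIMESTAMP.getType(typeManager, HiveTimestampPrecision.NANOSECONDS);
        System.out.println(millis + " vs " + nanos);
    }
}
```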

});

return new HiveColumnHandle(
columnHandle.getBaseColumnName(),
columnHandle.getBaseHiveColumnIndex(),
fromHiveTypeBase,
fromHiveTypeBase.getType(typeManager),
fromHiveTypeBase.getType(typeManager, HiveTimestampPrecision.NANOSECONDS),
Member:

(same here)

Member Author:
I think we can continue the discussion here - #17604 (comment)

newColumnProjectionInfo,
columnHandle.getColumnType(),
columnHandle.getComment());
@@ -14,6 +14,7 @@
package io.trino.plugin.hive.coercions;

import io.airlift.slice.Slices;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.BlockBuilder;
import io.trino.spi.type.LongTimestamp;
@@ -25,9 +26,11 @@
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;

import static io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_TIMESTAMP_COERCION;
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND;
import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND;
import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_NANOSECOND;
import static io.trino.spi.type.Timestamps.SECONDS_PER_DAY;
import static io.trino.spi.type.Varchars.truncateToLength;
import static java.lang.Math.floorDiv;
import static java.lang.Math.floorMod;
@@ -46,30 +49,10 @@ public final class TimestampCoercer
.toFormatter()
.withChronology(IsoChronology.INSTANCE);

private TimestampCoercer() {}
// Before 1900, Java Time and Joda Time are not consistent with java.sql.Date and java.util.Calendar
private static final long START_OF_MODERN_ERA_SECONDS = java.time.LocalDate.of(1900, 1, 1).toEpochDay() * SECONDS_PER_DAY;

public static class ShortTimestampToVarcharCoercer
extends TypeCoercer<TimestampType, VarcharType>
{
public ShortTimestampToVarcharCoercer(TimestampType fromType, VarcharType toType)
{
super(fromType, toType);
}

@Override
protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
{
long epochMicros = fromType.getLong(block, position);
long epochSecond = floorDiv(epochMicros, MICROSECONDS_PER_SECOND);
int nanoFraction = floorMod(epochMicros, MICROSECONDS_PER_SECOND) * NANOSECONDS_PER_MICROSECOND;
toType.writeSlice(
blockBuilder,
truncateToLength(
Slices.utf8Slice(
LOCAL_DATE_TIME.format(LocalDateTime.ofEpochSecond(epochSecond, nanoFraction, UTC))),
toType));
}
}
private TimestampCoercer() {}

public static class LongTimestampToVarcharCoercer
extends TypeCoercer<TimestampType, VarcharType>
@@ -88,6 +71,9 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
long microsFraction = floorMod(timestamp.getEpochMicros(), MICROSECONDS_PER_SECOND);
// Hive timestamp has nanoseconds precision, so no truncation here
long nanosFraction = (microsFraction * NANOSECONDS_PER_MICROSECOND) + (timestamp.getPicosOfMicro() / PICOSECONDS_PER_NANOSECOND);
if (epochSecond < START_OF_MODERN_ERA_SECONDS) {
throw new TrinoException(HIVE_INVALID_TIMESTAMP_COERCION, "Coercion on historical dates is not supported");
}

toType.writeSlice(
blockBuilder,
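A runnable sketch of the coercion math in applyCoercedValue above, with the pre-1900 guard inlined; the constants and sample value are reproduced locally (assuming UTC) rather than imported from Trino:

```java
import java.time.LocalDate;
import java.time.LocalDateTime;

import static java.lang.Math.floorDiv;
import static java.lang.Math.floorMod;
import static java.time.ZoneOffset.UTC;

public final class CoercionMathSketch
{
    private static final long MICROSECONDS_PER_SECOND = 1_000_000;
    private static final long SECONDS_PER_DAY = 24 * 60 * 60;
    // 1900-01-01 in epoch seconds; toEpochDay() is negative for pre-1970 dates
    private static final long START_OF_MODERN_ERA_SECONDS = LocalDate.of(1900, 1, 1).toEpochDay() * SECONDS_PER_DAY;

    public static void main(String[] args)
    {
        long epochMicros = 1_681_380_930_123_456L; // 2023-04-13 10:15:30.123456 UTC (hypothetical)
        int picosOfMicro = 789_000;                // the remaining 789 nanoseconds

        long epochSecond = floorDiv(epochMicros, MICROSECONDS_PER_SECOND);
        long microsFraction = floorMod(epochMicros, MICROSECONDS_PER_SECOND);
        // Hive timestamps carry nanosecond precision, so nothing is truncated here
        long nanosFraction = (microsFraction * 1_000) + (picosOfMicro / 1_000);
        if (epochSecond < START_OF_MODERN_ERA_SECONDS) {
            throw new IllegalStateException("Coercion on historical dates is not supported");
        }
        // Prints 2023-04-13T10:15:30.123456789
        System.out.println(LocalDateTime.ofEpochSecond(epochSecond, (int) nanosFraction, UTC));
    }
}
```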
@@ -16,28 +16,25 @@
import io.trino.orc.metadata.OrcType.OrcTypeKind;
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.ShortTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TypeCoercer;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.util.Optional;

import static io.trino.spi.type.TimestampType.createTimestampType;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP;
import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS;

public final class OrcTypeTranslator
{
private OrcTypeTranslator() {}

public static Optional<TypeCoercer<? extends Type, ? extends Type>> createCoercer(OrcTypeKind fromOrcType, Type toTrinoType, HiveTimestampPrecision timestampPrecision)
{
if (fromOrcType.equals(OrcTypeKind.TIMESTAMP) && toTrinoType instanceof VarcharType varcharType) {
TimestampType timestampType = createTimestampType(timestampPrecision.getPrecision());
if (timestampType.isShort()) {
return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType));
}
return Optional.of(new LongTimestampToVarcharCoercer(timestampType, varcharType));
if (fromOrcType == TIMESTAMP && toTrinoType instanceof VarcharType varcharType) {
// Hive treats TIMESTAMP as having NANOSECONDS precision, so when we coerce from a timestamp column,
// we read it as a TIMESTAMP(9) column and coerce accordingly.
return Optional.of(new LongTimestampToVarcharCoercer(TIMESTAMP_NANOS, varcharType));
}
return Optional.empty();
}
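A hedged usage sketch of the translator after this change: the precision parameter is still accepted but no longer consulted for timestamps, since the coercer is pinned to TIMESTAMP(9). Package locations and the wiring below are assumptions for illustration:

```java
import io.trino.orc.metadata.OrcType.OrcTypeKind;
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.coercions.TypeCoercer;
import io.trino.plugin.hive.orc.OrcTypeTranslator; // assumed package location
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.util.Optional;

final class OrcCoercionSketch
{
    private OrcCoercionSketch() {}

    static void show()
    {
        // File schema says TIMESTAMP, table schema now says VARCHAR: a coercer is returned.
        Optional<TypeCoercer<? extends Type, ? extends Type>> coercer = OrcTypeTranslator.createCoercer(
                OrcTypeKind.TIMESTAMP,
                VarcharType.VARCHAR,
                HiveTimestampPrecision.MILLISECONDS); // ignored for timestamps after this change
        System.out.println(coercer.isPresent()); // true
    }
}
```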
@@ -34,7 +34,6 @@
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HiveConfig;
import io.trino.plugin.hive.HivePageSourceFactory;
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.ReaderColumns;
import io.trino.plugin.hive.ReaderPageSource;
import io.trino.plugin.hive.acid.AcidTransaction;
@@ -45,7 +44,6 @@
import io.trino.spi.connector.EmptyPageSource;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.Type;
import io.trino.spi.type.TypeManager;
import org.apache.hadoop.conf.Configuration;
import org.joda.time.DateTimeZone;

@@ -62,7 +60,6 @@
import static io.trino.plugin.hive.HiveErrorCode.HIVE_BAD_DATA;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT;
import static io.trino.plugin.hive.HivePageSourceProvider.projectBaseColumns;
import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision;
import static io.trino.plugin.hive.ReaderPageSource.noProjectionAdaptation;
import static io.trino.plugin.hive.util.HiveClassNames.COLUMNAR_SERDE_CLASS;
import static io.trino.plugin.hive.util.HiveClassNames.LAZY_BINARY_COLUMNAR_SERDE_CLASS;
@@ -77,15 +74,13 @@ public class RcFilePageSourceFactory
{
private static final DataSize BUFFER_SIZE = DataSize.of(8, Unit.MEGABYTE);

private final TypeManager typeManager;
private final TrinoFileSystemFactory fileSystemFactory;
private final FileFormatDataSourceStats stats;
private final DateTimeZone timeZone;

@Inject
public RcFilePageSourceFactory(TypeManager typeManager, TrinoFileSystemFactory fileSystemFactory, FileFormatDataSourceStats stats, HiveConfig hiveConfig)
public RcFilePageSourceFactory(TrinoFileSystemFactory fileSystemFactory, FileFormatDataSourceStats stats, HiveConfig hiveConfig)
{
this.typeManager = requireNonNull(typeManager, "typeManager is null");
this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null");
this.stats = requireNonNull(stats, "stats is null");
this.timeZone = hiveConfig.getRcfileDateTimeZone();
@@ -168,9 +163,8 @@ else if (deserializerClassName.equals(COLUMNAR_SERDE_CLASS)) {

try {
ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
HiveTimestampPrecision timestampPrecision = getTimestampPrecision(session);
for (HiveColumnHandle column : projectedReaderColumns) {
readColumns.put(column.getBaseHiveColumnIndex(), column.getHiveType().getType(typeManager, timestampPrecision));
readColumns.put(column.getBaseHiveColumnIndex(), column.getType());
}

RcFileReader rcFileReader = new RcFileReader(
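To make the factory simplification concrete, a minimal sketch of the new read-columns loop (the helper class is hypothetical; the handle methods are the ones used in the diff). Each HiveColumnHandle now arrives with its Trino type already resolved upstream, at nanosecond precision for timestamps, so the factory needs neither a TypeManager nor the session precision:

```java
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.spi.type.Type;

import java.util.List;
import java.util.Map;

final class ReadColumnsSketch
{
    private ReadColumnsSketch() {}

    static Map<Integer, Type> readColumns(List<HiveColumnHandle> projectedReaderColumns)
    {
        ImmutableMap.Builder<Integer, Type> readColumns = ImmutableMap.builder();
        for (HiveColumnHandle column : projectedReaderColumns) {
            // The handle's type was resolved by HivePageSourceProvider (TIMESTAMP(9) for timestamps)
            readColumns.put(column.getBaseHiveColumnIndex(), column.getType());
        }
        return readColumns.buildOrThrow();
    }
}
```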
@@ -206,7 +206,7 @@ public static Set<HivePageSourceFactory> getDefaultHivePageSourceFactories(HdfsE
.add(new RegexPageSourceFactory(fileSystemFactory, stats, hiveConfig))
.add(new SimpleTextFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
.add(new SimpleSequenceFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
.add(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, fileSystemFactory, stats, hiveConfig))
.add(new RcFilePageSourceFactory(fileSystemFactory, stats, hiveConfig))
.add(new OrcPageSourceFactory(new OrcReaderConfig(), fileSystemFactory, stats, hiveConfig))
.add(new ParquetPageSourceFactory(fileSystemFactory, stats, new ParquetReaderConfig(), hiveConfig))
.build();
@@ -282,7 +282,7 @@ public void testRcTextPageSource(int rowCount, long fileSizePadding)
.withColumns(TEST_COLUMNS)
.withRowsCount(rowCount)
.withFileSizePadding(fileSizePadding)
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test(dataProvider = "rowCount")
@@ -299,7 +299,7 @@ public void testRcTextOptimizedWriter(int rowCount)
.withRowsCount(rowCount)
.withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test(dataProvider = "rowCount")
@@ -316,7 +316,7 @@ public void testRcBinaryPageSource(int rowCount)
assertThatFileFormat(RCBINARY)
.withColumns(testColumns)
.withRowsCount(rowCount)
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test(dataProvider = "rowCount")
@@ -341,7 +341,7 @@ public void testRcBinaryOptimizedWriter(int rowCount)
// generic Hive writer corrupts timestamps
.withSkipGenericWriterTest()
.withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.withColumns(testColumnsNoTimestamps)
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
@@ -571,13 +571,13 @@ public void testTruncateVarcharColumn()
assertThatFileFormat(RCTEXT)
.withWriteColumns(ImmutableList.of(writeColumn))
.withReadColumns(ImmutableList.of(readColumn))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

assertThatFileFormat(RCBINARY)
.withWriteColumns(ImmutableList.of(writeColumn))
.withReadColumns(ImmutableList.of(readColumn))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()))
.isReadableByRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));

assertThatFileFormat(ORC)
@@ -804,7 +804,7 @@ public void testRCTextProjectedColumnsPageSource(int rowCount)
.withWriteColumns(writeColumns)
.withReadColumns(readColumns)
.withRowsCount(rowCount)
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test(dataProvider = "rowCount")
@@ -837,7 +837,7 @@ public void testRCBinaryProjectedColumns(int rowCount)
// generic Hive writer corrupts timestamps
.withSkipGenericWriterTest()
.withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test(dataProvider = "rowCount")
@@ -867,7 +867,7 @@ public void testRCBinaryProjectedColumnsPageSource(int rowCount)
// generic Hive writer corrupts timestamps
.withSkipGenericWriterTest()
.withFileWriterFactory(new RcFileFileWriterFactory(FILE_SYSTEM_FACTORY, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE))
.isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
.isReadableByPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()));
}

@Test
@@ -884,12 +884,12 @@ public void testFailForLongVarcharPartitionColumn()

assertThatFileFormat(RCTEXT)
.withColumns(columns)
.isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
.isFailingForPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
.isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

assertThatFileFormat(RCBINARY)
.withColumns(columns)
.isFailingForPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
.isFailingForPageSource(new RcFilePageSourceFactory(FILE_SYSTEM_FACTORY, STATS, new HiveConfig()), expectedErrorCode, expectedMessage)
.isFailingForRecordCursor(createGenericHiveRecordCursorProvider(HDFS_ENVIRONMENT), expectedErrorCode, expectedMessage);

assertThatFileFormat(ORC)