diff --git a/pom.xml b/pom.xml index 790bc74736c9..eb42c1b19e6d 100644 --- a/pom.xml +++ b/pom.xml @@ -416,7 +416,7 @@ io.prestosql.hive hive-apache - 3.0.0-1 + 3.1.1-1-SNAPSHOT diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java index a4f1c45fd45d..6ab6c1625238 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursor.java @@ -18,6 +18,7 @@ import io.prestosql.hadoop.TextLineLengthLimitExceededException; import io.prestosql.spi.PrestoException; import io.prestosql.spi.block.Block; +import io.prestosql.spi.connector.ConnectorSession; import io.prestosql.spi.connector.RecordCursor; import io.prestosql.spi.type.DecimalType; import io.prestosql.spi.type.Decimals; @@ -25,7 +26,9 @@ import io.prestosql.spi.type.TypeManager; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; @@ -44,12 +47,10 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; import java.util.Arrays; import java.util.List; import java.util.Properties; -import java.util.concurrent.TimeUnit; +import java.util.function.LongUnaryOperator; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -90,6 +91,7 @@ class GenericHiveRecordCursor private final V value; private final Deserializer deserializer; + private final LongUnaryOperator timestampConverter; private final Type[] types; private final HiveType[] hiveTypes; @@ -107,7 +109,6 @@ class GenericHiveRecordCursor private final boolean[] nulls; private final long totalBytes; - private final DateTimeZone hiveStorageTimeZone; private long completedBytes; private Object rowData; @@ -121,6 +122,7 @@ public GenericHiveRecordCursor( Properties splitSchema, List columns, DateTimeZone hiveStorageTimeZone, + ConnectorSession session, TypeManager typeManager) { requireNonNull(path, "path is null"); @@ -129,17 +131,25 @@ public GenericHiveRecordCursor( requireNonNull(splitSchema, "splitSchema is null"); requireNonNull(columns, "columns is null"); requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); + requireNonNull(session, "session is null"); this.path = path; this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); - this.hiveStorageTimeZone = hiveStorageTimeZone; this.deserializer = getDeserializer(configuration, splitSchema); this.rowInspector = getTableObjectInspector(deserializer); + if (session.isLegacyTimestamp()) { + // convert using the real time zone for the underlying data + timestampConverter = millis -> hiveStorageTimeZone.convertLocalToUTC(millis, false); + } + else { + timestampConverter = LongUnaryOperator.identity(); + } + int size = columns.size(); this.types = new Type[size]; @@ -280,35 +290,19 @@ private void parseLongColumn(int column) else { Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveJavaObject(fieldData); checkState(fieldValue != null, "fieldValue should not be null"); - longs[column] = getLongExpressedValue(fieldValue, hiveStorageTimeZone); + longs[column] = getLongExpressedValue(fieldValue); nulls[column] = false; } } - private static long getLongExpressedValue(Object value, DateTimeZone hiveTimeZone) + private long getLongExpressedValue(Object value) { if (value instanceof Date) { - long storageTime = ((Date) value).getTime(); - // convert date from VM current time zone to UTC - long utcMillis = storageTime + DateTimeZone.getDefault().getOffset(storageTime); - return TimeUnit.MILLISECONDS.toDays(utcMillis); + return ((Date) value).toEpochDay(); } if (value instanceof Timestamp) { - // The Hive SerDe parses timestamps using the default time zone of - // this JVM, but the data might have been written using a different - // time zone. We need to convert it to the configured time zone. - - // the timestamp that Hive parsed using the JVM time zone - long parsedJvmMillis = ((Timestamp) value).getTime(); - - // remove the JVM time zone correction from the timestamp - DateTimeZone jvmTimeZone = DateTimeZone.getDefault(); - long hiveMillis = jvmTimeZone.convertUTCToLocal(parsedJvmMillis); - - // convert to UTC using the real time zone for the underlying data - long utcMillis = hiveTimeZone.convertLocalToUTC(hiveMillis, false); - - return utcMillis; + long millis = ((Timestamp) value).toEpochMilli(); + return timestampConverter.applyAsLong(millis); } if (value instanceof Float) { return floatToRawIntBits(((Float) value)); @@ -451,7 +445,7 @@ private void parseObjectColumn(int column) nulls[column] = true; } else { - objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column]); + objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column], timestampConverter); nulls[column] = false; } } diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java index c96ebe18cdc4..f91ab64727c0 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/GenericHiveRecordCursorProvider.java @@ -79,6 +79,7 @@ public Optional createRecordCursor( schema, columns, hiveStorageTimeZone, + session, typeManager)); } diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriteUtils.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriteUtils.java index 590bf625a7fa..450df2f704f8 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriteUtils.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/HiveWriteUtils.java @@ -52,8 +52,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.viewfs.ViewFileSystem; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.ProtectMode; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; @@ -61,11 +63,11 @@ import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.Serializer; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -86,12 +88,9 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; -import org.joda.time.DateTimeZone; import java.io.IOException; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -99,7 +98,6 @@ import java.util.Map; import java.util.Optional; import java.util.Properties; -import java.util.concurrent.TimeUnit; import static com.google.common.base.Strings.padEnd; import static com.google.common.io.BaseEncoding.base16; @@ -154,7 +152,6 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo; import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getVarcharTypeInfo; -import static org.joda.time.DateTimeZone.UTC; public final class HiveWriteUtils { @@ -315,12 +312,10 @@ public static Object getField(Type type, Block block, int position) return type.getSlice(block, position).getBytes(); } if (DateType.DATE.equals(type)) { - long days = type.getLong(block, position); - return new Date(UTC.getMillisKeepLocal(DateTimeZone.getDefault(), TimeUnit.DAYS.toMillis(days))); + return Date.ofEpochDay(toIntExact(type.getLong(block, position))); } if (TimestampType.TIMESTAMP.equals(type)) { - long millisUtc = type.getLong(block, position); - return new Timestamp(millisUtc); + return Timestamp.ofEpochMilli(type.getLong(block, position)); } if (type instanceof DecimalType) { DecimalType decimalType = (DecimalType) type; @@ -933,7 +928,7 @@ public void setField(Block block, int position) private static class DateFieldSetter extends FieldSetter { - private final DateWritable value = new DateWritable(); + private final DateWritableV2 value = new DateWritableV2(); public DateFieldSetter(SettableStructObjectInspector rowInspector, Object row, StructField field) { @@ -951,7 +946,7 @@ public void setField(Block block, int position) private static class TimestampFieldSetter extends FieldSetter { - private final TimestampWritable value = new TimestampWritable(); + private final TimestampWritableV2 value = new TimestampWritableV2(); public TimestampFieldSetter(SettableStructObjectInspector rowInspector, Object row, StructField field) { @@ -961,8 +956,7 @@ public TimestampFieldSetter(SettableStructObjectInspector rowInspector, Object r @Override public void setField(Block block, int position) { - long millisUtc = TimestampType.TIMESTAMP.getLong(block, position); - value.setTime(millisUtc); + value.set(Timestamp.ofEpochMilli(TimestampType.TIMESTAMP.getLong(block, position))); rowInspector.setStructFieldData(row, field, value); } } diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursor.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursor.java index 51539d5794fd..b715a1c8d512 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursor.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursor.java @@ -13,6 +13,7 @@ */ package io.prestosql.plugin.hive; +import io.prestosql.spi.connector.ConnectorSession; import io.prestosql.spi.type.TypeManager; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -51,9 +52,10 @@ public S3SelectRecordCursor( Properties splitSchema, List columns, DateTimeZone hiveStorageTimeZone, + ConnectorSession session, TypeManager typeManager) { - super(configuration, path, recordReader, totalBytes, updateSplitSchema(splitSchema, columns), columns, hiveStorageTimeZone, typeManager); + super(configuration, path, recordReader, totalBytes, updateSplitSchema(splitSchema, columns), columns, hiveStorageTimeZone, session, typeManager); } // since s3select only returns the required column, not the whole columns diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursorProvider.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursorProvider.java index ed7be2695cc6..8ba2d3b7a142 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursorProvider.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/S3SelectRecordCursorProvider.java @@ -87,7 +87,7 @@ public Optional createRecordCursor( IonSqlQueryBuilder queryBuilder = new IonSqlQueryBuilder(typeManager); String ionSqlQuery = queryBuilder.buildSql(columns, effectivePredicate); S3SelectLineRecordReader recordReader = new S3SelectCsvRecordReader(configuration, clientConfig, path, start, length, schema, ionSqlQuery, s3ClientFactory); - return Optional.of(new S3SelectRecordCursor(configuration, path, recordReader, length, schema, columns, hiveStorageTimeZone, typeManager)); + return Optional.of(new S3SelectRecordCursor<>(configuration, path, recordReader, length, schema, columns, hiveStorageTimeZone, session, typeManager)); } // unsupported serdes diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/thrift/ThriftHiveMetastore.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/thrift/ThriftHiveMetastore.java index 8dd8fb985493..807c98eec5d5 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/thrift/ThriftHiveMetastore.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/metastore/thrift/ThriftHiveMetastore.java @@ -1307,7 +1307,8 @@ private PrivilegeBag buildPrivilegeBag( new HiveObjectRef(TABLE, databaseName, tableName, null, null), grantee.getName(), fromPrestoPrincipalType(grantee.getType()), - privilegeGrantInfo)); + privilegeGrantInfo, + "SQL")); } return new PrivilegeBag(privilegeBagBuilder.build()); } diff --git a/presto-hive/src/main/java/io/prestosql/plugin/hive/util/SerDeUtils.java b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/SerDeUtils.java index 07c6aa9e23f6..53afb19a2b0e 100644 --- a/presto-hive/src/main/java/io/prestosql/plugin/hive/util/SerDeUtils.java +++ b/presto-hive/src/main/java/io/prestosql/plugin/hive/util/SerDeUtils.java @@ -54,12 +54,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.joda.time.DateTimeZone; -import java.sql.Timestamp; import java.util.List; import java.util.Map; -import java.util.concurrent.TimeUnit; +import java.util.function.LongUnaryOperator; import static com.google.common.base.Preconditions.checkArgument; import static io.prestosql.spi.type.Chars.truncateToLengthAndTrimSpaces; @@ -71,36 +69,37 @@ public final class SerDeUtils { private SerDeUtils() {} - public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector) + public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector, LongUnaryOperator timestampConverter) { - return requireNonNull(serializeObject(type, null, object, objectInspector), "serialized result is null"); + Block block = serializeObject(type, null, object, objectInspector, timestampConverter); + return requireNonNull(block, "serialized result is null"); } - public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector) + public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, LongUnaryOperator timestampConverter) { - return serializeObject(type, builder, object, inspector, true); + return serializeObject(type, builder, object, inspector, timestampConverter, true); } // This version supports optionally disabling the filtering of null map key, which should only be used for building test data sets // that contain null map keys. For production, null map keys are not allowed. @VisibleForTesting - public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys) + public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, LongUnaryOperator timestampConverter, boolean filterNullMapKeys) { switch (inspector.getCategory()) { case PRIMITIVE: - serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector); + serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector, timestampConverter); return null; case LIST: - return serializeList(type, builder, object, (ListObjectInspector) inspector); + return serializeList(type, builder, object, (ListObjectInspector) inspector, timestampConverter); case MAP: - return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys); + return serializeMap(type, builder, object, (MapObjectInspector) inspector, timestampConverter, filterNullMapKeys); case STRUCT: - return serializeStruct(type, builder, object, (StructObjectInspector) inspector); + return serializeStruct(type, builder, object, (StructObjectInspector) inspector, timestampConverter); } throw new RuntimeException("Unknown object inspector category: " + inspector.getCategory()); } - private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) + private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector, LongUnaryOperator timestampConverter) { requireNonNull(builder, "parent builder is null"); @@ -146,7 +145,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector)); return; case TIMESTAMP: - TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector)); + TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector, timestampConverter)); return; case BINARY: VARBINARY.writeSlice(builder, Slices.wrappedBuffer(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object))); @@ -165,7 +164,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory()); } - private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector) + private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector, LongUnaryOperator timestampConverter) { List list = inspector.getList(object); if (list == null) { @@ -186,7 +185,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec } for (Object element : list) { - serializeObject(elementType, currentBuilder, element, elementInspector); + serializeObject(elementType, currentBuilder, element, elementInspector, timestampConverter); } if (builder != null) { @@ -199,7 +198,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec } } - private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, boolean filterNullMapKeys) + private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, LongUnaryOperator timestampConverter, boolean filterNullMapKeys) { Map map = inspector.getMap(object); if (map == null) { @@ -225,8 +224,8 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object for (Map.Entry entry : map.entrySet()) { // Hive skips map entries with null keys if (!filterNullMapKeys || entry.getKey() != null) { - serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector); - serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector); + serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector, timestampConverter); + serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector, timestampConverter); } } @@ -239,7 +238,7 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object } } - private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector) + private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector, LongUnaryOperator timestampConverter) { if (object == null) { requireNonNull(builder, "parent builder is null").appendNull(); @@ -260,7 +259,7 @@ private static Block serializeStruct(Type type, BlockBuilder builder, Object obj for (int i = 0; i < typeParameters.size(); i++) { StructField field = allStructFieldRefs.get(i); - serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector()); + serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector(), timestampConverter); } builder.closeEntry(); @@ -272,6 +271,7 @@ private static Block serializeStruct(Type type, BlockBuilder builder, Object obj } } + @SuppressWarnings("deprecation") private static long formatDateAsLong(Object object, DateObjectInspector inspector) { if (object instanceof LazyDate) { @@ -280,27 +280,14 @@ private static long formatDateAsLong(Object object, DateObjectInspector inspecto if (object instanceof DateWritable) { return ((DateWritable) object).getDays(); } - - // Hive will return java.sql.Date at midnight in JVM time zone - long millisLocal = inspector.getPrimitiveJavaObject(object).getTime(); - // Convert it to midnight in UTC - long millisUtc = DateTimeZone.getDefault().getMillisKeepLocal(DateTimeZone.UTC, millisLocal); - // Convert midnight UTC to days - return TimeUnit.MILLISECONDS.toDays(millisUtc); - } - - private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector) - { - Timestamp timestamp = getTimestamp(object, inspector); - return timestamp.getTime(); + return inspector.getPrimitiveJavaObject(object).toEpochDay(); } - private static Timestamp getTimestamp(Object object, TimestampObjectInspector inspector) + private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector, LongUnaryOperator timestampConverter) { - // handle broken ObjectInspectors if (object instanceof TimestampWritable) { - return ((TimestampWritable) object).getTimestamp(); + return ((TimestampWritable) object).getTimestamp().getTime(); } - return inspector.getPrimitiveJavaObject(object); + return timestampConverter.applyAsLong(inspector.getPrimitiveJavaObject(object).toEpochMilli()); } } diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveClient.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveClient.java index 81b8f44fa760..ea8a6b27ac6e 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveClient.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveClient.java @@ -3898,6 +3898,18 @@ else if (rowNumber % 39 == 1) { } } + // ARRAY + index = columnIndex.get("t_array_timestamp"); + if (index != null) { + if ((rowNumber % 43) == 0) { + assertNull(row.getField(index)); + } + else { + SqlTimestamp expected = sqlTimestampOf(2011, 5, 6, 7, 8, 9, 123, timeZone, UTC_KEY, SESSION); + assertEquals(row.getField(index), ImmutableList.of(expected)); + } + } + // ARRAY> index = columnIndex.get("t_array_struct"); if (index != null) { diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java index 84c203a9061c..e4f1c91f3b30 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/AbstractTestHiveFileFormats.java @@ -43,9 +43,11 @@ import io.prestosql.tests.StructuralTestUtil; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; import org.apache.hadoop.hive.serde2.Serializer; @@ -70,8 +72,6 @@ import java.lang.invoke.MethodHandle; import java.math.BigDecimal; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -148,15 +148,18 @@ @Test(groups = "hive") public abstract class AbstractTestHiveFileFormats { + protected static final DateTimeZone HIVE_STORAGE_TIME_ZONE = DateTimeZone.forID("America/Bahia_Banderas"); + private static final double EPSILON = 0.001; private static final long DATE_MILLIS_UTC = new DateTime(2011, 5, 6, 0, 0, UTC).getMillis(); private static final long DATE_DAYS = TimeUnit.MILLISECONDS.toDays(DATE_MILLIS_UTC); private static final String DATE_STRING = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC().print(DATE_MILLIS_UTC); - private static final Date SQL_DATE = new Date(UTC.getMillisKeepLocal(DateTimeZone.getDefault(), DATE_MILLIS_UTC)); + private static final Date HIVE_DATE = Date.ofEpochMilli(DATE_MILLIS_UTC); private static final long TIMESTAMP = new DateTime(2011, 5, 6, 7, 8, 9, 123).getMillis(); private static final String TIMESTAMP_STRING = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss.SSS").print(TIMESTAMP); + private static final Timestamp HIVE_TIMESTAMP = Timestamp.ofEpochMilli(HIVE_STORAGE_TIME_ZONE.convertUTCToLocal(TIMESTAMP)); private static final String VARCHAR_MAX_LENGTH_STRING; @@ -271,8 +274,8 @@ public abstract class AbstractTestHiveFileFormats .add(new TestColumn("t_double", javaDoubleObjectInspector, 6.2, 6.2)) .add(new TestColumn("t_boolean_true", javaBooleanObjectInspector, true, true)) .add(new TestColumn("t_boolean_false", javaBooleanObjectInspector, false, false)) - .add(new TestColumn("t_date", javaDateObjectInspector, SQL_DATE, DATE_DAYS)) - .add(new TestColumn("t_timestamp", javaTimestampObjectInspector, new Timestamp(TIMESTAMP), TIMESTAMP)) + .add(new TestColumn("t_date", javaDateObjectInspector, HIVE_DATE, DATE_DAYS)) + .add(new TestColumn("t_timestamp", javaTimestampObjectInspector, HIVE_TIMESTAMP, TIMESTAMP)) .add(new TestColumn("t_decimal_precision_2", DECIMAL_INSPECTOR_PRECISION_2, WRITE_DECIMAL_PRECISION_2, EXPECTED_DECIMAL_PRECISION_2)) .add(new TestColumn("t_decimal_precision_4", DECIMAL_INSPECTOR_PRECISION_4, WRITE_DECIMAL_PRECISION_4, EXPECTED_DECIMAL_PRECISION_4)) .add(new TestColumn("t_decimal_precision_8", DECIMAL_INSPECTOR_PRECISION_8, WRITE_DECIMAL_PRECISION_8, EXPECTED_DECIMAL_PRECISION_8)) @@ -324,11 +327,11 @@ public abstract class AbstractTestHiveFileFormats mapBlockOf(BOOLEAN, BOOLEAN, true, true))) .add(new TestColumn("t_map_date", getStandardMapObjectInspector(javaDateObjectInspector, javaDateObjectInspector), - ImmutableMap.of(SQL_DATE, SQL_DATE), + ImmutableMap.of(HIVE_DATE, HIVE_DATE), mapBlockOf(DateType.DATE, DateType.DATE, DATE_DAYS, DATE_DAYS))) .add(new TestColumn("t_map_timestamp", getStandardMapObjectInspector(javaTimestampObjectInspector, javaTimestampObjectInspector), - ImmutableMap.of(new Timestamp(TIMESTAMP), new Timestamp(TIMESTAMP)), + ImmutableMap.of(HIVE_TIMESTAMP, HIVE_TIMESTAMP), mapBlockOf(TimestampType.TIMESTAMP, TimestampType.TIMESTAMP, TIMESTAMP, TIMESTAMP))) .add(new TestColumn("t_map_decimal_precision_2", getStandardMapObjectInspector(DECIMAL_INSPECTOR_PRECISION_2, DECIMAL_INSPECTOR_PRECISION_2), @@ -375,11 +378,11 @@ public abstract class AbstractTestHiveFileFormats arrayBlockOf(createCharType(10), "test"))) .add(new TestColumn("t_array_date", getStandardListObjectInspector(javaDateObjectInspector), - ImmutableList.of(SQL_DATE), + ImmutableList.of(HIVE_DATE), arrayBlockOf(DateType.DATE, DATE_DAYS))) .add(new TestColumn("t_array_timestamp", getStandardListObjectInspector(javaTimestampObjectInspector), - ImmutableList.of(new Timestamp(TIMESTAMP)), + ImmutableList.of(HIVE_TIMESTAMP), StructuralTestUtil.arrayBlockOf(TimestampType.TIMESTAMP, TIMESTAMP))) .add(new TestColumn("t_array_decimal_precision_2", getStandardListObjectInspector(DECIMAL_INSPECTOR_PRECISION_2), @@ -488,7 +491,7 @@ protected List getColumnHandles(List testColumns) return columns; } - public static FileSplit createTestFile( + public static FileSplit createTestFilePresto( String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, @@ -516,6 +519,7 @@ public static FileSplit createTestFile( pageBuilder.getBlockBuilder(columnNumber), testColumns.get(columnNumber).getWriteValue(), testColumns.get(columnNumber).getObjectInspector(), + millis -> HIVE_STORAGE_TIME_ZONE.convertLocalToUTC(millis, false), false); } } @@ -545,7 +549,7 @@ public static FileSplit createTestFile( return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]); } - public static FileSplit createTestFile( + public static FileSplit createTestFileHive( String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java index eddcabe44b8b..14626e8af9bc 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/HiveTestUtils.java @@ -42,9 +42,12 @@ import io.prestosql.spi.type.TypeManager; import io.prestosql.spi.type.TypeSignatureParameter; import io.prestosql.testing.TestingConnectorSession; +import org.apache.hadoop.hive.common.type.Timestamp; import java.lang.invoke.MethodHandle; import java.math.BigDecimal; +import java.time.LocalDateTime; +import java.time.ZoneOffset; import java.util.List; import java.util.Set; @@ -179,4 +182,9 @@ public static boolean isDistinctFrom(MethodHandle handle, Block left, Block righ throw new AssertionError(t); } } + + public static Timestamp hiveTimestamp(LocalDateTime local) + { + return Timestamp.ofEpochSecond(local.toEpochSecond(ZoneOffset.UTC), local.getNano()); + } } diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveBucketing.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveBucketing.java index f320287a249e..6fe8461a3259 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveBucketing.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/TestHiveBucketing.java @@ -136,7 +136,11 @@ private static void assertBucketEquals(List hiveTypeStrings, List !column.getName().equals("t_map_float")) .filter(column -> !column.getName().equals("t_map_double")) // null map keys are not supported - .filter(column -> !column.getName().equals("t_map_null_key")) - .filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")) - .filter(column -> !column.getName().equals("t_map_null_key_complex_value")) + .filter(TestHiveFileFormats::withoutNullMapKeyTests) // decimal(38) is broken or not supported .filter(column -> !column.getName().equals("t_decimal_precision_38")) .filter(column -> !column.getName().equals("t_map_decimal_precision_38")) @@ -164,22 +160,6 @@ public void testJson(int rowCount) .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); } - @Test(dataProvider = "rowCount") - public void testRCText(int rowCount) - throws Exception - { - List testColumns = ImmutableList.copyOf(filter(TEST_COLUMNS, testColumn -> { - // TODO: This is a bug in the RC text reader - // RC file does not support complex type as key of a map - return !testColumn.getName().equals("t_struct_null") - && !testColumn.getName().equals("t_map_null_key_complex_key_value"); - })); - assertThatFileFormat(RCTEXT) - .withColumns(testColumns) - .withRowsCount(rowCount) - .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); - } - @Test(dataProvider = "rowCount") public void testRcTextPageSource(int rowCount) throws Exception @@ -207,29 +187,15 @@ public void testRcTextOptimizedWriter(int rowCount) .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); } - @Test(dataProvider = "rowCount") - public void testRCBinary(int rowCount) - throws Exception - { - // RCBinary does not support complex type as key of a map and interprets empty VARCHAR as nulls - List testColumns = TEST_COLUMNS.stream() - .filter(testColumn -> { - String name = testColumn.getName(); - return !name.equals("t_map_null_key_complex_key_value") && !name.equals("t_empty_varchar"); - }).collect(toList()); - assertThatFileFormat(RCBINARY) - .withColumns(testColumns) - .withRowsCount(rowCount) - .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); - } - @Test(dataProvider = "rowCount") public void testRcBinaryPageSource(int rowCount) throws Exception { // RCBinary does not support complex type as key of a map and interprets empty VARCHAR as nulls + // Hive binary writers are broken for timestamps List testColumns = TEST_COLUMNS.stream() .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar")) + .filter(TestHiveFileFormats::withoutTimestamps) .collect(toList()); assertThatFileFormat(RCBINARY) @@ -249,20 +215,31 @@ public void testRcBinaryOptimizedWriter(int rowCount) .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); + // Hive cannot read timestamps from old files + List testColumnsNoTimestamps = testColumns.stream() + .filter(TestHiveFileFormats::withoutTimestamps) + .collect(toList()); + assertThatFileFormat(RCBINARY) .withColumns(testColumns) .withRowsCount(rowCount) .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS)) - .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)) - .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)); + .isReadableByPageSource(new RcFilePageSourceFactory(TYPE_MANAGER, HDFS_ENVIRONMENT, STATS)) + .withColumns(testColumnsNoTimestamps) + .isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT)); } @Test(dataProvider = "rowCount") public void testOrc(int rowCount) throws Exception { + // Hive binary writers are broken for timestamps + List testColumns = TEST_COLUMNS.stream() + .filter(TestHiveFileFormats::withoutTimestamps) + .collect(toImmutableList()); + assertThatFileFormat(ORC) - .withColumns(TEST_COLUMNS) + .withColumns(testColumns) .withRowsCount(rowCount) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, false, HDFS_ENVIRONMENT, STATS)); } @@ -280,7 +257,7 @@ public void testOrcOptimizedWriter(int rowCount) // A Presto page can not contain a map with null keys, so a page based writer can not write null keys List testColumns = TEST_COLUMNS.stream() - .filter(testColumn -> !testColumn.getName().equals("t_map_null_key") && !testColumn.getName().equals("t_map_null_key_complex_value") && !testColumn.getName().equals("t_map_null_key_complex_key_value")) + .filter(TestHiveFileFormats::withoutNullMapKeyTests) .collect(toList()); assertThatFileFormat(ORC) @@ -298,10 +275,15 @@ public void testOrcUseColumnNames(int rowCount) { TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties()); + // Hive binary writers are broken for timestamps + List testColumns = TEST_COLUMNS.stream() + .filter(TestHiveFileFormats::withoutTimestamps) + .collect(toImmutableList()); + assertThatFileFormat(ORC) - .withWriteColumns(TEST_COLUMNS) + .withWriteColumns(testColumns) .withRowsCount(rowCount) - .withReadColumns(Lists.reverse(TEST_COLUMNS)) + .withReadColumns(Lists.reverse(testColumns)) .withSession(session) .isReadableByPageSource(new OrcPageSourceFactory(TYPE_MANAGER, true, HDFS_ENVIRONMENT, STATS)); } @@ -374,9 +356,12 @@ private static List getTestColumnsSupportedByParquet() // Write of complex hive data to Parquet is broken // TODO: empty arrays or maps with null keys don't seem to work // Parquet does not support DATE + // Hive binary writers are broken for timestamps return TEST_COLUMNS.stream() - .filter(column -> !ImmutableSet.of("t_null_array_int", "t_array_empty", "t_map_null_key", "t_map_null_key_complex_value", "t_map_null_key_complex_key_value") - .contains(column.getName())) + .filter(TestHiveFileFormats::withoutTimestamps) + .filter(TestHiveFileFormats::withoutNullMapKeyTests) + .filter(column -> !column.getName().equals("t_null_array_int")) + .filter(column -> !column.getName().equals("t_array_empty")) .filter(column -> column.isPartitionKey() || ( !hasType(column.getObjectInspector(), PrimitiveCategory.DATE)) && !hasType(column.getObjectInspector(), PrimitiveCategory.SHORT) && @@ -601,6 +586,14 @@ private static boolean withoutNullMapKeyTests(TestColumn testColumn) !name.equals("t_map_null_key_complex_value"); } + private static boolean withoutTimestamps(TestColumn testColumn) + { + String name = testColumn.getName(); + return !name.equals("t_timestamp") && + !name.equals("t_map_timestamp") && + !name.equals("t_array_timestamp"); + } + private FileFormatAssertion assertThatFileFormat(HiveStorageFormat hiveStorageFormat) { return new FileFormatAssertion(hiveStorageFormat.name()) @@ -732,10 +725,10 @@ private void assertRead(Optional pageSourceFactory, Optio try { FileSplit split; if (fileWriterFactory != null) { - split = createTestFile(file.getAbsolutePath(), storageFormat, compressionCodec, writeColumns, session, rowsCount, fileWriterFactory); + split = createTestFilePresto(file.getAbsolutePath(), storageFormat, compressionCodec, writeColumns, session, rowsCount, fileWriterFactory); } else { - split = createTestFile(file.getAbsolutePath(), storageFormat, compressionCodec, writeColumns, rowsCount); + split = createTestFileHive(file.getAbsolutePath(), storageFormat, compressionCodec, writeColumns, rowsCount); } if (pageSourceFactory.isPresent()) { testPageSourceFactory(pageSourceFactory.get(), split, storageFormat, readColumns, session, rowsCount); diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/AbstractTestParquetReader.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/AbstractTestParquetReader.java index b01d7c30b106..a49b950c726e 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/AbstractTestParquetReader.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/AbstractTestParquetReader.java @@ -28,7 +28,9 @@ import io.prestosql.spi.type.SqlTimestamp; import io.prestosql.spi.type.SqlVarbinary; import io.prestosql.spi.type.Type; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -39,9 +41,6 @@ import java.math.BigDecimal; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; -import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -1774,7 +1773,6 @@ private static Timestamp intToTimestamp(Integer input) if (input == null) { return null; } - Timestamp timestamp = new Timestamp(0); long seconds = (input / 1000); int nanos = ((input % 1000) * 1_000_000); @@ -1789,9 +1787,7 @@ private static Timestamp intToTimestamp(Integer input) nanos -= 1_000_000_000; seconds += 1; } - timestamp.setTime(seconds * 1000); - timestamp.setNanos(nanos); - return timestamp; + return Timestamp.ofEpochSecond(seconds, nanos); } private static SqlTimestamp intToSqlTimestamp(Integer input) @@ -1807,7 +1803,7 @@ private static Date intToDate(Integer input) if (input == null) { return null; } - return Date.valueOf(LocalDate.ofEpochDay(input)); + return Date.ofEpochDay(input); } private static SqlDate intToSqlDate(Integer input) diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/write/TestDataWritableWriter.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/write/TestDataWritableWriter.java index 6e5ad143ce99..1dfab863939b 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/write/TestDataWritableWriter.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/parquet/write/TestDataWritableWriter.java @@ -14,11 +14,12 @@ package io.prestosql.plugin.hive.parquet.write; import io.airlift.log.Logger; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -46,8 +47,6 @@ import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.Type; -import java.sql.Date; -import java.sql.Timestamp; import java.util.List; import java.util.Map; @@ -377,7 +376,7 @@ private void writePrimitive(final Object value, final PrimitiveObjectInspector i break; case DATE: Date vDate = ((DateObjectInspector) inspector).getPrimitiveJavaObject(value); - recordConsumer.addInteger(DateWritable.dateToDays(vDate)); + recordConsumer.addInteger(vDate.toEpochDay()); break; default: throw new IllegalArgumentException("Unsupported primitive data type: " + inspector.getPrimitiveCategory()); diff --git a/presto-hive/src/test/java/io/prestosql/plugin/hive/util/TestSerDeUtils.java b/presto-hive/src/test/java/io/prestosql/plugin/hive/util/TestSerDeUtils.java index adf5d41a97fd..6af54e915381 100644 --- a/presto-hive/src/test/java/io/prestosql/plugin/hive/util/TestSerDeUtils.java +++ b/presto-hive/src/test/java/io/prestosql/plugin/hive/util/TestSerDeUtils.java @@ -28,6 +28,8 @@ import io.prestosql.spi.type.ArrayType; import io.prestosql.spi.type.RowType; import io.prestosql.type.TypeRegistry; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.io.BytesWritable; @@ -35,11 +37,12 @@ import org.testng.annotations.Test; import java.lang.reflect.Type; -import java.sql.Timestamp; +import java.time.LocalDate; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; +import java.util.function.LongUnaryOperator; import static io.airlift.slice.Slices.utf8Slice; import static io.prestosql.plugin.hive.HiveTestUtils.mapType; @@ -60,6 +63,7 @@ import static io.prestosql.tests.StructuralTestUtil.rowBlockOf; import static java.lang.Double.doubleToLongBits; import static java.lang.Float.floatToRawIntBits; +import static java.lang.Math.toIntExact; import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector; @@ -157,10 +161,16 @@ public void testPrimitiveSlice() Block actualString = toBinaryBlock(createUnboundedVarcharType(), "abdd", getInspector(String.class)); assertBlockEquals(actualString, expectedString); + // date + int date = toIntExact(LocalDate.of(2008, 10, 28).toEpochDay()); + Block expectedDate = VARBINARY.createBlockBuilder(null, 1).writeInt(date).closeEntry().build(); + Block actualDate = toBinaryBlock(BIGINT, Date.ofEpochDay(date), getInspector(Date.class)); + assertBlockEquals(actualDate, expectedDate); + // timestamp DateTime dateTime = new DateTime(2008, 10, 28, 16, 7, 15, 0); Block expectedTimestamp = VARBINARY.createBlockBuilder(null, 1).writeLong(dateTime.getMillis()).closeEntry().build(); - Block actualTimestamp = toBinaryBlock(BIGINT, new Timestamp(dateTime.getMillis()), getInspector(Timestamp.class)); + Block actualTimestamp = toBinaryBlock(BIGINT, Timestamp.ofEpochMilli(dateTime.getMillis()), getInspector(Timestamp.class)); assertBlockEquals(actualTimestamp, expectedTimestamp); // binary @@ -291,7 +301,7 @@ public void testReuse() Type type = new TypeToken>() {}.getType(); ObjectInspector inspector = getInspector(type); - Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector); + Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector, LongUnaryOperator.identity()); Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L); assertBlockEquals(actual, expected); @@ -315,13 +325,13 @@ private static Block toBinaryBlock(io.prestosql.spi.type.Type type, Object objec if (inspector.getCategory() == Category.PRIMITIVE) { return getPrimitiveBlock(type, object, inspector); } - return getBlockObject(type, object, inspector); + return getBlockObject(type, object, inspector, LongUnaryOperator.identity()); } private static Block getPrimitiveBlock(io.prestosql.spi.type.Type type, Object object, ObjectInspector inspector) { BlockBuilder builder = VARBINARY.createBlockBuilder(null, 1); - serializeObject(type, builder, object, inspector); + serializeObject(type, builder, object, inspector, LongUnaryOperator.identity()); return builder.build(); } } diff --git a/presto-hive/src/test/sql/create-test.sql b/presto-hive/src/test/sql/create-test.sql index e68f675d6fba..e0dd0e5d68ce 100644 --- a/presto-hive/src/test/sql/create-test.sql +++ b/presto-hive/src/test/sql/create-test.sql @@ -253,6 +253,7 @@ CREATE TABLE presto_test_types_textfile ( , t_char CHAR(25) , t_map MAP , t_array_string ARRAY +, t_array_timestamp ARRAY , t_array_struct ARRAY> , t_struct STRUCT , t_complex MAP>> @@ -277,6 +278,7 @@ SELECT , CASE n % 41 WHEN 0 THEN NULL WHEN 1 THEN '' ELSE 'test char' END , CASE WHEN n % 27 = 0 THEN NULL ELSE map('test key', 'test value') END , CASE WHEN n % 29 = 0 THEN NULL ELSE array('abc', 'xyz', 'data') END +, CASE WHEN n % 43 = 0 THEN NULL ELSE array(timestamp '2011-05-06 07:08:09.1234567') END , CASE WHEN n % 31 = 0 THEN NULL ELSE array(named_struct('s_string', 'test abc', 's_double', 0.1), named_struct('s_string' , 'test xyz', 's_double', 0.2)) END diff --git a/presto-orc/src/test/java/io/prestosql/orc/OrcTester.java b/presto-orc/src/test/java/io/prestosql/orc/OrcTester.java index 3b70791506fe..bff7f2c397cd 100644 --- a/presto-orc/src/test/java/io/prestosql/orc/OrcTester.java +++ b/presto-orc/src/test/java/io/prestosql/orc/OrcTester.java @@ -47,8 +47,10 @@ import io.prestosql.type.TypeRegistry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions; @@ -59,11 +61,11 @@ import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReader; import org.apache.hadoop.hive.serde2.Serializer; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -87,11 +89,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; -import java.time.LocalDate; -import java.time.ZoneId; -import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -136,7 +133,6 @@ import static io.prestosql.testing.TestingConnectorSession.SESSION; import static java.lang.Math.toIntExact; import static java.util.Arrays.asList; -import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.stream.Collectors.toList; import static org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS; import static org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR; @@ -715,7 +711,8 @@ private static void assertFileContentsOrcHive( Reader reader = OrcFile.createReader( new Path(tempFile.getFile().getAbsolutePath()), - new ReaderOptions(configuration)); + new ReaderOptions(configuration) + .useUTCTimestamp(false)); RecordReader recordReader = reader.rows(); StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector(); @@ -745,8 +742,8 @@ else if (actualValue instanceof ByteWritable) { else if (actualValue instanceof BytesWritable) { actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes()); } - else if (actualValue instanceof DateWritable) { - actualValue = new SqlDate(((DateWritable) actualValue).getDays()); + else if (actualValue instanceof DateWritableV2) { + actualValue = new SqlDate(((DateWritableV2) actualValue).getDays()); } else if (actualValue instanceof DoubleWritable) { actualValue = ((DoubleWritable) actualValue).get(); @@ -776,9 +773,8 @@ else if (actualValue instanceof HiveDecimalWritable) { else if (actualValue instanceof Text) { actualValue = actualValue.toString(); } - else if (actualValue instanceof TimestampWritable) { - TimestampWritable timestamp = (TimestampWritable) actualValue; - actualValue = sqlTimestampOf((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), SESSION); + else if (actualValue instanceof TimestampWritableV2) { + actualValue = sqlTimestampOf(((TimestampWritableV2) actualValue).getTimestamp().toEpochMilli(), SESSION); } else if (actualValue instanceof OrcStruct) { List fields = new ArrayList<>(); @@ -964,19 +960,10 @@ private static Object preprocessWriteValueHive(Type type, Object value) return ((SqlVarbinary) value).getBytes(); } if (type.equals(DATE)) { - int days = ((SqlDate) value).getDays(); - LocalDate localDate = LocalDate.ofEpochDay(days); - ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault()); - - long millis = SECONDS.toMillis(zonedDateTime.toEpochSecond()); - Date date = new Date(0); - // millis must be set separately to avoid masking - date.setTime(millis); - return date; + return Date.ofEpochDay(((SqlDate) value).getDays()); } if (type.equals(TIMESTAMP)) { - long millisUtc = (int) ((SqlTimestamp) value).getMillisUtc(); - return new Timestamp(millisUtc); + return Timestamp.ofEpochMilli(((SqlTimestamp) value).getMillisUtc()); } if (type instanceof DecimalType) { return HiveDecimal.create(((SqlDecimal) value).toBigDecimal()); diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/ParquetReaderUtils.java b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetReaderUtils.java new file mode 100644 index 000000000000..915ed1cb39cc --- /dev/null +++ b/presto-parquet/src/main/java/io/prestosql/parquet/ParquetReaderUtils.java @@ -0,0 +1,32 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.prestosql.parquet; + +import io.airlift.slice.Slice; +import org.apache.parquet.bytes.ByteBufferInputStream; + +public final class ParquetReaderUtils +{ + private ParquetReaderUtils() {} + + public static ByteBufferInputStream toInputStream(Slice slice) + { + return ByteBufferInputStream.wrap(slice.toByteBuffer()); + } + + public static ByteBufferInputStream toInputStream(DictionaryPage page) + { + return toInputStream(page.getSlice()); + } +} diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DictionaryReader.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DictionaryReader.java index 5a81c2f266a8..d5f1575dfb38 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DictionaryReader.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DictionaryReader.java @@ -21,8 +21,6 @@ import org.apache.parquet.io.api.Binary; import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; public class DictionaryReader extends ValuesReader @@ -36,10 +34,9 @@ public DictionaryReader(Dictionary dictionary) } @Override - public void initFromPage(int valueCount, ByteBuffer page, int offset) + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { - InputStream in = new ByteBufferInputStream(page, offset, page.limit() - offset); int bitWidth = BytesUtils.readIntLittleEndianOnOneByte(in); decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DoubleDictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DoubleDictionary.java index a87fa94fb001..45d064241dd1 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DoubleDictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/DoubleDictionary.java @@ -19,6 +19,7 @@ import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; +import static io.prestosql.parquet.ParquetReaderUtils.toInputStream; public class DoubleDictionary extends Dictionary @@ -31,7 +32,7 @@ public DoubleDictionary(DictionaryPage dictionaryPage) super(dictionaryPage.getEncoding()); content = new double[dictionaryPage.getDictionarySize()]; DoublePlainValuesReader doubleReader = new DoublePlainValuesReader(); - doubleReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryPage.getSlice().getBytes(), 0); + doubleReader.initFromPage(dictionaryPage.getDictionarySize(), toInputStream(dictionaryPage)); for (int i = 0; i < content.length; i++) { content[i] = doubleReader.readDouble(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/FloatDictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/FloatDictionary.java index b84d6f1219d1..604e9f6bb984 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/FloatDictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/FloatDictionary.java @@ -19,6 +19,7 @@ import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; +import static io.prestosql.parquet.ParquetReaderUtils.toInputStream; public class FloatDictionary extends Dictionary @@ -31,7 +32,7 @@ public FloatDictionary(DictionaryPage dictionaryPage) super(dictionaryPage.getEncoding()); content = new float[dictionaryPage.getDictionarySize()]; FloatPlainValuesReader floatReader = new FloatPlainValuesReader(); - floatReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryPage.getSlice().getBytes(), 0); + floatReader.initFromPage(dictionaryPage.getDictionarySize(), toInputStream(dictionaryPage)); for (int i = 0; i < content.length; i++) { content[i] = floatReader.readFloat(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/IntegerDictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/IntegerDictionary.java index e1915728ce5f..8504c341fafb 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/IntegerDictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/IntegerDictionary.java @@ -19,6 +19,7 @@ import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; +import static io.prestosql.parquet.ParquetReaderUtils.toInputStream; public class IntegerDictionary extends Dictionary @@ -31,7 +32,7 @@ public IntegerDictionary(DictionaryPage dictionaryPage) super(dictionaryPage.getEncoding()); content = new int[dictionaryPage.getDictionarySize()]; IntegerPlainValuesReader intReader = new IntegerPlainValuesReader(); - intReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryPage.getSlice().getBytes(), 0); + intReader.initFromPage(dictionaryPage.getDictionarySize(), toInputStream(dictionaryPage)); for (int i = 0; i < content.length; i++) { content[i] = intReader.readInteger(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/LongDictionary.java b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/LongDictionary.java index c74f9e99f909..0937eb3bcc0b 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/LongDictionary.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/dictionary/LongDictionary.java @@ -19,6 +19,7 @@ import java.io.IOException; import static com.google.common.base.MoreObjects.toStringHelper; +import static io.prestosql.parquet.ParquetReaderUtils.toInputStream; public class LongDictionary extends Dictionary @@ -31,7 +32,7 @@ public LongDictionary(DictionaryPage dictionaryPage) super(dictionaryPage.getEncoding()); content = new long[dictionaryPage.getDictionarySize()]; LongPlainValuesReader longReader = new LongPlainValuesReader(); - longReader.initFromPage(dictionaryPage.getDictionarySize(), dictionaryPage.getSlice().getBytes(), 0); + longReader.initFromPage(dictionaryPage.getDictionarySize(), toInputStream(dictionaryPage)); for (int i = 0; i < content.length; i++) { content[i] = longReader.readLong(); } diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/MetadataReader.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/MetadataReader.java index ace262c61371..ac26701aa65f 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/MetadataReader.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/MetadataReader.java @@ -33,6 +33,7 @@ import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; +import org.apache.parquet.schema.PrimitiveType; import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName; import org.apache.parquet.schema.Type.Repetition; import org.apache.parquet.schema.Types; @@ -125,13 +126,14 @@ public static ParquetMetadata readFooter(FSDataInputStream inputStream, Path fil .map(value -> value.toLowerCase(Locale.ENGLISH)) .toArray(String[]::new); ColumnPath columnPath = ColumnPath.get(path); - PrimitiveTypeName primitiveTypeName = messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(); + PrimitiveType primitiveType = messageType.getType(columnPath.toArray()).asPrimitiveType(); ColumnChunkMetaData column = ColumnChunkMetaData.get( columnPath, - primitiveTypeName, + primitiveType, CompressionCodecName.fromParquet(metaData.codec), + null, readEncodings(metaData.encodings), - readStats(metaData.statistics, primitiveTypeName), + readStats(metaData.statistics, primitiveType.getPrimitiveTypeName()), metaData.data_page_offset, metaData.dictionary_page_offset, metaData.num_values, diff --git a/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java b/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java index c483f61136e4..db7668c9df5b 100644 --- a/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java +++ b/presto-parquet/src/main/java/io/prestosql/parquet/reader/PrimitiveColumnReader.java @@ -29,6 +29,7 @@ import io.prestosql.spi.type.Type; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.BytesUtils; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.values.ValuesReader; @@ -42,6 +43,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Verify.verify; +import static io.prestosql.parquet.ParquetReaderUtils.toInputStream; import static io.prestosql.parquet.ParquetTypeUtils.createDecimalType; import static io.prestosql.parquet.ValuesType.DEFINITION_LEVEL; import static io.prestosql.parquet.ValuesType.REPETITION_LEVEL; @@ -271,12 +273,10 @@ private ValuesReader readPageV1(DataPageV1 page) repetitionReader = new LevelValuesReader(rlReader); definitionReader = new LevelValuesReader(dlReader); try { - byte[] bytes = page.getSlice().getBytes(); - rlReader.initFromPage(page.getValueCount(), bytes, 0); - int offset = rlReader.getNextOffset(); - dlReader.initFromPage(page.getValueCount(), bytes, offset); - offset = dlReader.getNextOffset(); - return initDataReader(page.getValueEncoding(), bytes, offset, page.getValueCount()); + ByteBufferInputStream in = toInputStream(page.getSlice()); + rlReader.initFromPage(page.getValueCount(), in); + dlReader.initFromPage(page.getValueCount(), in); + return initDataReader(page.getValueEncoding(), page.getValueCount(), in); } catch (IOException e) { throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, e); @@ -287,7 +287,7 @@ private ValuesReader readPageV2(DataPageV2 page) { repetitionReader = buildLevelRLEReader(columnDescriptor.getMaxRepetitionLevel(), page.getRepetitionLevels()); definitionReader = buildLevelRLEReader(columnDescriptor.getMaxDefinitionLevel(), page.getDefinitionLevels()); - return initDataReader(page.getDataEncoding(), page.getSlice().getBytes(), 0, page.getValueCount()); + return initDataReader(page.getDataEncoding(), page.getValueCount(), toInputStream(page.getSlice())); } private LevelReader buildLevelRLEReader(int maxLevel, Slice slice) @@ -298,7 +298,7 @@ private LevelReader buildLevelRLEReader(int maxLevel, Slice slice) return new LevelRLEReader(new RunLengthBitPackingHybridDecoder(BytesUtils.getWidthFromMaxInt(maxLevel), new ByteArrayInputStream(slice.getBytes()))); } - private ValuesReader initDataReader(ParquetEncoding dataEncoding, byte[] bytes, int offset, int valueCount) + private ValuesReader initDataReader(ParquetEncoding dataEncoding, int valueCount, ByteBufferInputStream in) { ValuesReader valuesReader; if (dataEncoding.usesDictionary()) { @@ -312,7 +312,7 @@ private ValuesReader initDataReader(ParquetEncoding dataEncoding, byte[] bytes, } try { - valuesReader.initFromPage(valueCount, bytes, offset); + valuesReader.initFromPage(valueCount, in); return valuesReader; } catch (IOException e) { diff --git a/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTimestampUtils.java b/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTimestampUtils.java index d15984c3cb5c..bad3d948cf64 100644 --- a/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTimestampUtils.java +++ b/presto-parquet/src/test/java/io/prestosql/parquet/TestParquetTimestampUtils.java @@ -14,12 +14,11 @@ package io.prestosql.parquet; import io.prestosql.spi.PrestoException; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; import org.apache.parquet.io.api.Binary; import org.testng.annotations.Test; -import java.sql.Timestamp; - import static io.prestosql.parquet.ParquetTimestampUtils.getTimestampMillis; import static io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED; import static org.testng.Assert.assertEquals; @@ -52,6 +51,6 @@ private static void assertTimestampCorrect(String timestampString) Timestamp timestamp = Timestamp.valueOf(timestampString); Binary timestampBytes = NanoTimeUtils.getNanoTime(timestamp, false).toBinary(); long decodedTimestampMillis = getTimestampMillis(timestampBytes); - assertEquals(decodedTimestampMillis, timestamp.getTime()); + assertEquals(decodedTimestampMillis, timestamp.toEpochMilli()); } } diff --git a/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java b/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java index d8a4ef946747..0b4a5ce51ef7 100644 --- a/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java +++ b/presto-parquet/src/test/java/io/prestosql/parquet/TestTupleDomainParquetPredicate.java @@ -191,7 +191,7 @@ public void testDouble() // fail on corrupted statistics assertThatExceptionOfType(ParquetCorruptionException.class) .isThrownBy(() -> getDomain(DOUBLE, 10, doubleColumnStats(42.24, 3.3), ID, column, true)) - .withMessage("Corrupted statistics for column \"DoubleColumn\" in Parquet file \"testFile\": [min: 42.24000, max: 3.30000, num_nulls: 0]"); + .withMessage("Corrupted statistics for column \"DoubleColumn\" in Parquet file \"testFile\": [min: 42.24, max: 3.3, num_nulls: 0]"); } private static DoubleStatistics doubleColumnStats(double minimum, double maximum) @@ -219,7 +219,7 @@ public void testString() // fail on corrupted statistics assertThatExceptionOfType(ParquetCorruptionException.class) .isThrownBy(() -> getDomain(createUnboundedVarcharType(), 10, stringColumnStats("taco", "apple"), ID, column, true)) - .withMessage("Corrupted statistics for column \"StringColumn\" in Parquet file \"testFile\": [min: taco, max: apple, num_nulls: 0]"); + .withMessage("Corrupted statistics for column \"StringColumn\" in Parquet file \"testFile\": [min: 0x7461636F, max: 0x6170706C65, num_nulls: 0]"); } private static BinaryStatistics stringColumnStats(String minimum, String maximum) @@ -250,7 +250,7 @@ public void testFloat() // fail on corrupted statistics assertThatExceptionOfType(ParquetCorruptionException.class) .isThrownBy(() -> getDomain(REAL, 10, floatColumnStats(maximum, minimum), ID, column, true)) - .withMessage("Corrupted statistics for column \"FloatColumn\" in Parquet file \"testFile\": [min: 40.30000, max: 4.30000, num_nulls: 0]"); + .withMessage("Corrupted statistics for column \"FloatColumn\" in Parquet file \"testFile\": [min: 40.3, max: 4.3, num_nulls: 0]"); } @Test diff --git a/presto-rcfile/src/test/java/io/prestosql/rcfile/RcFileTester.java b/presto-rcfile/src/test/java/io/prestosql/rcfile/RcFileTester.java index 1591ef2d6efe..6eae7dd88ee6 100644 --- a/presto-rcfile/src/test/java/io/prestosql/rcfile/RcFileTester.java +++ b/presto-rcfile/src/test/java/io/prestosql/rcfile/RcFileTester.java @@ -49,7 +49,9 @@ import io.prestosql.type.TypeRegistry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; @@ -60,10 +62,10 @@ import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe; -import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.lazy.LazyArray; import org.apache.hadoop.hive.serde2.lazy.LazyMap; import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; @@ -102,11 +104,6 @@ import java.io.InputStream; import java.io.UncheckedIOException; import java.math.BigInteger; -import java.sql.Date; -import java.sql.Timestamp; -import java.time.LocalDate; -import java.time.ZoneId; -import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -151,11 +148,11 @@ import static io.prestosql.spi.type.StandardTypes.ARRAY; import static io.prestosql.spi.type.StandardTypes.MAP; import static io.prestosql.spi.type.StandardTypes.ROW; -import static io.prestosql.spi.type.TimeZoneKey.UTC_KEY; import static io.prestosql.spi.type.TimestampType.TIMESTAMP; import static io.prestosql.spi.type.TinyintType.TINYINT; import static io.prestosql.spi.type.VarbinaryType.VARBINARY; import static io.prestosql.spi.type.VarcharType.VARCHAR; +import static io.prestosql.testing.DateTimeTestingUtils.sqlTimestampOf; import static io.prestosql.testing.TestingConnectorSession.SESSION; import static java.lang.Math.toIntExact; import static java.util.Collections.nCopies; @@ -828,8 +825,8 @@ else if (actualValue instanceof ByteWritable) { else if (actualValue instanceof BytesWritable) { actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes()); } - else if (actualValue instanceof DateWritable) { - actualValue = new SqlDate(((DateWritable) actualValue).getDays()); + else if (actualValue instanceof DateWritableV2) { + actualValue = new SqlDate(((DateWritableV2) actualValue).getDays()); } else if (actualValue instanceof DoubleWritable) { actualValue = ((DoubleWritable) actualValue).get(); @@ -856,14 +853,8 @@ else if (actualValue instanceof HiveDecimalWritable) { else if (actualValue instanceof Text) { actualValue = actualValue.toString(); } - else if (actualValue instanceof TimestampWritable) { - TimestampWritable timestamp = (TimestampWritable) actualValue; - if (SESSION.isLegacyTimestamp()) { - actualValue = new SqlTimestamp((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), UTC_KEY); - } - else { - actualValue = new SqlTimestamp((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L)); - } + else if (actualValue instanceof TimestampWritableV2) { + actualValue = sqlTimestampOf(((TimestampWritableV2) actualValue).getTimestamp().toEpochMilli(), SESSION); } else if (actualValue instanceof StructObject) { StructObject structObject = (StructObject) actualValue; @@ -1041,19 +1032,10 @@ private static Object preprocessWriteValueOld(Type type, Object value) return ((SqlVarbinary) value).getBytes(); } if (type.equals(DATE)) { - int days = ((SqlDate) value).getDays(); - LocalDate localDate = LocalDate.ofEpochDay(days); - ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault()); - - long millis = zonedDateTime.toEpochSecond() * 1000; - Date date = new Date(0); - // mills must be set separately to avoid masking - date.setTime(millis); - return date; + return Date.ofEpochDay(((SqlDate) value).getDays()); } if (type.equals(TIMESTAMP)) { - long millisUtc = (int) ((SqlTimestamp) value).getMillisUtc(); - return new Timestamp(millisUtc); + return Timestamp.ofEpochMilli(((SqlTimestamp) value).getMillisUtc()); } if (type instanceof DecimalType) { return HiveDecimal.create(((SqlDecimal) value).toBigDecimal());