diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java b/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java index eddc776ed3c71..9f573500a87d3 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/GenericHiveRecordCursor.java @@ -513,7 +513,7 @@ private void parseObjectColumn(int column) nulls[column] = true; } else { - objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column]); + objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column], hiveStorageTimeZone); nulls[column] = false; } } diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/SerDeUtils.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/SerDeUtils.java index 209824be0c5eb..8056035a2f57b 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/SerDeUtils.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/SerDeUtils.java @@ -69,38 +69,40 @@ public final class SerDeUtils { + private static final DateTimeZone JVM_TIME_ZONE = DateTimeZone.getDefault(); + private SerDeUtils() {} - public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector) + public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector, DateTimeZone hiveStorageTimeZone) { - return requireNonNull(serializeObject(type, null, object, objectInspector), "serialized result is null"); + return requireNonNull(serializeObject(type, null, object, objectInspector, hiveStorageTimeZone), "serialized result is null"); } - public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector) + public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { - return serializeObject(type, builder, object, inspector, true); + return serializeObject(type, builder, object, inspector, true, hiveStorageTimeZone); } // This version supports optionally disabling the filtering of null map key, which should only be used for building test data sets // that contain null map keys. For production, null map keys are not allowed. @VisibleForTesting - public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys) + public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys, DateTimeZone hiveStorageTimeZone) { switch (inspector.getCategory()) { case PRIMITIVE: - serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector); + serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector, hiveStorageTimeZone); return null; case LIST: - return serializeList(type, builder, object, (ListObjectInspector) inspector); + return serializeList(type, builder, object, (ListObjectInspector) inspector, hiveStorageTimeZone); case MAP: - return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys); + return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys, hiveStorageTimeZone); case STRUCT: - return serializeStruct(type, builder, object, (StructObjectInspector) inspector); + return serializeStruct(type, builder, object, (StructObjectInspector) inspector, hiveStorageTimeZone); } throw new RuntimeException("Unknown object inspector category: " + inspector.getCategory()); } - private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector) + private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { requireNonNull(builder, "parent builder is null"); @@ -146,7 +148,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector)); return; case TIMESTAMP: - TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector)); + TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector, hiveStorageTimeZone)); return; case BINARY: VARBINARY.writeSlice(builder, Slices.wrappedBuffer(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object))); @@ -165,7 +167,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory()); } - private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector) + private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { List list = inspector.getList(object); if (list == null) { @@ -186,7 +188,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec } for (Object element : list) { - serializeObject(elementType, currentBuilder, element, elementInspector); + serializeObject(elementType, currentBuilder, element, elementInspector, hiveStorageTimeZone); } if (builder != null) { @@ -199,7 +201,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec } } - private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, boolean filterNullMapKeys) + private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, boolean filterNullMapKeys, DateTimeZone hiveStorageTimeZone) { Map map = inspector.getMap(object); if (map == null) { @@ -225,8 +227,8 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object for (Map.Entry entry : map.entrySet()) { // Hive skips map entries with null keys if (!filterNullMapKeys || entry.getKey() != null) { - serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector); - serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector); + serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector, hiveStorageTimeZone); + serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector, hiveStorageTimeZone); } } @@ -239,7 +241,7 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object } } - private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector) + private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { if (object == null) { requireNonNull(builder, "parent builder is null").appendNull(); @@ -260,7 +262,7 @@ private static Block serializeStruct(Type type, BlockBuilder builder, Object obj for (int i = 0; i < typeParameters.size(); i++) { StructField field = allStructFieldRefs.get(i); - serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector()); + serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector(), hiveStorageTimeZone); } builder.closeEntry(); @@ -289,10 +291,16 @@ private static long formatDateAsLong(Object object, DateObjectInspector inspecto return TimeUnit.MILLISECONDS.toDays(millisUtc); } - private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector) + private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { Timestamp timestamp = getTimestamp(object, inspector); - return timestamp.getTime(); + long parsedJvmMillis = timestamp.getTime(); + + // remove the JVM time zone correction from the timestamp + long hiveMillis = JVM_TIME_ZONE.convertUTCToLocal(parsedJvmMillis); + + // convert to UTC using the real time zone for the underlying data + return hiveStorageTimeZone.convertLocalToUTC(hiveMillis, false); } private static Timestamp getTimestamp(Object object, TimestampObjectInspector inspector) diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java index d5d99ac4a892e..69f71d0247afd 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/AbstractTestHiveFileFormats.java @@ -523,7 +523,8 @@ public static FileSplit createTestFile( pageBuilder.getBlockBuilder(columnNumber), testColumns.get(columnNumber).getWriteValue(), testColumns.get(columnNumber).getObjectInspector(), - false); + false, + DateTimeZone.getDefault()); } } Page page = pageBuilder.build(); diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index 170e05f8557c8..815b600881094 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -15,6 +15,7 @@ import com.facebook.presto.Session; import com.facebook.presto.common.QualifiedObjectName; +import com.facebook.presto.common.type.TimeZoneKey; import com.facebook.presto.common.type.Type; import com.facebook.presto.common.type.TypeSignature; import com.facebook.presto.cost.StatsAndCosts; @@ -2780,6 +2781,31 @@ public void testShowCreateTable() assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql); } + @Test + public void testTextfileAmbiguousTimestamp() + { + try { + // set the session time zone to the same as the system timezone to avoid extra time zone conversions outside of what we are trying to test + Session timezoneSession = Session.builder(getSession()).setTimeZoneKey(TimeZoneKey.getTimeZoneKey("America/Bahia_Banderas")).build(); + @Language("SQL") String createTableSql = format("" + + "CREATE TABLE test_timestamp_textfile \n" + + "WITH (\n" + + " format = 'TEXTFILE'\n" + + ")\n" + + "AS SELECT TIMESTAMP '2022-10-30 01:16:13.000' ts, ARRAY[TIMESTAMP '2022-10-30 01:16:13.000'] array_ts", + getSession().getCatalog().get(), + getSession().getSchema().get()); + assertUpdate(timezoneSession, createTableSql, 1); + // 2022-10-30 01:16:13.00 is an ambiguous timestamp in America/Bahia_Banderas because + // it occurs during a fall DST transition where the hour from 1-2am repeats + // Ambiguous timestamps should be interpreted as the earlier of the two possible unixtimes for consistency. + assertQuery(timezoneSession, "SELECT to_unixtime(ts), to_unixtime(array_ts[1]) FROM test_timestamp_textfile", "SELECT 1.667110573E9, 1.667110573E9"); + } + finally { + assertUpdate("DROP TABLE IF EXISTS test_timestamp_textfile"); + } + } + @Test public void testCreateExternalTable() throws Exception diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestSerDeUtils.java b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestSerDeUtils.java index 687dc40eef03d..463fa5cf36b0d 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/util/TestSerDeUtils.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/util/TestSerDeUtils.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.io.BytesWritable; import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.testng.annotations.Test; import java.lang.reflect.Type; @@ -275,6 +276,17 @@ public void testStructBlock() assertBlockEquals(actual, rowBlockOf(outerRowParameterTypes, outerRowValues.build().toArray())); } + @Test + public void testTimestampWithDifferentStorageZone() + { + DateTimeZone storageTimeZone = DateTimeZone.forID("Europe/Prague"); + DateTime dateTimeInJvmZone = new DateTime(2008, 10, 28, 16, 7, 15, 0); + DateTime dateTimeInStorageZone = dateTimeInJvmZone.withZoneRetainFields(storageTimeZone); + Block expectedTimestamp = VARBINARY.createBlockBuilder(null, 1).writeLong(dateTimeInStorageZone.getMillis()).closeEntry().build(); + Block actualTimestamp = getPrimitiveBlock(BIGINT, new Timestamp(dateTimeInJvmZone.getMillis()), getInspector(Timestamp.class), storageTimeZone); + assertBlockEquals(actualTimestamp, expectedTimestamp); + } + @Test public void testReuse() { @@ -289,7 +301,7 @@ public void testReuse() Type type = new TypeToken>() {}.getType(); ObjectInspector inspector = getInspector(type); - Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector); + Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector, DateTimeZone.getDefault()); Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L); assertBlockEquals(actual, expected); @@ -310,16 +322,17 @@ private Slice blockToSlice(Block block) private static Block toBinaryBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector) { + DateTimeZone zone = DateTimeZone.getDefault(); if (inspector.getCategory() == Category.PRIMITIVE) { - return getPrimitiveBlock(type, object, inspector); + return getPrimitiveBlock(type, object, inspector, zone); } - return getBlockObject(type, object, inspector); + return getBlockObject(type, object, inspector, zone); } - private static Block getPrimitiveBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector) + private static Block getPrimitiveBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector, DateTimeZone hiveStorageTimeZone) { BlockBuilder builder = VARBINARY.createBlockBuilder(null, 1); - serializeObject(type, builder, object, inspector); + serializeObject(type, builder, object, inspector, hiveStorageTimeZone); return builder.build(); } }