Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ private void parseObjectColumn(int column)
nulls[column] = true;
}
else {
objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column]);
objects[column] = getBlockObject(types[column], fieldData, fieldInspectors[column], hiveStorageTimeZone);
nulls[column] = false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,38 +69,40 @@

public final class SerDeUtils
{
private static final DateTimeZone JVM_TIME_ZONE = DateTimeZone.getDefault();

private SerDeUtils() {}

public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector)
public static Block getBlockObject(Type type, Object object, ObjectInspector objectInspector, DateTimeZone hiveStorageTimeZone)
{
return requireNonNull(serializeObject(type, null, object, objectInspector), "serialized result is null");
return requireNonNull(serializeObject(type, null, object, objectInspector, hiveStorageTimeZone), "serialized result is null");
}

public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector)
public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
return serializeObject(type, builder, object, inspector, true);
return serializeObject(type, builder, object, inspector, true, hiveStorageTimeZone);
}

// This version supports optionally disabling the filtering of null map key, which should only be used for building test data sets
// that contain null map keys. For production, null map keys are not allowed.
@VisibleForTesting
public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys)
public static Block serializeObject(Type type, BlockBuilder builder, Object object, ObjectInspector inspector, boolean filterNullMapKeys, DateTimeZone hiveStorageTimeZone)
{
switch (inspector.getCategory()) {
case PRIMITIVE:
serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector);
serializePrimitive(type, builder, object, (PrimitiveObjectInspector) inspector, hiveStorageTimeZone);
return null;
case LIST:
return serializeList(type, builder, object, (ListObjectInspector) inspector);
return serializeList(type, builder, object, (ListObjectInspector) inspector, hiveStorageTimeZone);
case MAP:
return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys);
return serializeMap(type, builder, object, (MapObjectInspector) inspector, filterNullMapKeys, hiveStorageTimeZone);
case STRUCT:
return serializeStruct(type, builder, object, (StructObjectInspector) inspector);
return serializeStruct(type, builder, object, (StructObjectInspector) inspector, hiveStorageTimeZone);
}
throw new RuntimeException("Unknown object inspector category: " + inspector.getCategory());
}

private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector)
private static void serializePrimitive(Type type, BlockBuilder builder, Object object, PrimitiveObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
requireNonNull(builder, "parent builder is null");

Expand Down Expand Up @@ -146,7 +148,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o
DateType.DATE.writeLong(builder, formatDateAsLong(object, (DateObjectInspector) inspector));
return;
case TIMESTAMP:
TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector));
TimestampType.TIMESTAMP.writeLong(builder, formatTimestampAsLong(object, (TimestampObjectInspector) inspector, hiveStorageTimeZone));
return;
case BINARY:
VARBINARY.writeSlice(builder, Slices.wrappedBuffer(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(object)));
Expand All @@ -165,7 +167,7 @@ private static void serializePrimitive(Type type, BlockBuilder builder, Object o
throw new RuntimeException("Unknown primitive type: " + inspector.getPrimitiveCategory());
}

private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector)
private static Block serializeList(Type type, BlockBuilder builder, Object object, ListObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
List<?> list = inspector.getList(object);
if (list == null) {
Expand All @@ -186,7 +188,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec
}

for (Object element : list) {
serializeObject(elementType, currentBuilder, element, elementInspector);
serializeObject(elementType, currentBuilder, element, elementInspector, hiveStorageTimeZone);
}

if (builder != null) {
Expand All @@ -199,7 +201,7 @@ private static Block serializeList(Type type, BlockBuilder builder, Object objec
}
}

private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, boolean filterNullMapKeys)
private static Block serializeMap(Type type, BlockBuilder builder, Object object, MapObjectInspector inspector, boolean filterNullMapKeys, DateTimeZone hiveStorageTimeZone)
{
Map<?, ?> map = inspector.getMap(object);
if (map == null) {
Expand All @@ -225,8 +227,8 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object
for (Map.Entry<?, ?> entry : map.entrySet()) {
// Hive skips map entries with null keys
if (!filterNullMapKeys || entry.getKey() != null) {
serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector);
serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector);
serializeObject(keyType, currentBuilder, entry.getKey(), keyInspector, hiveStorageTimeZone);
serializeObject(valueType, currentBuilder, entry.getValue(), valueInspector, hiveStorageTimeZone);
}
}

Expand All @@ -239,7 +241,7 @@ private static Block serializeMap(Type type, BlockBuilder builder, Object object
}
}

private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector)
private static Block serializeStruct(Type type, BlockBuilder builder, Object object, StructObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
if (object == null) {
requireNonNull(builder, "parent builder is null").appendNull();
Expand All @@ -260,7 +262,7 @@ private static Block serializeStruct(Type type, BlockBuilder builder, Object obj

for (int i = 0; i < typeParameters.size(); i++) {
StructField field = allStructFieldRefs.get(i);
serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector());
serializeObject(typeParameters.get(i), currentBuilder, inspector.getStructFieldData(object, field), field.getFieldObjectInspector(), hiveStorageTimeZone);
}

builder.closeEntry();
Expand Down Expand Up @@ -289,10 +291,16 @@ private static long formatDateAsLong(Object object, DateObjectInspector inspecto
return TimeUnit.MILLISECONDS.toDays(millisUtc);
}

private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector)
private static long formatTimestampAsLong(Object object, TimestampObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
Timestamp timestamp = getTimestamp(object, inspector);
return timestamp.getTime();
long parsedJvmMillis = timestamp.getTime();
// remove the JVM time zone correction from the timestamp
long hiveMillis = JVM_TIME_ZONE.convertUTCToLocal(parsedJvmMillis);

// convert to UTC using the real time zone for the underlying data
return hiveStorageTimeZone.convertLocalToUTC(hiveMillis, false);
}

private static Timestamp getTimestamp(Object object, TimestampObjectInspector inspector)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,8 @@ public static FileSplit createTestFile(
pageBuilder.getBlockBuilder(columnNumber),
testColumns.get(columnNumber).getWriteValue(),
testColumns.get(columnNumber).getObjectInspector(),
false);
false,
DateTimeZone.getDefault());
}
}
Page page = pageBuilder.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import com.facebook.presto.Session;
import com.facebook.presto.common.QualifiedObjectName;
import com.facebook.presto.common.type.TimeZoneKey;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.TypeSignature;
import com.facebook.presto.cost.StatsAndCosts;
Expand Down Expand Up @@ -2780,6 +2781,31 @@ public void testShowCreateTable()
assertEquals(getOnlyElement(actualResult.getOnlyColumnAsSet()), createTableSql);
}

@Test
public void testTextfileAmbiguousTimestamp()
{
try {
// set the session time zone to the same as the system timezone to avoid extra time zone conversions outside of what we are trying to test
Session timezoneSession = Session.builder(getSession()).setTimeZoneKey(TimeZoneKey.getTimeZoneKey("America/Bahia_Banderas")).build();
@Language("SQL") String createTableSql = format("" +
"CREATE TABLE test_timestamp_textfile \n" +
"WITH (\n" +
" format = 'TEXTFILE'\n" +
")\n" +
"AS SELECT TIMESTAMP '2022-10-30 01:16:13.000' ts, ARRAY[TIMESTAMP '2022-10-30 01:16:13.000'] array_ts",
getSession().getCatalog().get(),
getSession().getSchema().get());
assertUpdate(timezoneSession, createTableSql, 1);
// 2022-10-30 01:16:13.000 is an ambiguous timestamp in America/Bahia_Banderas because
// it occurs during a fall DST transition where the hour from 1-2am repeats.
// Ambiguous timestamps should be interpreted as the earlier of the two possible unixtimes for consistency.
assertQuery(timezoneSession, "SELECT to_unixtime(ts), to_unixtime(array_ts[1]) FROM test_timestamp_textfile", "SELECT 1.667110573E9, 1.667110573E9");
}
finally {
assertUpdate("DROP TABLE IF EXISTS test_timestamp_textfile");
}
}

@Test
public void testCreateExternalTable()
throws Exception
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.io.BytesWritable;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;

import java.lang.reflect.Type;
Expand Down Expand Up @@ -275,6 +276,17 @@ public void testStructBlock()
assertBlockEquals(actual, rowBlockOf(outerRowParameterTypes, outerRowValues.build().toArray()));
}

@Test
public void testTimestampWithDifferentStorageZone()
{
DateTimeZone storageTimeZone = DateTimeZone.forID("Europe/Prague");
DateTime dateTimeInJvmZone = new DateTime(2008, 10, 28, 16, 7, 15, 0);
DateTime dateTimeInStorageZone = dateTimeInJvmZone.withZoneRetainFields(storageTimeZone);
Block expectedTimestamp = VARBINARY.createBlockBuilder(null, 1).writeLong(dateTimeInStorageZone.getMillis()).closeEntry().build();
Block actualTimestamp = getPrimitiveBlock(BIGINT, new Timestamp(dateTimeInJvmZone.getMillis()), getInspector(Timestamp.class), storageTimeZone);
assertBlockEquals(actualTimestamp, expectedTimestamp);
}

@Test
public void testReuse()
{
Expand All @@ -289,7 +301,7 @@ public void testReuse()
Type type = new TypeToken<Map<BytesWritable, Long>>() {}.getType();
ObjectInspector inspector = getInspector(type);

Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector);
Block actual = getBlockObject(mapType(createUnboundedVarcharType(), BIGINT), ImmutableMap.of(value, 0L), inspector, DateTimeZone.getDefault());
Block expected = mapBlockOf(createUnboundedVarcharType(), BIGINT, "bye", 0L);

assertBlockEquals(actual, expected);
Expand All @@ -310,16 +322,17 @@ private Slice blockToSlice(Block block)

private static Block toBinaryBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector)
{
DateTimeZone zone = DateTimeZone.getDefault();
if (inspector.getCategory() == Category.PRIMITIVE) {
return getPrimitiveBlock(type, object, inspector);
return getPrimitiveBlock(type, object, inspector, zone);
}
return getBlockObject(type, object, inspector);
return getBlockObject(type, object, inspector, zone);
}

private static Block getPrimitiveBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector)
private static Block getPrimitiveBlock(com.facebook.presto.common.type.Type type, Object object, ObjectInspector inspector, DateTimeZone hiveStorageTimeZone)
{
BlockBuilder builder = VARBINARY.createBlockBuilder(null, 1);
serializeObject(type, builder, object, inspector);
serializeObject(type, builder, object, inspector, hiveStorageTimeZone);
return builder.build();
}
}