diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java index 1a0cdfc517..c1b7d99fd8 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java @@ -32,6 +32,16 @@ import static java.util.Optional.empty; import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER; import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.UNDEFINED; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER; public abstract class LogicalTypeAnnotation { enum LogicalTypeToken { @@ -590,7 +600,15 @@ public int hashCode() { @Override PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { - return PrimitiveStringifier.TIME_STRINGIFIER; + switch (unit) { + case MICROS: + case MILLIS: + return isAdjustedToUTC ? TIME_UTC_STRINGIFIER : TIME_STRINGIFIER; + case NANOS: + return isAdjustedToUTC ? 
TIME_NANOS_UTC_STRINGIFIER : TIME_NANOS_STRINGIFIER; + default: + return super.valueStringifier(primitiveType); + } } } @@ -662,11 +680,11 @@ public int hashCode() { PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) { switch (unit) { case MICROS: - return PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; + return isAdjustedToUTC ? TIMESTAMP_MICROS_UTC_STRINGIFIER : TIMESTAMP_MICROS_STRINGIFIER; case MILLIS: - return PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; + return isAdjustedToUTC ? TIMESTAMP_MILLIS_UTC_STRINGIFIER : TIMESTAMP_MILLIS_STRINGIFIER; case NANOS: - return PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; + return isAdjustedToUTC ? TIMESTAMP_NANOS_UTC_STRINGIFIER : TIMESTAMP_NANOS_STRINGIFIER; default: return super.valueStringifier(primitiveType); } diff --git a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java index 03786ed738..4705ad94eb 100644 --- a/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java +++ b/parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveStringifier.java @@ -309,48 +309,89 @@ Instant getInstant(long value) { } }; - static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") { + static final PrimitiveStringifier TIMESTAMP_MILLIS_UTC_STRINGIFIER = new DateStringifier( + "TIMESTAMP_MILLIS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSZ") { @Override - public String stringify(int millis) { - return toTimeString(millis, MILLISECONDS); + Instant getInstant(long value) { + return Instant.ofEpochMilli(value); } + }; + static final PrimitiveStringifier TIMESTAMP_MICROS_UTC_STRINGIFIER = new DateStringifier( + "TIMESTAMP_MICROS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ") { @Override - public String stringify(long micros) { - return toTimeString(micros, MICROSECONDS); + Instant getInstant(long value) { + return 
Instant.ofEpochSecond(MICROSECONDS.toSeconds(value), MICROSECONDS.toNanos(value % SECONDS.toMicros(1))); } + }; - private String toTimeString(long duration, TimeUnit unit) { - String format = "%02d:%02d:%02d.%0" + (unit == MILLISECONDS ? "3d" : "6d"); + static final PrimitiveStringifier TIMESTAMP_NANOS_UTC_STRINGIFIER = new DateStringifier( + "TIMESTAMP_NANOS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSZ") { + @Override + Instant getInstant(long value) { + return Instant.ofEpochSecond(NANOSECONDS.toSeconds(value), NANOSECONDS.toNanos(value % SECONDS.toNanos(1))); + } + }; + + private abstract static class TimeStringifier extends PrimitiveStringifier { + private final boolean withZone; + + TimeStringifier(String name, boolean withZone) { + super(name); + this.withZone = withZone; + } + + protected String toTimeString(long duration, TimeUnit unit) { + String additionalFormat = (unit == MILLISECONDS ? "3d" : unit == MICROSECONDS ? "6d" : "9d"); + String timeZone = withZone ? "+0000" : ""; + String format = "%02d:%02d:%02d.%0" + additionalFormat + timeZone; return String.format(format, - unit.toHours(duration), - convert(duration, unit, MINUTES, HOURS), - convert(duration, unit, SECONDS, MINUTES), - convert(duration, unit, unit, SECONDS)); + unit.toHours(duration), + convert(duration, unit, MINUTES, HOURS), + convert(duration, unit, SECONDS, MINUTES), + convert(duration, unit, unit, SECONDS)); } - private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { + protected long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { return Math.abs(to.convert(duration, from) % to.convert(1, higher)); } + } + + static final PrimitiveStringifier TIME_STRINGIFIER = new TimeStringifier("TIME_STRINGIFIER", false) { + @Override + public String stringify(int millis) { + return toTimeString(millis, MILLISECONDS); + } + + @Override + public String stringify(long micros) { + return toTimeString(micros, MICROSECONDS); + } }; - static final 
PrimitiveStringifier TIME_NANOS_STRINGIFIER = new PrimitiveStringifier("TIME_NANOS_STRINGIFIER") { + static final PrimitiveStringifier TIME_NANOS_STRINGIFIER = new TimeStringifier("TIME_NANOS_STRINGIFIER", false) { @Override public String stringify(long nanos) { return toTimeString(nanos, NANOSECONDS); } + }; - private String toTimeString(long nanos, TimeUnit unit) { - String format = "%02d:%02d:%02d.%09d"; - return String.format(format, - unit.toHours(nanos), - convert(nanos, unit, MINUTES, HOURS), - convert(nanos, unit, SECONDS, MINUTES), - convert(nanos, unit, unit, SECONDS)); + static final PrimitiveStringifier TIME_UTC_STRINGIFIER = new TimeStringifier("TIME_UTC_STRINGIFIER", true) { + @Override + public String stringify(int millis) { + return toTimeString(millis, MILLISECONDS); } - private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) { - return Math.abs(to.convert(duration, from) % to.convert(1, higher)); + @Override + public String stringify(long micros) { + return toTimeString(micros, MICROSECONDS); + } + }; + + static final PrimitiveStringifier TIME_NANOS_UTC_STRINGIFIER = new TimeStringifier("TIME_NANOS_UTC_STRINGIFIER", true) { + @Override + public String stringify(long nanos) { + return toTimeString(nanos, NANOSECONDS); } }; diff --git a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java index afc8684cfa..b5de4f850e 100644 --- a/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java +++ b/parquet-column/src/test/java/org/apache/parquet/schema/TestPrimitiveStringifier.java @@ -19,6 +19,7 @@ package org.apache.parquet.schema; import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Arrays.asList; import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.MICROSECONDS; import static 
java.util.concurrent.TimeUnit.MILLISECONDS; @@ -28,8 +29,16 @@ import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER; +import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER; import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER; import static org.junit.Assert.assertEquals; @@ -37,7 +46,6 @@ import java.math.BigInteger; import java.nio.ByteBuffer; -import java.util.Arrays; import java.util.Calendar; import java.util.HashSet; import java.util.Set; @@ -162,102 +170,116 @@ public void testDateStringifier() { @Test public void testTimestampMillisStringifier() { - PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER; + for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MILLIS_STRINGIFIER, TIMESTAMP_MILLIS_UTC_STRINGIFIER)) { + String timezoneAmendment = (stringifier == TIMESTAMP_MILLIS_STRINGIFIER ? 
"" : "+0000"); - assertEquals("1970-01-01T00:00:00.000", stringifier.stringify(0l)); + assertEquals(withZoneString("1970-01-01T00:00:00.000", timezoneAmendment), stringifier.stringify(0l)); - Calendar cal = Calendar.getInstance(UTC); - cal.clear(); - cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54); - cal.set(Calendar.MILLISECOND, 120); - assertEquals("2017-12-15T10:09:54.120", stringifier.stringify(cal.getTimeInMillis())); + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54); + cal.set(Calendar.MILLISECOND, 120); + assertEquals(withZoneString("2017-12-15T10:09:54.120", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis())); - cal.clear(); - cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1); - cal.set(Calendar.MILLISECOND, 9); - assertEquals("1948-11-23T20:19:01.009", stringifier.stringify(cal.getTimeInMillis())); + cal.clear(); + cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1); + cal.set(Calendar.MILLISECOND, 9); + assertEquals(withZoneString("1948-11-23T20:19:01.009", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis())); - checkThrowingUnsupportedException(stringifier, Long.TYPE); + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } } @Test public void testTimestampMicrosStringifier() { - PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER; + for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MICROS_STRINGIFIER, TIMESTAMP_MICROS_UTC_STRINGIFIER)) { + String timezoneAmendment = (stringifier == TIMESTAMP_MICROS_STRINGIFIER ? 
"" : "+0000"); - assertEquals("1970-01-01T00:00:00.000000", stringifier.stringify(0l)); + assertEquals(withZoneString("1970-01-01T00:00:00.000000", timezoneAmendment), stringifier.stringify(0l)); - Calendar cal = Calendar.getInstance(UTC); - cal.clear(); - cal.set(2053, Calendar.JULY, 10, 22, 13, 24); - cal.set(Calendar.MILLISECOND, 84); - long micros = cal.getTimeInMillis() * 1000 + 900; - assertEquals("2053-07-10T22:13:24.084900", stringifier.stringify(micros)); + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2053, Calendar.JULY, 10, 22, 13, 24); + cal.set(Calendar.MILLISECOND, 84); + long micros = cal.getTimeInMillis() * 1000 + 900; + assertEquals(withZoneString("2053-07-10T22:13:24.084900", timezoneAmendment), stringifier.stringify(micros)); - cal.clear(); - cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); - cal.set(Calendar.MILLISECOND, 765); - micros = cal.getTimeInMillis() * 1000 - 1; - assertEquals("1848-03-15T09:23:59.764999", stringifier.stringify(micros)); + cal.clear(); + cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); + cal.set(Calendar.MILLISECOND, 765); + micros = cal.getTimeInMillis() * 1000 - 1; + assertEquals(withZoneString("1848-03-15T09:23:59.764999", timezoneAmendment), stringifier.stringify(micros)); - checkThrowingUnsupportedException(stringifier, Long.TYPE); + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } } @Test public void testTimestampNanosStringifier() { - PrimitiveStringifier stringifier = PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER; + for (PrimitiveStringifier stringifier : asList(TIMESTAMP_NANOS_STRINGIFIER, TIMESTAMP_NANOS_UTC_STRINGIFIER)) { + String timezoneAmendment = (stringifier == TIMESTAMP_NANOS_STRINGIFIER ? 
"" : "+0000"); - assertEquals("1970-01-01T00:00:00.000000000", stringifier.stringify(0l)); + assertEquals(withZoneString("1970-01-01T00:00:00.000000000", timezoneAmendment), stringifier.stringify(0l)); - Calendar cal = Calendar.getInstance(UTC); - cal.clear(); - cal.set(2053, Calendar.JULY, 10, 22, 13, 24); - cal.set(Calendar.MILLISECOND, 84); - long nanos = cal.getTimeInMillis() * 1_000_000 + 536; - assertEquals("2053-07-10T22:13:24.084000536", stringifier.stringify(nanos)); + Calendar cal = Calendar.getInstance(UTC); + cal.clear(); + cal.set(2053, Calendar.JULY, 10, 22, 13, 24); + cal.set(Calendar.MILLISECOND, 84); + long nanos = cal.getTimeInMillis() * 1_000_000 + 536; + assertEquals(withZoneString("2053-07-10T22:13:24.084000536", timezoneAmendment), stringifier.stringify(nanos)); - cal.clear(); - cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); - cal.set(Calendar.MILLISECOND, 765); - nanos = cal.getTimeInMillis() * 1_000_000 - 1; - assertEquals("1848-03-15T09:23:59.764999999", stringifier.stringify(nanos)); + cal.clear(); + cal.set(1848, Calendar.MARCH, 15, 9, 23, 59); + cal.set(Calendar.MILLISECOND, 765); + nanos = cal.getTimeInMillis() * 1_000_000 - 1; + assertEquals(withZoneString("1848-03-15T09:23:59.764999999", timezoneAmendment), stringifier.stringify(nanos)); - checkThrowingUnsupportedException(stringifier, Long.TYPE); + checkThrowingUnsupportedException(stringifier, Long.TYPE); + } } @Test public void testTimeStringifier() { - PrimitiveStringifier stringifier = TIME_STRINGIFIER; + for (PrimitiveStringifier stringifier : asList(TIME_STRINGIFIER, TIME_UTC_STRINGIFIER)) { + String timezoneAmendment = (stringifier == TIME_STRINGIFIER ? 
"" : "+0000"); - assertEquals("00:00:00.000", stringifier.stringify(0)); - assertEquals("00:00:00.000000", stringifier.stringify(0l)); + assertEquals(withZoneString("00:00:00.000", timezoneAmendment), stringifier.stringify(0)); + assertEquals(withZoneString("00:00:00.000000", timezoneAmendment), stringifier.stringify(0l)); - assertEquals("12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789))); - assertEquals("12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012))); + assertEquals(withZoneString("12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789))); + assertEquals(withZoneString("12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012))); - assertEquals("-12:34:56.789", stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789))); - assertEquals("-12:34:56.789012", stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012))); + assertEquals(withZoneString("-12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789))); + assertEquals(withZoneString("-12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012))); - assertEquals("123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567))); - assertEquals("12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789))); + assertEquals(withZoneString("123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567))); + assertEquals(withZoneString("12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789))); - assertEquals("-123:12:34.567", stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567))); - assertEquals("-12345:12:34.056789", stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789))); 
+ assertEquals(withZoneString("-123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567))); + assertEquals(withZoneString("-12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789))); - checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + } } @Test public void testTimeNanoStringifier() { - PrimitiveStringifier stringifier = TIME_NANOS_STRINGIFIER; + for (PrimitiveStringifier stringifier : asList(TIME_NANOS_STRINGIFIER, TIME_NANOS_UTC_STRINGIFIER)) { + String timezoneAmendment = (stringifier == TIME_NANOS_STRINGIFIER ? "" : "+0000"); - assertEquals("00:00:00.000000000", stringifier.stringify(0l)); + assertEquals(withZoneString("00:00:00.000000000", timezoneAmendment), stringifier.stringify(0l)); - assertEquals("12:34:56.789012987", stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987))); - assertEquals("-12:34:56.000789012", stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012))); - assertEquals("12345:12:34.000056789", stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789))); - assertEquals("-12345:12:34.000056789", stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789))); + assertEquals(withZoneString("12:34:56.789012987", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987))); + assertEquals(withZoneString("-12:34:56.000789012", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012))); + assertEquals(withZoneString("12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789))); + assertEquals(withZoneString("-12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789))); - checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + 
checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE); + } + } + + private String withZoneString(String expected, String zoneString) { + return expected + zoneString; } private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) { @@ -288,7 +310,7 @@ public void testDecimalStringifier() { } private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) { - Set<Class<?>> set = new HashSet<>(Arrays.asList(excludes)); + Set<Class<?>> set = new HashSet<>(asList(excludes)); if (!set.contains(Integer.TYPE)) { try { stringifier.stringify(0);