Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@
import static java.util.Optional.empty;
import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER;
import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.UNDEFINED;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;

public abstract class LogicalTypeAnnotation {
enum LogicalTypeToken {
Expand Down Expand Up @@ -590,7 +600,15 @@ public int hashCode() {

/**
 * Selects the stringifier for this time annotation from its {@code unit} and its
 * {@code isAdjustedToUTC} flag.
 *
 * <p>MILLIS and MICROS share one stringifier pair; NANOS has its own pair. Within a pair the
 * UTC variant is returned when {@code isAdjustedToUTC} is set.
 *
 * @param primitiveType only forwarded to the superclass when the unit is not handled here
 * @return the matching stringifier constant, or the superclass's choice for any other unit
 */
@Override
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
  // The previous unconditional "return PrimitiveStringifier.TIME_STRINGIFIER;" is gone:
  // it ignored both the unit and the UTC flag and made the switch below unreachable.
  switch (unit) {
    case MICROS:
    case MILLIS:
      return isAdjustedToUTC ? TIME_UTC_STRINGIFIER : TIME_STRINGIFIER;
    case NANOS:
      return isAdjustedToUTC ? TIME_NANOS_UTC_STRINGIFIER : TIME_NANOS_STRINGIFIER;
    default:
      return super.valueStringifier(primitiveType);
  }
}
}

Expand Down Expand Up @@ -662,11 +680,11 @@ public int hashCode() {
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
switch (unit) {
case MICROS:
return PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
return isAdjustedToUTC ? TIMESTAMP_MICROS_UTC_STRINGIFIER : TIMESTAMP_MICROS_STRINGIFIER;
case MILLIS:
return PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
return isAdjustedToUTC ? TIMESTAMP_MILLIS_UTC_STRINGIFIER : TIMESTAMP_MILLIS_STRINGIFIER;
case NANOS:
return PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
return isAdjustedToUTC ? TIMESTAMP_NANOS_UTC_STRINGIFIER : TIMESTAMP_NANOS_STRINGIFIER;
default:
return super.valueStringifier(primitiveType);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,48 +309,89 @@ Instant getInstant(long value) {
}
};

static final PrimitiveStringifier TIME_STRINGIFIER = new PrimitiveStringifier("TIME_STRINGIFIER") {
static final PrimitiveStringifier TIMESTAMP_MILLIS_UTC_STRINGIFIER = new DateStringifier(
"TIMESTAMP_MILLIS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSZ") {
@Override
public String stringify(int millis) {
return toTimeString(millis, MILLISECONDS);
Instant getInstant(long value) {
return Instant.ofEpochMilli(value);
}
};

static final PrimitiveStringifier TIMESTAMP_MICROS_UTC_STRINGIFIER = new DateStringifier(
"TIMESTAMP_MICROS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSZ") {
@Override
public String stringify(long micros) {
return toTimeString(micros, MICROSECONDS);
Instant getInstant(long value) {
return Instant.ofEpochSecond(MICROSECONDS.toSeconds(value), MICROSECONDS.toNanos(value % SECONDS.toMicros(1)));
}
};

private String toTimeString(long duration, TimeUnit unit) {
String format = "%02d:%02d:%02d.%0" + (unit == MILLISECONDS ? "3d" : "6d");
static final PrimitiveStringifier TIMESTAMP_NANOS_UTC_STRINGIFIER = new DateStringifier(
"TIMESTAMP_NANOS_UTC_STRINGIFIER", "yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSZ") {
@Override
Instant getInstant(long value) {
return Instant.ofEpochSecond(NANOSECONDS.toSeconds(value), NANOSECONDS.toNanos(value % SECONDS.toNanos(1)));
}
};

private abstract static class TimeStringifier extends PrimitiveStringifier {
private final boolean withZone;

TimeStringifier(String name, boolean withZone) {
super(name);
this.withZone = withZone;
}

protected String toTimeString(long duration, TimeUnit unit) {
String additionalFormat = (unit == MILLISECONDS ? "3d" : unit == MICROSECONDS ? "6d" : "9d");
String timeZone = withZone ? "+0000" : "";
String format = "%02d:%02d:%02d.%0" + additionalFormat + timeZone;
return String.format(format,
unit.toHours(duration),
convert(duration, unit, MINUTES, HOURS),
convert(duration, unit, SECONDS, MINUTES),
convert(duration, unit, unit, SECONDS));
unit.toHours(duration),
convert(duration, unit, MINUTES, HOURS),
convert(duration, unit, SECONDS, MINUTES),
convert(duration, unit, unit, SECONDS));
}

private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) {
protected long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) {
return Math.abs(to.convert(duration, from) % to.convert(1, higher));
}
}

/**
 * Time stringifier constructed with the zone flag off. It accepts both {@code int} and
 * {@code long} inputs and delegates the formatting to {@code toTimeString}.
 */
static final PrimitiveStringifier TIME_STRINGIFIER = new TimeStringifier("TIME_STRINGIFIER", false) {
  /** Formats a {@code long} input as a MICROSECONDS duration. */
  @Override
  public String stringify(long timeInMicros) {
    final String rendered = toTimeString(timeInMicros, MICROSECONDS);
    return rendered;
  }

  /** Formats an {@code int} input as a MILLISECONDS duration. */
  @Override
  public String stringify(int timeInMillis) {
    final String rendered = toTimeString(timeInMillis, MILLISECONDS);
    return rendered;
  }
};

static final PrimitiveStringifier TIME_NANOS_STRINGIFIER = new PrimitiveStringifier("TIME_NANOS_STRINGIFIER") {
static final PrimitiveStringifier TIME_NANOS_STRINGIFIER = new TimeStringifier("TIME_NANOS_STRINGIFIER", false) {
@Override
public String stringify(long nanos) {
return toTimeString(nanos, NANOSECONDS);
}
};

private String toTimeString(long nanos, TimeUnit unit) {
String format = "%02d:%02d:%02d.%09d";
return String.format(format,
unit.toHours(nanos),
convert(nanos, unit, MINUTES, HOURS),
convert(nanos, unit, SECONDS, MINUTES),
convert(nanos, unit, unit, SECONDS));
static final PrimitiveStringifier TIME_UTC_STRINGIFIER = new TimeStringifier("TIME_UTC_STRINGIFIER", true) {
@Override
public String stringify(int millis) {
return toTimeString(millis, MILLISECONDS);
}

private long convert(long duration, TimeUnit from, TimeUnit to, TimeUnit higher) {
return Math.abs(to.convert(duration, from) % to.convert(1, higher));
@Override
public String stringify(long micros) {
return toTimeString(micros, MICROSECONDS);
}
};

/**
 * Nanosecond time stringifier constructed with the zone flag on. Only the {@code long}
 * overload is overridden here.
 */
static final PrimitiveStringifier TIME_NANOS_UTC_STRINGIFIER = new TimeStringifier("TIME_NANOS_UTC_STRINGIFIER", true) {
  /** Formats a {@code long} input as a NANOSECONDS duration. */
  @Override
  public String stringify(long timeInNanos) {
    final String rendered = toTimeString(timeInNanos, NANOSECONDS);
    return rendered;
  }
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.parquet.schema;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Arrays.asList;
import static java.util.concurrent.TimeUnit.HOURS;
import static java.util.concurrent.TimeUnit.MICROSECONDS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
Expand All @@ -28,16 +29,23 @@
import static org.apache.parquet.schema.PrimitiveStringifier.DATE_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.DEFAULT_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.INTERVAL_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MILLIS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_NANOS_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.TIME_UTC_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.UNSIGNED_STRINGIFIER;
import static org.apache.parquet.schema.PrimitiveStringifier.UTF8_STRINGIFIER;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Set;
Expand Down Expand Up @@ -162,102 +170,116 @@ public void testDateStringifier() {

/**
 * Runs the same millisecond-timestamp expectations against the plain and the UTC
 * stringifier; the UTC one is expected to append "+0000" to every value.
 */
@Test
public void testTimestampMillisStringifier() {
  for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MILLIS_STRINGIFIER, TIMESTAMP_MILLIS_UTC_STRINGIFIER)) {
    String timezoneAmendment = (stringifier == TIMESTAMP_MILLIS_STRINGIFIER ? "" : "+0000");

    assertEquals(withZoneString("1970-01-01T00:00:00.000", timezoneAmendment), stringifier.stringify(0L));

    Calendar cal = Calendar.getInstance(UTC);
    cal.clear();
    cal.set(2017, Calendar.DECEMBER, 15, 10, 9, 54);
    cal.set(Calendar.MILLISECOND, 120);
    assertEquals(withZoneString("2017-12-15T10:09:54.120", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));

    // A date before the epoch, i.e. a negative millisecond value.
    cal.clear();
    cal.set(1948, Calendar.NOVEMBER, 23, 20, 19, 1);
    cal.set(Calendar.MILLISECOND, 9);
    assertEquals(withZoneString("1948-11-23T20:19:01.009", timezoneAmendment), stringifier.stringify(cal.getTimeInMillis()));

    checkThrowingUnsupportedException(stringifier, Long.TYPE);
  }
}

/**
 * Runs the same microsecond-timestamp expectations against the plain and the UTC
 * stringifier; the UTC one is expected to append "+0000" to every value.
 */
@Test
public void testTimestampMicrosStringifier() {
  for (PrimitiveStringifier stringifier : asList(TIMESTAMP_MICROS_STRINGIFIER, TIMESTAMP_MICROS_UTC_STRINGIFIER)) {
    String timezoneAmendment = (stringifier == TIMESTAMP_MICROS_STRINGIFIER ? "" : "+0000");

    assertEquals(withZoneString("1970-01-01T00:00:00.000000", timezoneAmendment), stringifier.stringify(0L));

    Calendar cal = Calendar.getInstance(UTC);
    cal.clear();
    cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
    cal.set(Calendar.MILLISECOND, 84);
    long micros = cal.getTimeInMillis() * 1000 + 900;
    assertEquals(withZoneString("2053-07-10T22:13:24.084900", timezoneAmendment), stringifier.stringify(micros));

    // A date before the epoch; subtracting one microsecond rolls .765000 back to .764999.
    cal.clear();
    cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
    cal.set(Calendar.MILLISECOND, 765);
    micros = cal.getTimeInMillis() * 1000 - 1;
    assertEquals(withZoneString("1848-03-15T09:23:59.764999", timezoneAmendment), stringifier.stringify(micros));

    checkThrowingUnsupportedException(stringifier, Long.TYPE);
  }
}

/**
 * Runs the same nanosecond-timestamp expectations against the plain and the UTC
 * stringifier; the UTC one is expected to append "+0000" to every value.
 */
@Test
public void testTimestampNanosStringifier() {
  for (PrimitiveStringifier stringifier : asList(TIMESTAMP_NANOS_STRINGIFIER, TIMESTAMP_NANOS_UTC_STRINGIFIER)) {
    String timezoneAmendment = (stringifier == TIMESTAMP_NANOS_STRINGIFIER ? "" : "+0000");

    assertEquals(withZoneString("1970-01-01T00:00:00.000000000", timezoneAmendment), stringifier.stringify(0L));

    Calendar cal = Calendar.getInstance(UTC);
    cal.clear();
    cal.set(2053, Calendar.JULY, 10, 22, 13, 24);
    cal.set(Calendar.MILLISECOND, 84);
    long nanos = cal.getTimeInMillis() * 1_000_000 + 536;
    assertEquals(withZoneString("2053-07-10T22:13:24.084000536", timezoneAmendment), stringifier.stringify(nanos));

    // A date before the epoch; subtracting one nanosecond rolls .765000000 back to .764999999.
    cal.clear();
    cal.set(1848, Calendar.MARCH, 15, 9, 23, 59);
    cal.set(Calendar.MILLISECOND, 765);
    nanos = cal.getTimeInMillis() * 1_000_000 - 1;
    assertEquals(withZoneString("1848-03-15T09:23:59.764999999", timezoneAmendment), stringifier.stringify(nanos));

    checkThrowingUnsupportedException(stringifier, Long.TYPE);
  }
}

/**
 * Runs the same time-of-day expectations against the plain and the UTC stringifier, for
 * both the int (milliseconds) and long (microseconds) overloads; the UTC one is expected
 * to append "+0000" to every value.
 */
@Test
public void testTimeStringifier() {
  for (PrimitiveStringifier stringifier : asList(TIME_STRINGIFIER, TIME_UTC_STRINGIFIER)) {
    String timezoneAmendment = (stringifier == TIME_STRINGIFIER ? "" : "+0000");

    assertEquals(withZoneString("00:00:00.000", timezoneAmendment), stringifier.stringify(0));
    assertEquals(withZoneString("00:00:00.000000", timezoneAmendment), stringifier.stringify(0L));

    assertEquals(withZoneString("12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 12, 34, 56, 789)));
    assertEquals(withZoneString("12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12, 34, 56, 789012)));

    // Negative inputs: only the hours field is expected to carry the sign.
    assertEquals(withZoneString("-12:34:56.789", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -12, -34, -56, -789)));
    assertEquals(withZoneString("-12:34:56.789012", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12, -34, -56, -789012)));

    // Hour counts beyond two digits are printed in full.
    assertEquals(withZoneString("123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, 123, 12, 34, 567)));
    assertEquals(withZoneString("12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, 12345, 12, 34, 56789)));

    assertEquals(withZoneString("-123:12:34.567", timezoneAmendment), stringifier.stringify((int) convert(MILLISECONDS, -123, -12, -34, -567)));
    assertEquals(withZoneString("-12345:12:34.056789", timezoneAmendment), stringifier.stringify(convert(MICROSECONDS, -12345, -12, -34, -56789)));

    checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
  }
}

/**
 * Runs the same nanosecond time-of-day expectations against the plain and the UTC
 * stringifier; the UTC one is expected to append "+0000" to every value.
 */
@Test
public void testTimeNanoStringifier() {
  for (PrimitiveStringifier stringifier : asList(TIME_NANOS_STRINGIFIER, TIME_NANOS_UTC_STRINGIFIER)) {
    String timezoneAmendment = (stringifier == TIME_NANOS_STRINGIFIER ? "" : "+0000");

    assertEquals(withZoneString("00:00:00.000000000", timezoneAmendment), stringifier.stringify(0L));

    assertEquals(withZoneString("12:34:56.789012987", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12, 34, 56, 789012987)));
    assertEquals(withZoneString("-12:34:56.000789012", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12, -34, -56, -789012)));
    assertEquals(withZoneString("12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, 12345, 12, 34, 56789)));
    assertEquals(withZoneString("-12345:12:34.000056789", timezoneAmendment), stringifier.stringify(convert(NANOSECONDS, -12345, -12, -34, -56789)));

    // NOTE(review): Integer.TYPE is excluded from the unsupported-type check although the
    // nanos stringifiers under test override only stringify(long) - confirm whether the
    // int overload should be checked as well.
    checkThrowingUnsupportedException(stringifier, Integer.TYPE, Long.TYPE);
  }
}

/**
 * Builds the expected output for a stringifier by appending the zone suffix to the base
 * expectation.
 *
 * @param expected the expected text without any zone suffix
 * @param zoneString the suffix to append (may be empty)
 * @return {@code expected} followed by {@code zoneString}
 */
private String withZoneString(String expected, String zoneString) {
  StringBuilder combined = new StringBuilder();
  combined.append(expected);
  combined.append(zoneString);
  return combined.toString();
}

private long convert(TimeUnit unit, long hours, long minutes, long seconds, long rest) {
Expand Down Expand Up @@ -288,7 +310,7 @@ public void testDecimalStringifier() {
}

private void checkThrowingUnsupportedException(PrimitiveStringifier stringifier, Class<?>... excludes) {
Set<Class<?>> set = new HashSet<>(Arrays.asList(excludes));
Set<Class<?>> set = new HashSet<>(asList(excludes));
if (!set.contains(Integer.TYPE)) {
try {
stringifier.stringify(0);
Expand Down