From 0cbdeb8b2ccc3b5f9e525dc12a78147d117375bc Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 14:02:11 -0700 Subject: [PATCH 01/13] Use DateTimeUtil conversion instead of /. --- .../java/org/apache/iceberg/expressions/ExpressionUtil.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 9b6a3e5bf7eb..4d56529e62db 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -524,7 +524,7 @@ private static String sanitize(Type type, Object value, long now, int today) { case TIMESTAMP: return sanitizeTimestamp((long) value, now); case TIMESTAMP_NANO: - return sanitizeTimestamp((long) value / 1000, now); + return sanitizeTimestamp(DateTimeUtil.nanosToMicros((long) value), now); case STRING: return sanitizeString((CharSequence) value, now, today); case BOOLEAN: @@ -547,7 +547,8 @@ private static String sanitize(Literal literal, long now, int today) { } else if (literal instanceof Literals.TimestampLiteral) { return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); } else if (literal instanceof Literals.TimestampNanoLiteral) { - return sanitizeTimestamp(((Literals.TimestampNanoLiteral) literal).value() / 1000, now); + return sanitizeTimestamp( + DateTimeUtil.nanosToMicros(((Literals.TimestampNanoLiteral) literal).value()), now); } else if (literal instanceof Literals.TimeLiteral) { return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { From 48626e56354d52c14bb6086dbec6c34782c42066 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:24:29 -0700 Subject: [PATCH 02/13] Update Literals to use DateTimeUtil, add new tests. 
--- .../apache/iceberg/expressions/Literals.java | 31 +-- .../apache/iceberg/transforms/Timestamps.java | 14 +- .../org/apache/iceberg/util/DateTimeUtil.java | 39 +++- .../TestTimestampLiteralConversions.java | 203 ++++++++++++++++++ 4 files changed, 258 insertions(+), 29 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index f2b064b7fee8..36a70ba32edc 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import java.time.Instant; import java.time.LocalDate; -import java.time.LocalDateTime; import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; @@ -301,6 +300,7 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) new TimestampLiteral(value()); case TIMESTAMP_NANO: + // assume micros and convert to nanos to match the behavior in the timestamp case above return new TimestampLiteral(value()).to(type); case DATE: if ((long) Integer.MAX_VALUE < value()) { @@ -440,11 +440,7 @@ public Literal to(Type type) { case TIMESTAMP: return (Literal) this; case DATE: - return (Literal) - new DateLiteral( - (int) - ChronoUnit.DAYS.between( - EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.MICROS).toLocalDate())); + return (Literal) new DateLiteral(DateTimeUtil.microsToDays(value())); case TIMESTAMP_NANO: return (Literal) new TimestampNanoLiteral(DateTimeUtil.microsToNanos(value())); default: @@ -468,9 +464,7 @@ static class TimestampNanoLiteral extends ComparableLiteral { public Literal to(Type type) { switch (type.typeId()) { case DATE: - return (Literal) - new DateLiteral( - (int) ChronoUnit.DAYS.between(EPOCH_DAY, EPOCH.plusNanos(value()).toLocalDate())); + return (Literal) new DateLiteral(DateTimeUtil.nanosToDays(value())); case TIMESTAMP: return (Literal) new 
TimestampLiteral(DateTimeUtil.nanosToMicros(value())); case TIMESTAMP_NANO: @@ -535,29 +529,18 @@ public Literal to(Type type) { case TIMESTAMP: if (((Types.TimestampType) type).shouldAdjustToUTC()) { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + long timestampMicros = DateTimeUtil.isoTimestamptzToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } else { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); + long timestampMicros = DateTimeUtil.isoTimestampToMicros(value().toString()); return (Literal) new TimestampLiteral(timestampMicros); } case TIMESTAMP_NANO: if (((Types.TimestampNanoType) type).shouldAdjustToUTC()) { - return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestamptzToNanos(value())); } else { - long timestampNanos = - ChronoUnit.NANOS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); - return (Literal) new TimestampNanoLiteral(timestampNanos); + return (Literal) new TimestampNanoLiteral(DateTimeUtil.isoTimestampToNanos(value())); } case STRING: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index c917c257ad88..7f89bbcd1237 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -167,7 +167,19 @@ public Integer apply(Long timestamp) { "Unsupported result type unit: " + resultTypeUnit); } case NANOS: - return Math.toIntExact(DateTimeUtil.convertNanos(timestamp, resultTypeUnit.unit)); + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.nanosToYears(timestamp); + case 
MONTHS: + return DateTimeUtil.nanosToMonths(timestamp); + case DAYS: + return DateTimeUtil.nanosToDays(timestamp); + case HOURS: + return DateTimeUtil.nanosToHours(timestamp); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } default: throw new UnsupportedOperationException( "Unsupported source type unit: " + sourceTypeUnit); diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 28c313fc3cb2..6d194431f840 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -27,6 +27,7 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.temporal.ChronoUnit; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; public class DateTimeUtil { private DateTimeUtil() {} @@ -77,6 +78,10 @@ public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. 
@@ -100,6 +105,10 @@ public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -135,9 +144,9 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } - public static long isoTimestampToNanos(CharSequence timestamp) { - return ChronoUnit.NANOS.between( - EPOCH, OffsetDateTime.parse(timestamp, DateTimeFormatter.ISO_DATE_TIME)); + public static long isoTimestamptzToNanos(CharSequence timestampString) { + return nanosFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } public static boolean isUTCTimestamptz(String timestampString) { @@ -151,6 +160,11 @@ public static long isoTimestampToMicros(String timestampString) { LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } + public static long isoTimestampToNanos(CharSequence timestampString) { + return nanosFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + } + public static int daysToYears(int days) { return convertDays(days, ChronoUnit.YEARS); } @@ -201,7 +215,24 @@ private static int convertMicros(long micros, ChronoUnit granularity) { } } - public static long convertNanos(long nanos, ChronoUnit granularity) { + public static int nanosToYears(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.YEARS)); + } + + public static int nanosToMonths(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.MONTHS)); + } + + public static int nanosToDays(long nanos) { + return Math.toIntExact(convertNanos(nanos, ChronoUnit.DAYS)); + } + + public static int nanosToHours(long nanos) 
{ + return Math.toIntExact(convertNanos(nanos, ChronoUnit.HOURS)); + } + + @VisibleForTesting + static long convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java index add374ee9a17..181f5f6eade6 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestTimestampLiteralConversions.java @@ -19,8 +19,11 @@ package org.apache.iceberg.expressions; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.time.format.DateTimeParseException; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; import org.junit.jupiter.api.Test; public class TestTimestampLiteralConversions { @@ -32,6 +35,76 @@ public void testTimestampToTimestampNanoConversion() { Literal timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); assertThat(timestampNano.value()).isEqualTo(1510842668000000000L); + + timestamp = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(0L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + 
assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampNanoType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1000L); + } + + @Test + public void testTimestampToDateConversion() { + Literal ts = + Literal.of("2017-11-16T14:31:08.000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0).isEqualTo(DateTimeUtil.isoDateToDays("1970-01-01")); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1).isEqualTo(DateTimeUtil.isoDateToDays("1969-12-31")); + } + + @Test + public void testTimestampMicrosToDateConversion() { + Literal ts = + 
Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); } @Test @@ -42,5 +115,135 @@ public void testTimestampNanoToTimestampConversion() { Literal timestampNano = timestamp.to(Types.TimestampType.withoutZone()); assertThat(timestampNano.value()).isEqualTo(1510842668000000L); + + timestamp = + Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(0L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + + timestamp = + Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + assertThat(timestamp.value()).isEqualTo(-1000L); + + timestampNano = timestamp.to(Types.TimestampType.withoutZone()); + assertThat(timestampNano.value()).isEqualTo(-1L); + } + + @Test + public void testTimestampNanosToDateConversion() { + Literal ts = + 
Literal.of("2017-11-16T14:31:08.000000001").to(Types.TimestampNanoType.withoutZone()); + int dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(DateTimeUtil.isoDateToDays("2017-11-16")); + + ts = Literal.of("1970-01-01T00:00:00.000000001").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(0); + + ts = Literal.of("1969-12-31T23:59:59.999999999").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + + ts = Literal.of("1969-12-31T23:59:59.999999000").to(Types.TimestampNanoType.withoutZone()); + dateOrdinal = (Integer) ts.to(Types.DateType.get()).value(); + assertThat(dateOrdinal).isEqualTo(-1); + } + + @Test + public void testTimestampNanosWithZoneConversion() { + Literal isoTimestampNanosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001+00:00"); + + assertThatThrownBy( + () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000000001L); + } + + + @Test + public void testTimestampMicrosWithZoneConversion() { + Literal isoTimestampMicrosWithZoneOffset = + Literal.of("2017-11-16T14:31:08.000001+00:00"); + + assertThatThrownBy( + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withoutZone())) + .as("Should 
not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withoutZone())) + .as("Should not convert timestamp with offset to a timestamp without zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampType.withZone()).value()) + .isEqualTo(1510842668000001L); + + assertThat(isoTimestampMicrosWithZoneOffset.to(Types.TimestampNanoType.withZone()).value()) + .isEqualTo(1510842668000001000L); + } + + @Test + public void testTimestampNanosWithoutZoneConversion() { + Literal isoTimestampNanosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000000001"); + + assertThatThrownBy( + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000000L); + + assertThat(isoTimestampNanosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000000001L); + } + + @Test + public void testTimestampMicrosWithoutZoneConversion() { + Literal isoTimestampMicrosWithoutZoneOffset = + Literal.of("2017-11-16T14:31:08.000001"); + + assertThatThrownBy( + () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withZone())) + .as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThatThrownBy( + () -> isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withZone())) + 
.as("Should not convert timestamp without offset to a timestamp with zone") + .isInstanceOf(DateTimeParseException.class); + + assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampType.withoutZone()).value()) + .isEqualTo(1510842668000001L); + + assertThat(isoTimestampMicrosWithoutZoneOffset.to(Types.TimestampNanoType.withoutZone()).value()) + .isEqualTo(1510842668000001000L); } } From 743e872bf57f20738ef41686c716c08bb3d59267 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:34:57 -0700 Subject: [PATCH 03/13] Fix test for DateTimeUtil.isoTimestampToNanos. --- .../java/org/apache/iceberg/util/TestDateTimeUtil.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 6ae2891c35e0..618868603c5c 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -51,7 +51,13 @@ public void microsToNanos() { @Test public void isoTimestampToNanos() { - assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T14:31:08.000001001-08:00")) + assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T22:31:08.000001001")) + .isEqualTo(1510871468000001001L); + } + + @Test + public void isoTimestamptzToNanos() { + assertThat(DateTimeUtil.isoTimestamptzToNanos("2017-11-16T14:31:08.000001001-08:00")) .isEqualTo(1510871468000001001L); } From dedeb19351126fe54a1afd6c376daf96d89b5f80 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 15:52:44 -0700 Subject: [PATCH 04/13] Fix TestDateTimeUtil and add test cases. 
--- .../iceberg/transforms/TransformUtil.java | 12 +-- .../org/apache/iceberg/util/DateTimeUtil.java | 17 +++- .../apache/iceberg/util/TestDateTimeUtil.java | 93 +++++++------------ 3 files changed, 55 insertions(+), 67 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 4c6f9118dd37..88f6b64e9218 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -24,8 +24,8 @@ import java.time.LocalTime; import java.time.OffsetDateTime; import java.time.ZoneOffset; -import java.time.temporal.ChronoUnit; import java.util.Base64; +import org.apache.iceberg.util.DateTimeUtil; class TransformUtil { @@ -55,19 +55,19 @@ static String humanTime(Long microsFromMidnight) { } static String humanTimestampWithZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); + return DateTimeUtil.microsToIsoTimestamptz(timestampMicros); } static String humanTimestampWithoutZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + return DateTimeUtil.microsToIsoTimestamp(timestampMicros); } static String humanTimestampNanoWithZone(Long timestampNanos) { - return ChronoUnit.NANOS.addTo(EPOCH, timestampNanos).toString(); + return DateTimeUtil.nanosToIsoTimestamptz(timestampNanos); } - static String humanTimestampNanoWithoutZone(Long timestampMicros) { - return ChronoUnit.NANOS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + static String humanTimestampNanoWithoutZone(Long timestampNanos) { + return DateTimeUtil.nanosToIsoTimestamp(timestampNanos); } static String humanHour(int hourOrdinal) { diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index 6d194431f840..bc901e492551 100644 --- 
a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -70,6 +70,10 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } @@ -126,11 +130,21 @@ public static String microsToIsoTimestamptz(long micros) { return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); } + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.atOffset(ZoneOffset.UTC).format(FORMATTER); + } + public static String microsToIsoTimestamp(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -231,8 +245,7 @@ public static int nanosToHours(long nanos) { return Math.toIntExact(convertNanos(nanos, ChronoUnit.HOURS)); } - @VisibleForTesting - static long convertNanos(long nanos, ChronoUnit granularity) { + private static long convertNanos(long nanos, ChronoUnit granularity) { if (nanos >= 0) { long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); diff --git a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java 
b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java index 618868603c5c..6088fe51b57a 100644 --- a/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/TestDateTimeUtil.java @@ -20,23 +20,11 @@ import static org.assertj.core.api.Assertions.assertThat; -import java.time.ZonedDateTime; -import java.time.temporal.ChronoUnit; +import org.apache.iceberg.transforms.Transforms; +import org.apache.iceberg.types.Types; import org.junit.jupiter.api.Test; public class TestDateTimeUtil { - - @Test - public void formatTimestampMillis() { - String timestamp = "1970-01-01T00:00:00.001+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1L); - - timestamp = "1970-01-01T00:16:40+00:00"; - assertThat(DateTimeUtil.formatTimestampMillis(1000000L)).isEqualTo(timestamp); - assertThat(ZonedDateTime.parse(timestamp).toInstant().toEpochMilli()).isEqualTo(1000000L); - } - @Test public void nanosToMicros() { assertThat(DateTimeUtil.nanosToMicros(1510871468000001001L)).isEqualTo(1510871468000001L); @@ -53,64 +41,51 @@ public void microsToNanos() { public void isoTimestampToNanos() { assertThat(DateTimeUtil.isoTimestampToNanos("2017-11-16T22:31:08.000001001")) .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestampToNanos("1922-02-15T01:28:51.999998999")) + .isEqualTo(-1510871468000001001L); } @Test public void isoTimestamptzToNanos() { assertThat(DateTimeUtil.isoTimestamptzToNanos("2017-11-16T14:31:08.000001001-08:00")) .isEqualTo(1510871468000001001L); + assertThat(DateTimeUtil.isoTimestamptzToNanos("1922-02-15T01:28:51.999998999+00:00")) + .isEqualTo(-1510871468000001001L); } @Test public void convertNanos() { - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, 
ChronoUnit.MILLENNIA)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.CENTURIES)).isEqualTo(0); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(4); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(47); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(574); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(2498); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(17486); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HALF_DAYS)) - .isEqualTo(34973); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.HOURS)).isEqualTo(419686); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MINUTES)) - .isEqualTo(25181191); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.SECONDS)) - .isEqualTo(1510871468); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MILLIS)) - .isEqualTo(1510871468000L); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.MICROS)) - .isEqualTo(1510871468000001L); - assertThat(DateTimeUtil.convertNanos(1510871468000001001L, ChronoUnit.NANOS)) - .isEqualTo(1510871468000001001L); + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withoutZone(), 1510871468000001001L)) + .isEqualTo("2017-11-16T22:31:08.000001001"); + assertThat(DateTimeUtil.nanosToYears(1510871468000001001L)).isEqualTo(47); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), 47)).isEqualTo("2017"); + assertThat(DateTimeUtil.nanosToMonths(1510871468000001001L)).isEqualTo(574); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), 574)).isEqualTo("2017-11"); + assertThat(DateTimeUtil.nanosToDays(1510871468000001001L)).isEqualTo(17486); + 
assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), 17486)) + .isEqualTo("2017-11-16"); + assertThat(DateTimeUtil.nanosToHours(1510871468000001001L)).isEqualTo(419686); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), 419686)) + .isEqualTo("2017-11-16-22"); } @Test public void convertNanosNegative() { - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.ERAS)).isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLENNIA)) - .isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.CENTURIES)) - .isEqualTo(-1); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DECADES)).isEqualTo(-5); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.YEARS)).isEqualTo(-48); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MONTHS)).isEqualTo(-575); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.WEEKS)).isEqualTo(-2499); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.DAYS)).isEqualTo(-17487); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HALF_DAYS)) - .isEqualTo(-34974); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.HOURS)) - .isEqualTo(-419687); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MINUTES)) - .isEqualTo(-25181192); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.SECONDS)) - .isEqualTo(-1510871469); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MILLIS)) - .isEqualTo(-1510871468001L); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.MICROS)) - .isEqualTo(-1510871468000002L); - assertThat(DateTimeUtil.convertNanos(-1510871468000001001L, ChronoUnit.NANOS)) - .isEqualTo(-1510871468000001001L); + assertThat( + Transforms.identity() + .toHumanString(Types.TimestampNanoType.withZone(), 
-1510871468000001001L)) + .isEqualTo("1922-02-15T01:28:51.999998999+00:00"); + assertThat(DateTimeUtil.nanosToYears(-1510871468000001001L)).isEqualTo(-48); + assertThat(Transforms.year().toHumanString(Types.IntegerType.get(), 47)).isEqualTo("2017"); + assertThat(DateTimeUtil.nanosToMonths(-1510871468000001001L)).isEqualTo(-575); + assertThat(Transforms.month().toHumanString(Types.IntegerType.get(), 574)).isEqualTo("2017-11"); + assertThat(DateTimeUtil.nanosToDays(-1510871468000001001L)).isEqualTo(-17487); + assertThat(Transforms.day().toHumanString(Types.IntegerType.get(), 17486)) + .isEqualTo("2017-11-16"); + assertThat(DateTimeUtil.nanosToHours(-1510871468000001001L)).isEqualTo(-419687); + assertThat(Transforms.hour().toHumanString(Types.IntegerType.get(), 419686)) + .isEqualTo("2017-11-16-22"); } } From c9f427359739115e466b31770a656a1e33d86e96 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 16:18:59 -0700 Subject: [PATCH 05/13] Simplify Timestamps transform get. --- .../apache/iceberg/transforms/Timestamps.java | 90 +++++++------------ .../apache/iceberg/transforms/Transforms.java | 4 +- .../iceberg/transforms/TestTimestamps.java | 16 ---- 3 files changed, 31 insertions(+), 79 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 7f89bbcd1237..ddd08271ece3 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -52,67 +52,37 @@ class Timestamps implements Transform { static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.HOURS, "hour"); - static Timestamps get(Types.TimestampType type, String resultTypeUnit) { - switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { - case "year": - return get(type, ChronoUnit.YEARS); - case "month": - return get(type, ChronoUnit.MONTHS); - case "day": - return get(type, 
ChronoUnit.DAYS); - case "hour": - return get(type, ChronoUnit.HOURS); + static Timestamps get(Type type, String transform) { + String name = transform.toLowerCase(Locale.ENGLISH); + switch (type.typeId()) { + case TIMESTAMP: + switch (name) { + case "year": + return YEAR_FROM_MICROS; + case "month": + return MONTH_FROM_MICROS; + case "day": + return DAY_FROM_MICROS; + case "hour": + return HOUR_FROM_MICROS; + default: + throw new IllegalArgumentException("Unsupported transform: " + name); + } + case TIMESTAMP_NANO: + switch (name) { + case "year": + return YEAR_FROM_NANOS; + case "month": + return MONTH_FROM_NANOS; + case "day": + return DAY_FROM_NANOS; + case "hour": + return HOUR_FROM_NANOS; + default: + throw new IllegalArgumentException("Unsupported transform: " + name); + } default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampNanoType type, String resultTypeUnit) { - switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { - case "year": - return get(type, ChronoUnit.YEARS); - case "month": - return get(type, ChronoUnit.MONTHS); - case "day": - return get(type, ChronoUnit.DAYS); - case "hour": - return get(type, ChronoUnit.HOURS); - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampType type, ChronoUnit resultTypeUnit) { - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_MICROS; - case MONTHS: - return MONTH_FROM_MICROS; - case DAYS: - return DAY_FROM_MICROS; - case HOURS: - return HOUR_FROM_MICROS; - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); - } - } - - static Timestamps get(Types.TimestampNanoType type, ChronoUnit resultTypeUnit) { - switch (resultTypeUnit) { - case YEARS: - return YEAR_FROM_NANOS; - case MONTHS: - return 
MONTH_FROM_NANOS; - case DAYS: - return DAY_FROM_NANOS; - case HOURS: - return HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException( - "Unsupported source/result type units: " + type + " -> " + resultTypeUnit); + throw new IllegalArgumentException("Unsupported type for transform " + name + ": " + type); } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 610872f01065..2dccbf72b12f 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -24,7 +24,6 @@ import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; /** * Factory methods for transforms. @@ -87,9 +86,8 @@ private Transforms() {} try { switch (type.typeId()) { case TIMESTAMP: - return Timestamps.get((Types.TimestampType) type, transform); case TIMESTAMP_NANO: - return Timestamps.get((Types.TimestampNanoType) type, transform); + return Timestamps.get(type, transform); case DATE: return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index c62a8f8a490e..134085a589bb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -478,14 +478,6 @@ public void testApplyRejectsBadResultType() { .hasMessageMatching("Unsupported result type unit: NANOS"); } - @Test - public void testGetOfTimestampTypeRejectsBadChronoUnit() { - Types.TimestampType timestampType = Types.TimestampType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampType, ChronoUnit.CENTURIES)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type 
units: timestamptz -> Centuries"); - } - @Test public void testGetOfTimestampTypeRejectsBadString() { Types.TimestampType timestampType = Types.TimestampType.withZone(); @@ -494,14 +486,6 @@ public void testGetOfTimestampTypeRejectsBadString() { .hasMessageMatching("Unsupported source/result type units: timestamptz -> trash"); } - @Test - public void testGetOfTimestampNanoTypeRejectsBadChronoUnit() { - Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampNanoType, ChronoUnit.CENTURIES)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns -> Centuries"); - } - @Test public void testGetOfTimestampNanoTypeRejectsBadString() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); From 97489abd8b06f90030baf1281a6335df313de8c6 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 23 Aug 2024 16:30:18 -0700 Subject: [PATCH 06/13] Remove ChronoUnit wrapper enum. 
--- .../org/apache/iceberg/transforms/Days.java | 12 +---- .../org/apache/iceberg/transforms/Months.java | 12 +---- .../apache/iceberg/transforms/Timestamps.java | 53 +++++-------------- .../org/apache/iceberg/transforms/Years.java | 12 +---- .../iceberg/transforms/TestTimestamps.java | 23 +------- 5 files changed, 22 insertions(+), 90 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index e8aae2d18262..24f844ee50ab 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.DAY_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); } else if (other instanceof Days || other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 766b2874c16b..c1ac280b69bf 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return 
Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.MONTH_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); } else if (other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index ddd08271ece3..5ab8e291044c 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import com.google.errorprone.annotations.Immutable; -import java.time.Duration; import java.time.temporal.ChronoUnit; import java.util.Locale; import org.apache.iceberg.expressions.BoundPredicate; @@ -36,21 +35,20 @@ class Timestamps implements Transform { static final Timestamps YEAR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.YEARS, "year"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS, "year"); static final Timestamps MONTH_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.MONTHS, "month"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS, "month"); static final Timestamps DAY_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.DAYS, "day"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS, "day"); static final Timestamps HOUR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ResultTypeUnit.HOURS, "hour"); + new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS, "hour"); static final Timestamps YEAR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.YEARS, "year"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS, "year"); static 
final Timestamps MONTH_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.DAYS, "day"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS, "month"); + static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS, "day"); static final Timestamps HOUR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ResultTypeUnit.HOURS, "hour"); + new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS, "hour"); static Timestamps get(Type type, String transform) { String name = transform.toLowerCase(Locale.ENGLISH); @@ -86,31 +84,12 @@ static Timestamps get(Type type, String transform) { } } - enum ResultTypeUnit { - YEARS(ChronoUnit.YEARS), - MONTHS(ChronoUnit.MONTHS), - DAYS(ChronoUnit.DAYS), - HOURS(ChronoUnit.HOURS), - MICROS(ChronoUnit.MICROS), - NANOS(ChronoUnit.NANOS); - - private final ChronoUnit unit; - - ResultTypeUnit(final ChronoUnit unit) { - this.unit = unit; - } - - Duration duration() { - return unit.getDuration(); - } - } - @Immutable static class Apply implements SerializableFunction { private final ChronoUnit sourceTypeUnit; - private final ResultTypeUnit resultTypeUnit; + private final ChronoUnit resultTypeUnit; - Apply(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit) { + Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { this.sourceTypeUnit = sourceTypeUnit; this.resultTypeUnit = resultTypeUnit; } @@ -160,7 +139,7 @@ public Integer apply(Long timestamp) { private final String name; private final Apply apply; - Timestamps(ChronoUnit sourceTypeUnit, ResultTypeUnit resultTypeUnit, String name) { + Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit, String name) { this.name = name; this.apply = new Apply(sourceTypeUnit, resultTypeUnit); } @@ -183,16 +162,12 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (apply.resultTypeUnit == 
ResultTypeUnit.DAYS) { + if (apply.resultTypeUnit == ChronoUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } - ResultTypeUnit resultTypeUnit() { - return apply.resultTypeUnit; - } - @Override public boolean preservesOrder() { return true; @@ -208,8 +183,8 @@ public boolean satisfiesOrderOf(Transform other) { // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies // the order of day Timestamps otherTransform = (Timestamps) other; - return apply.resultTypeUnit.duration().toHours() - <= otherTransform.apply.resultTypeUnit.duration().toHours(); + return apply.resultTypeUnit.getDuration().toHours() + <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 8b6ff318444a..d1a7d82ac8bd 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -57,16 +57,8 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - Timestamps.ResultTypeUnit otherResultTypeUnit = ((Timestamps) other).resultTypeUnit(); - switch (otherResultTypeUnit) { - case MICROS: - return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); - case NANOS: - return Timestamps.YEAR_FROM_NANOS.satisfiesOrderOf(other); - default: - throw new UnsupportedOperationException( - "Unsupported timestamp unit: " + otherResultTypeUnit); - } + // incoming type unit does not matter + return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); } else if (other instanceof Years) { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 134085a589bb..f62307788dd8 100644 --- 
a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -21,7 +21,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; -import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -460,30 +459,12 @@ public void testTimestampNanosReturnType() { assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } - @Test - public void testApplyRejectsBadSourceType() { - Timestamps badSourceType = - new Timestamps(ChronoUnit.CENTURIES, Timestamps.ResultTypeUnit.YEARS, "year"); - assertThatThrownBy(() -> badSourceType.apply(11L)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported source type unit: Centuries"); - } - - @Test - public void testApplyRejectsBadResultType() { - Timestamps badResultType = - new Timestamps(ChronoUnit.MICROS, Timestamps.ResultTypeUnit.NANOS, "nano"); - assertThatThrownBy(() -> badResultType.apply(11L)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported result type unit: NANOS"); - } - @Test public void testGetOfTimestampTypeRejectsBadString() { Types.TimestampType timestampType = Types.TimestampType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz -> trash"); + .hasMessageMatching("Unsupported transform: trash"); } @Test @@ -491,6 +472,6 @@ public void testGetOfTimestampNanoTypeRejectsBadString() { Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); assertThatThrownBy(() -> Timestamps.get(timestampNanoType, "trash")) .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported source/result type units: timestamptz_ns 
-> trash"); + .hasMessageMatching("Unsupported transform: trash"); } } From 8c3cc6772768d7b256c098ea746c30bfb542b29a Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:31:20 -0700 Subject: [PATCH 07/13] Restore Timestamps as enum and simplify boilerplate. --- .../org/apache/iceberg/transforms/Dates.java | 14 +- .../org/apache/iceberg/transforms/Days.java | 37 +-- .../org/apache/iceberg/transforms/Hours.java | 34 +-- .../org/apache/iceberg/transforms/Months.java | 37 +-- .../transforms/PartitionSpecVisitor.java | 16 +- .../iceberg/transforms/SortOrderVisitor.java | 16 +- .../iceberg/transforms/TimeTransform.java | 36 +++ .../apache/iceberg/transforms/Timestamps.java | 254 ++++++++++-------- .../iceberg/transforms/TransformUtil.java | 7 + .../apache/iceberg/transforms/Transforms.java | 78 ++---- .../org/apache/iceberg/transforms/Years.java | 37 +-- .../apache/iceberg/transforms/TestDates.java | 69 +++++ .../apache/iceberg/transforms/TestDays.java | 32 --- .../apache/iceberg/transforms/TestHours.java | 35 --- .../transforms/TestTimeTransforms.java | 128 +++++++++ .../iceberg/transforms/TestTimestamps.java | 193 +++++++++++-- .../apache/iceberg/transforms/TestYears.java | 32 --- 17 files changed, 634 insertions(+), 421 deletions(-) delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestDays.java delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestHours.java create mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java delete mode 100644 api/src/test/java/org/apache/iceberg/transforms/TestYears.java diff --git a/api/src/main/java/org/apache/iceberg/transforms/Dates.java b/api/src/main/java/org/apache/iceberg/transforms/Dates.java index 3d26b542be7b..88db16797867 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Dates.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Dates.java @@ -97,6 +97,10 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); 
} + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -109,11 +113,11 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Dates) { - // test the granularity, in days. day(ts) => 1 day, months(ts) => 30 days, and day satisfies - // the order of months - Dates otherTransform = (Dates) other; - return granularity.getDuration().toDays() - <= otherTransform.granularity.getDuration().toDays(); + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index 24f844ee50ab..e2b829b86662 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Days get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.DAY_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.DAYS; } @Override - public Type getResultType(Type sourceType) { - return Types.DateType.get(); + protected Transform toEnum(Type type) { + return 
(Transform) + fromSourceType(type, Dates.DAY, Timestamps.MICROS_TO_DAY, Timestamps.NANOS_TO_DAY); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.DAY_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.DAY.satisfiesOrderOf(other); - } else if (other instanceof Days || other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.DateType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index b198d0e94eac..2ff79f6a66a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -30,17 +31,16 @@ static Hours get() { return (Hours) INSTANCE; } + @Override + protected ChronoUnit granularity() { + return ChronoUnit.HOURS; + } + @Override @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { - switch (type.typeId()) { - case TIMESTAMP: - return (Transform) Timestamps.HOUR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + return (Transform) + fromSourceType(type, null, Timestamps.MICROS_TO_HOUR, Timestamps.NANOS_TO_HOUR); } @Override @@ -53,24 +53,6 @@ public Type getResultType(Type sourceType) { return Types.IntegerType.get(); } - @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) 
{ - return other == Timestamps.HOUR_FROM_MICROS || other == Timestamps.HOUR_FROM_NANOS; - } else if (other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years) { - return true; - } - - return false; - } - @Override public String toHumanString(Type alwaysInt, Integer value) { return value != null ? TransformUtil.humanHour(value) : "null"; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index c1ac280b69bf..73ec50e5dd9a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Months get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.MONTH_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.MONTHS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.MONTH, Timestamps.MICROS_TO_MONTH, Timestamps.NANOS_TO_MONTH); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.MONTH_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.MONTH.satisfiesOrderOf(other); - } else if 
(other instanceof Months || other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { + return Types.IntegerType.get(); } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index b3f8d600bd38..0d80ef88a296 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -122,22 +122,22 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor int width = ((Truncate) transform).width(); return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR_FROM_MICROS - || transform == Timestamps.YEAR_FROM_NANOS + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH_FROM_MICROS - || transform == Timestamps.MONTH_FROM_NANOS + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.DAY - || transform == Timestamps.DAY_FROM_MICROS - || transform == Timestamps.DAY_FROM_NANOS + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY || transform instanceof Days) { return visitor.day(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Timestamps.HOUR_FROM_MICROS - || transform == Timestamps.HOUR_FROM_NANOS + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR || transform instanceof Hours) { return visitor.hour(field.fieldId(), sourceName, 
field.sourceId()); } else if (transform instanceof VoidTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 998c63d8e367..62cc9d3cdb33 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -85,25 +85,25 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { visitor.truncate( sourceName, field.sourceId(), width, field.direction(), field.nullOrder())); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR_FROM_MICROS - || transform == Timestamps.YEAR_FROM_NANOS + || transform == Timestamps.MICROS_TO_YEAR + || transform == Timestamps.NANOS_TO_YEAR || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH_FROM_MICROS - || transform == Timestamps.MONTH_FROM_NANOS + || transform == Timestamps.MICROS_TO_MONTH + || transform == Timestamps.NANOS_TO_MONTH || transform instanceof Months) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.DAY - || transform == Timestamps.DAY_FROM_MICROS - || transform == Timestamps.DAY_FROM_NANOS + || transform == Timestamps.MICROS_TO_DAY + || transform == Timestamps.NANOS_TO_DAY || transform instanceof Days) { results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Timestamps.HOUR_FROM_MICROS - || transform == Timestamps.HOUR_FROM_NANOS + } else if (transform == Timestamps.MICROS_TO_HOUR + || transform == Timestamps.NANOS_TO_HOUR || transform instanceof Hours) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); diff --git 
a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index 23ee38271e81..d28ffb465d70 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.transforms; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.UnboundPredicate; @@ -25,6 +26,24 @@ import org.apache.iceberg.util.SerializableFunction; abstract class TimeTransform implements Transform { + protected static R fromSourceType(Type type, R dateResult, R microsResult, R nanosResult) { + switch (type.typeId()) { + case DATE: + if (dateResult != null) { + return dateResult; + } + break; + case TIMESTAMP: + return microsResult; + case TIMESTAMP_NANO: + return nanosResult; + } + + throw new IllegalArgumentException("Unsupported type: " + type); + } + + protected abstract ChronoUnit granularity(); + protected abstract Transform toEnum(Type type); @Override @@ -37,6 +56,23 @@ public boolean preservesOrder() { return true; } + @Override + public boolean satisfiesOrderOf(Transform other) { + if (this == other) { + return true; + } + + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity(), ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity(), ((TimeTransform) other).granularity()); + } + + return false; + } + @Override public boolean canTransform(Type type) { return type.typeId() == Type.TypeID.DATE diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java 
b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java index 5ab8e291044c..8b8c2ca0a96b 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java @@ -20,7 +20,6 @@ import com.google.errorprone.annotations.Immutable; import java.time.temporal.ChronoUnit; -import java.util.Locale; import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.BoundTransform; import org.apache.iceberg.expressions.Expression; @@ -32,116 +31,25 @@ import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; -class Timestamps implements Transform { - - static final Timestamps YEAR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS, "year"); - static final Timestamps MONTH_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS, "day"); - static final Timestamps HOUR_FROM_MICROS = - new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS, "hour"); - static final Timestamps YEAR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS, "year"); - static final Timestamps MONTH_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS, "month"); - static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS, "day"); - static final Timestamps HOUR_FROM_NANOS = - new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS, "hour"); - - static Timestamps get(Type type, String transform) { - String name = transform.toLowerCase(Locale.ENGLISH); - switch (type.typeId()) { - case TIMESTAMP: - switch (name) { - case "year": - return YEAR_FROM_MICROS; - case "month": - return MONTH_FROM_MICROS; - case "day": - return DAY_FROM_MICROS; - case "hour": - return HOUR_FROM_MICROS; - default: - throw new IllegalArgumentException("Unsupported transform: " + name); - } - case 
TIMESTAMP_NANO: - switch (name) { - case "year": - return YEAR_FROM_NANOS; - case "month": - return MONTH_FROM_NANOS; - case "day": - return DAY_FROM_NANOS; - case "hour": - return HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported transform: " + name); - } - default: - throw new IllegalArgumentException("Unsupported type for transform " + name + ": " + type); - } - } - - @Immutable - static class Apply implements SerializableFunction { - private final ChronoUnit sourceTypeUnit; - private final ChronoUnit resultTypeUnit; - - Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { - this.sourceTypeUnit = sourceTypeUnit; - this.resultTypeUnit = resultTypeUnit; - } - - @Override - public Integer apply(Long timestamp) { - if (timestamp == null) { - return null; - } +enum Timestamps implements Transform { + MICROS_TO_YEAR(ChronoUnit.YEARS, "year", MicrosToYears.INSTANCE), + MICROS_TO_MONTH(ChronoUnit.MONTHS, "month", MicrosToMonths.INSTANCE), + MICROS_TO_DAY(ChronoUnit.DAYS, "day", MicrosToDays.INSTANCE), + MICROS_TO_HOUR(ChronoUnit.HOURS, "hour", MicrosToHours.INSTANCE), - switch (sourceTypeUnit) { - case MICROS: - switch (resultTypeUnit) { - case YEARS: - return DateTimeUtil.microsToYears(timestamp); - case MONTHS: - return DateTimeUtil.microsToMonths(timestamp); - case DAYS: - return DateTimeUtil.microsToDays(timestamp); - case HOURS: - return DateTimeUtil.microsToHours(timestamp); - default: - throw new UnsupportedOperationException( - "Unsupported result type unit: " + resultTypeUnit); - } - case NANOS: - switch (resultTypeUnit) { - case YEARS: - return DateTimeUtil.nanosToYears(timestamp); - case MONTHS: - return DateTimeUtil.nanosToMonths(timestamp); - case DAYS: - return DateTimeUtil.nanosToDays(timestamp); - case HOURS: - return DateTimeUtil.nanosToHours(timestamp); - default: - throw new UnsupportedOperationException( - "Unsupported result type unit: " + resultTypeUnit); - } - default: - throw new UnsupportedOperationException( 
- "Unsupported source type unit: " + sourceTypeUnit); - } - } - } + NANOS_TO_YEAR(ChronoUnit.YEARS, "year", NanosToYears.INSTANCE), + NANOS_TO_MONTH(ChronoUnit.MONTHS, "month", NanosToMonths.INSTANCE), + NANOS_TO_DAY(ChronoUnit.DAYS, "day", NanosToDays.INSTANCE), + NANOS_TO_HOUR(ChronoUnit.HOURS, "hour", NanosToHours.INSTANCE); + private final ChronoUnit granularity; private final String name; - private final Apply apply; + private final SerializableFunction apply; - Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit, String name) { + Timestamps(ChronoUnit granularity, String name, SerializableFunction apply) { this.name = name; - this.apply = new Apply(sourceTypeUnit, resultTypeUnit); + this.granularity = granularity; + this.apply = apply; } @Override @@ -162,12 +70,16 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (apply.resultTypeUnit == ChronoUnit.DAYS) { + if (granularity == ChronoUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } + ChronoUnit granularity() { + return granularity; + } + @Override public boolean preservesOrder() { return true; @@ -179,12 +91,12 @@ public boolean satisfiesOrderOf(Transform other) { return true; } - if (other instanceof Timestamps) { - // test the granularity, in hours. 
hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies - // the order of day - Timestamps otherTransform = (Timestamps) other; - return apply.resultTypeUnit.getDuration().toHours() - <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); + if (other instanceof Dates) { + return TransformUtil.satisfiesOrderOf(granularity, ((Dates) other).granularity()); + } else if (other instanceof Timestamps) { + return TransformUtil.satisfiesOrderOf(granularity, ((Timestamps) other).granularity()); + } else if (other instanceof TimeTransform) { + return TransformUtil.satisfiesOrderOf(granularity, ((TimeTransform) other).granularity()); } return false; @@ -242,7 +154,7 @@ public String toHumanString(Type outputType, Integer value) { return "null"; } - switch (apply.resultTypeUnit) { + switch (granularity) { case YEARS: return TransformUtil.humanYear(value); case MONTHS: @@ -252,7 +164,7 @@ public String toHumanString(Type outputType, Integer value) { case HOURS: return TransformUtil.humanHour(value); default: - throw new UnsupportedOperationException("Unsupported time unit: " + apply.resultTypeUnit); + throw new UnsupportedOperationException("Unsupported time unit: " + granularity); } } @@ -265,4 +177,116 @@ public String toString() { public String dedupName() { return "time"; } + + @Immutable + static class MicrosToYears implements SerializableFunction { + static final MicrosToYears INSTANCE = new MicrosToYears(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToYears(micros); + } + } + + @Immutable + static class MicrosToMonths implements SerializableFunction { + static final MicrosToMonths INSTANCE = new MicrosToMonths(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToMonths(micros); + } + } + + @Immutable + static class MicrosToDays implements SerializableFunction { + static final MicrosToDays INSTANCE = new 
MicrosToDays(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToDays(micros); + } + } + + @Immutable + static class MicrosToHours implements SerializableFunction { + static final MicrosToHours INSTANCE = new MicrosToHours(); + + @Override + public Integer apply(Long micros) { + if (micros == null) { + return null; + } + + return DateTimeUtil.microsToHours(micros); + } + } + + @Immutable + static class NanosToYears implements SerializableFunction { + static final NanosToYears INSTANCE = new NanosToYears(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToYears(nanos); + } + } + + @Immutable + static class NanosToMonths implements SerializableFunction { + static final NanosToMonths INSTANCE = new NanosToMonths(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToMonths(nanos); + } + } + + @Immutable + static class NanosToDays implements SerializableFunction { + static final NanosToDays INSTANCE = new NanosToDays(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToDays(nanos); + } + } + + @Immutable + static class NanosToHours implements SerializableFunction { + static final NanosToHours INSTANCE = new NanosToHours(); + + @Override + public Integer apply(Long nanos) { + if (nanos == null) { + return null; + } + + return DateTimeUtil.nanosToHours(nanos); + } + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 88f6b64e9218..dd7f97e950e8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -24,6 +24,7 @@ import java.time.LocalTime; import java.time.OffsetDateTime; 
import java.time.ZoneOffset; +import java.time.temporal.ChronoUnit; import java.util.Base64; import org.apache.iceberg.util.DateTimeUtil; @@ -81,4 +82,10 @@ static String base64encode(ByteBuffer buffer) { // use direct encoding because all of the encoded bytes are in ASCII return StandardCharsets.ISO_8859_1.decode(Base64.getEncoder().encode(buffer)).toString(); } + + static boolean satisfiesOrderOf(ChronoUnit leftGranularity, ChronoUnit rightGranularity) { + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies + // the order of day + return leftGranularity.getDuration().toHours() <= rightGranularity.getDuration().toHours(); + } } diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index 2dccbf72b12f..aacd4d430069 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -67,6 +67,10 @@ private Transforms() {} return new UnknownTransform<>(transform); } + /** + * @deprecated use {@link #fromString(String)} instead; will be removed in 2.0.0 + */ + @Deprecated public static Transform fromString(Type type, String transform) { Matcher widthMatcher = HAS_WIDTH.matcher(transform); if (widthMatcher.matches()) { @@ -79,24 +83,20 @@ private Transforms() {} } } - if (transform.equalsIgnoreCase("identity")) { - return Identity.get(type); - } - - try { - switch (type.typeId()) { - case TIMESTAMP: - case TIMESTAMP_NANO: - return Timestamps.get(type, transform); - case DATE: - return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } - } catch (IllegalArgumentException ignored) { - // fall through to return unknown transform - } - - if (transform.equalsIgnoreCase("void")) { - return VoidTransform.get(); + String lowerTransform = transform.toLowerCase(Locale.ENGLISH); + switch (lowerTransform) { + case "identity": + return Identity.get(type); + case "year": +
return Years.get().toEnum(type); + case "month": + return Months.get().toEnum(type); + case "day": + return Days.get().toEnum(type); + case "hour": + return Hours.get().toEnum(type); + case "void": + return VoidTransform.get(); } return new UnknownTransform<>(transform); @@ -126,16 +126,7 @@ public static Transform identity(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform year(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.YEAR_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by year"); - } + return (Transform) Years.get().toEnum(type); } /** @@ -149,16 +140,7 @@ public static Transform year(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform month(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.MONTH; - case TIMESTAMP: - return (Transform) Timestamps.MONTH_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.MONTH_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by month"); - } + return (Transform) Months.get().toEnum(type); } /** @@ -172,16 +154,7 @@ public static Transform month(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform day(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.DAY; - case TIMESTAMP: - return (Transform) Timestamps.DAY_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.DAY_FROM_NANOS; - default: - throw new IllegalArgumentException("Cannot partition type " + type + " by day"); - } + return (Transform) Days.get().toEnum(type); } /** @@ -195,14 +168,7 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - switch (type.typeId()) { - case 
TIMESTAMP: - return (Transform) Timestamps.HOUR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.HOUR_FROM_NANOS; - default: - throw new IllegalArgumentException(String.format("Cannot partition type %s by hour", type)); - } + return (Transform) Hours.get().toEnum(type); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index d1a7d82ac8bd..2920a37dc692 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -31,41 +32,19 @@ static Years get() { } @Override - @SuppressWarnings("unchecked") - protected Transform toEnum(Type type) { - switch (type.typeId()) { - case DATE: - return (Transform) Dates.YEAR; - case TIMESTAMP: - return (Transform) Timestamps.YEAR_FROM_MICROS; - case TIMESTAMP_NANO: - return (Transform) Timestamps.YEAR_FROM_NANOS; - default: - throw new IllegalArgumentException("Unsupported type: " + type); - } + protected ChronoUnit granularity() { + return ChronoUnit.YEARS; } @Override - public Type getResultType(Type sourceType) { - return Types.IntegerType.get(); + protected Transform toEnum(Type type) { + return (Transform) + fromSourceType(type, Dates.YEAR, Timestamps.MICROS_TO_YEAR, Timestamps.NANOS_TO_YEAR); } @Override - public boolean satisfiesOrderOf(Transform other) { - if (this == other) { - return true; - } - - if (other instanceof Timestamps) { - // incoming type unit does not matter - return Timestamps.YEAR_FROM_MICROS.satisfiesOrderOf(other); - } else if (other instanceof Dates) { - return Dates.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Years) { - return true; - } - - return false; + public Type getResultType(Type sourceType) { 
+ return Types.IntegerType.get(); } @Override diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java index b9c380244666..c899b4cfa1cb 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDates.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestDates.java @@ -26,6 +26,75 @@ import org.junit.jupiter.api.Test; public class TestDates { + @Test + public void testSatisfiesOrderOfDates() { + assertThat(Dates.DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestamps() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + 
assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimestampNanos() { + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testSatisfiesOrderOfTimeTransforms() { + assertThat(Dates.DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Dates.MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Dates.YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Dates.YEAR.satisfiesOrderOf(Months.get())).isFalse(); + 
assertThat(Dates.YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + @Test @SuppressWarnings("deprecation") public void testDeprecatedDateTransform() { diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java b/api/src/test/java/org/apache/iceberg/transforms/TestDays.java deleted file mode 100644 index c090ad62c914..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestDays.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.junit.jupiter.api.Test; - -public class TestDays { - @Test - public void testSatisfiesOrderOf() { - assertThatThrownBy(() -> Days.get().satisfiesOrderOf(Timestamps.DAY_FROM_NANOS)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: DAYS"); - } -} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java b/api/src/test/java/org/apache/iceberg/transforms/TestHours.java deleted file mode 100644 index 8e070aec8b9c..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestHours.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.apache.iceberg.types.Types; -import org.junit.jupiter.api.Test; - -public class TestHours { - @Test - public void testToEnum() { - Hours hours = Hours.get(); - Types.DateType type = Types.DateType.get(); - assertThatThrownBy(() -> hours.toEnum(type)) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported type: date"); - } -} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java new file mode 100644 index 000000000000..4093aa3b196f --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimeTransforms.java @@ -0,0 +1,128 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. 
+ * + */ + +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTimeTransforms { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Hours.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + 
assertThat(Hours.get().satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Hours.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Days.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Days.get().satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Days.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Months.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Months.get().satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Months.get().satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Years.get().satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Years.get().satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testHoursToEnum() { + Hours hours = Hours.get(); + Types.DateType type = Types.DateType.get(); + assertThatThrownBy(() -> hours.toEnum(type)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageMatching("Unsupported type: date"); + } + +} diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index f62307788dd8..52eab49f7414 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -19,7 +19,6 @@ package org.apache.iceberg.transforms; import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.assertThatThrownBy; import org.apache.iceberg.expressions.Literal; import org.apache.iceberg.types.Type; @@ -27,6 +26,182 @@ import 
org.junit.jupiter.api.Test; public class TestTimestamps { + @Test + public void testMicrosSatisfiesOrderOfDates() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testMicrosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.MICROS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.MICROS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfDates() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + 
assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Dates.YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Dates.YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestamps() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_HOUR)).isFalse(); + 
assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.MICROS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimestampNanos() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_HOUR)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_DAY)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_MONTH)).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Timestamps.NANOS_TO_YEAR)).isTrue(); + } + + @Test + public void testNanosSatisfiesOrderOfTimeTransforms() { + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Hours.get())).isTrue(); + 
assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_HOUR.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Days.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_DAY.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Months.get())).isTrue(); + assertThat(Timestamps.NANOS_TO_MONTH.satisfiesOrderOf(Years.get())).isTrue(); + + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Hours.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Days.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Months.get())).isFalse(); + assertThat(Timestamps.NANOS_TO_YEAR.satisfiesOrderOf(Years.get())).isTrue(); + } + @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { @@ -458,20 +633,4 @@ public void testTimestampNanosReturnType() { Type hourResultType = hour.getResultType(type); assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); } - - @Test - public void testGetOfTimestampTypeRejectsBadString() { - Types.TimestampType timestampType = Types.TimestampType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampType, "trash")) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported transform: trash"); - } - - @Test - public void testGetOfTimestampNanoTypeRejectsBadString() { - Types.TimestampNanoType timestampNanoType = Types.TimestampNanoType.withZone(); - assertThatThrownBy(() -> Timestamps.get(timestampNanoType, 
"trash")) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageMatching("Unsupported transform: trash"); - } } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java b/api/src/test/java/org/apache/iceberg/transforms/TestYears.java deleted file mode 100644 index d4b06b6f2a62..000000000000 --- a/api/src/test/java/org/apache/iceberg/transforms/TestYears.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.iceberg.transforms; - -import static org.assertj.core.api.Assertions.assertThatThrownBy; - -import org.junit.jupiter.api.Test; - -public class TestYears { - @Test - public void testSatisfiesOrderOf() { - assertThatThrownBy(() -> Years.get().satisfiesOrderOf(Timestamps.YEAR_FROM_NANOS)) - .isInstanceOf(UnsupportedOperationException.class) - .hasMessageMatching("Unsupported timestamp unit: YEARS"); - } -} From 6f25c99a83c9435bfdf5fe5806dd027b2de52dfe Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:40:05 -0700 Subject: [PATCH 08/13] Minor fix to bucket transform. 
--- .../main/java/org/apache/iceberg/transforms/Bucket.java | 7 ++++--- .../main/java/org/apache/iceberg/util/DateTimeUtil.java | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 7e1a089f5b51..0e4e782cc110 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -33,6 +33,7 @@ import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; import org.apache.iceberg.util.BucketUtil; +import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SerializableFunction; class Bucket implements Transform, Serializable { @@ -217,7 +218,7 @@ protected int hash(Long value) { } } - // In order to bucket TimestampNano the same as Timestamp, we divide these values by 1000. + // In order to bucket TimestampNano the same as Timestamp, convert to micros before hashing. 
private static class BucketTimestampNano extends Bucket implements SerializableFunction { @@ -226,8 +227,8 @@ private BucketTimestampNano(int numBuckets) { } @Override - protected int hash(Long value) { - return BucketUtil.hash(Math.floorDiv(value, 1000)); + protected int hash(Long nanos) { + return BucketUtil.hash(DateTimeUtil.nanosToMicros(nanos)); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index bc901e492551..3c312486be00 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -27,7 +27,6 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.time.temporal.ChronoUnit; -import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; public class DateTimeUtil { private DateTimeUtil() {} From f006cb244bc5802a8a8d4410ce57e03e9eb1568f Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:44:36 -0700 Subject: [PATCH 09/13] Fix style --- api/src/main/java/org/apache/iceberg/types/Types.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index f9a683d46052..2352b9b52f13 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -301,10 +301,10 @@ public String toString() { public boolean equals(Object other) { if (this == other) { return true; - } - if (!(other instanceof TimestampNanoType)) { + } else if (!(other instanceof TimestampNanoType)) { return false; } + return adjustToUTC == ((TimestampNanoType) other).adjustToUTC; } From 066c9557a939ab804c4acb593865a96291c154dc Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 15:54:42 -0700 Subject: [PATCH 10/13] Fix typos in TestTimestamps. 
--- .../org/apache/iceberg/transforms/TestTimestamps.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 52eab49f7414..78b0e67c686b 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -208,7 +208,7 @@ public void testDeprecatedTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform years = Transforms.year(type); assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); @@ -241,7 +241,7 @@ public void testDeprecatedTimestampNanoTransform() { Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(type); assertThat((int) years.apply(ts.value())).as("Should produce 2017 - 1970 = 47").isEqualTo(47); @@ -273,7 +273,7 @@ public void testTimestampTransform() { Types.TimestampType type = Types.TimestampType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform years = Transforms.year(); assertThat((int) 
years.bind(type).apply(ts.value())) @@ -317,7 +317,7 @@ public void testTimestampNanoTransform() { Types.TimestampNanoType type = Types.TimestampNanoType.withoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194789").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); - Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + Literal nts = Literal.of("1969-12-31T23:59:59.999999999").to(type); Transform years = Transforms.year(); assertThat((int) years.bind(type).apply(ts.value())) @@ -577,7 +577,7 @@ public void testTimestampNullHumanString() { @Test public void testTimestampNanoNullHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampNanoType type = Types.TimestampNanoType.withZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") .isEqualTo("null"); From 4d7720281f936e751b69e4c724225c76958f766d Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:04:17 -0700 Subject: [PATCH 11/13] Add a comment to clarify conversion test. --- .../test/java/org/apache/iceberg/types/TestConversions.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 9764b8551313..e207cfd8d59a 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -104,10 +104,12 @@ public void testByteBufferConversions() { .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + // values passed to assertConversion and Literal.of differ because Literal.of(...) 
assumes + // the value is in micros, which gets converted when to(TimestampNanoType) is called assertConversion( - 400000L, TimestampNanoType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + 400000000L, TimestampNanoType.withoutZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertConversion( - 400000L, TimestampNanoType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + 400000000L, TimestampNanoType.withZone(), new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {0, -124, -41, 23, 0, 0, 0, 0}); assertThat(Literal.of(400000L).to(TimestampNanoType.withZone()).toByteBuffer().array()) From 9a3d16f7e828f5503d33b84572d6efffde6b0070 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:05:57 -0700 Subject: [PATCH 12/13] Split timestamp and timestamp_ns comparator test cases. --- .../test/java/org/apache/iceberg/types/TestComparators.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 89007480cb0f..07653ba3c8a8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -77,6 +77,10 @@ public void testTime() { public void testTimestamp() { assertComparesCorrectly(Comparators.forType(Types.TimestampType.withoutZone()), 111, 222); assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); + } + + @Test + public void testTimestampNanos() { assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withoutZone()), 111, 222); assertComparesCorrectly(Comparators.forType(Types.TimestampNanoType.withZone()), 111, 222); } From 274de5670158597bc1c39f85936e20657bcdeba7 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Sun, 25 Aug 2024 16:10:36 -0700 Subject: [PATCH 13/13] Fix spec 
update to specify microsecond hashing. --- format/spec.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/format/spec.md b/format/spec.md index 4727c48c6c26..9b447db56cb4 100644 --- a/format/spec.md +++ b/format/spec.md @@ -1050,10 +1050,10 @@ The 32-bit hash implementation is 32-bit Murmur3 hash, x86 variant, seeded with | **`time`** | `hashLong(microsecsFromMidnight(v))` | `22:31:08` → `-662762989` | | **`timestamp`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001` → `-1207196810` | | **`timestamptz`** | `hashLong(microsecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001-08:00` → `-1207196810` | -| **`timestamp_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | -| **`timestamptz_ns`** | `hashLong(nanosecsFromUnixEpoch(v))` | `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | +| **`timestamp_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3] | `2017-11-16T22:31:08` → `-2047944441`
`2017-11-16T22:31:08.000001001` → `-1207196810` | +| **`timestamptz_ns`** | `hashLong(microsecsFromUnixEpoch(v))` [3]| `2017-11-16T14:31:08-08:00` → `-2047944441`
`2017-11-16T14:31:08.000001001-08:00` → `-1207196810` | | **`string`** | `hashBytes(utf8Bytes(v))` | `iceberg` → `1210000089` | -| **`uuid`** | `hashBytes(uuidBytes(v))` [3] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | +| **`uuid`** | `hashBytes(uuidBytes(v))` [4] | `f79c3e09-677c-4bbd-a479-3f349cb785e7` → `1488055340` | | **`fixed(L)`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | | **`binary`** | `hashBytes(v)` | `00 01 02 03` → `-188683207` | @@ -1062,17 +1062,18 @@ The types below are not currently valid for bucketing, and so are not hashed. Ho | Primitive type | Hash specification | Test value | |--------------------|-------------------------------------------|--------------------------------------------| | **`boolean`** | `false: hashInt(0)`, `true: hashInt(1)` | `true` → `1392991556` | -| **`float`** | `hashLong(doubleToLongBits(double(v))` [4]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | -| **`double`** | `hashLong(doubleToLongBits(v))` [4]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | +| **`float`** | `hashLong(doubleToLongBits(double(v)))` [5]| `1.0F` → `-142385009`, `0.0F` → `1669671676`, `-0.0F` → `1669671676` | +| **`double`** | `hashLong(doubleToLongBits(v))` [5]| `1.0D` → `-142385009`, `0.0D` → `1669671676`, `-0.0D` → `1669671676` | Notes: 1. Integer and long hash results must be identical for all integer values. This ensures that schema evolution does not change bucket partition values if integer types are promoted. 2. Decimal values are hashed using the minimum number of bytes required to hold the unscaled value as a two’s complement big-endian; this representation does not include padding bytes required for storage in a fixed-length array. Hash results are not dependent on decimal scale, which is part of the type, not the data value. -3. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. 
This UUID encoded as a byte array is: +3. Nanosecond timestamps must be converted to microsecond precision before hashing so that `timestamp_ns` and `timestamp` values that are equal at microsecond precision produce the same hash value. +4. UUIDs are encoded using big endian. The test UUID for the example above is: `f79c3e09-677c-4bbd-a479-3f349cb785e7`. This UUID encoded as a byte array is: `F7 9C 3E 09 67 7C 4B BD A4 79 3F 34 9C B7 85 E7` -4. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. +5. `doubleToLongBits` must give the IEEE 754 compliant bit representation of the double value. All `NaN` bit patterns must be canonicalized to `0x7ff8000000000000L`. Negative zero (`-0.0`) must be canonicalized to positive zero (`0.0`). Float hash values are the result of hashing the float cast to double to ensure that schema evolution does not change hash values if float types are promoted. ## Appendix C: JSON serialization