diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 3708dafc4126..a31d986f7830 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -36,22 +36,28 @@ import org.apache.iceberg.transforms.Transforms; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; /** Expression utility methods. */ public class ExpressionUtil { private static final Function HASH_FUNC = Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get()); private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); - private static final long FIVE_MINUTES_IN_MICROS = TimeUnit.MINUTES.toMicros(5); + private static final long FIVE_MINUTES_IN_MILLIS = TimeUnit.MINUTES.toMillis(5); private static final long THREE_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(3); private static final long NINETY_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(90); private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); private static final Pattern TIME = Pattern.compile("\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); private static final Pattern TIMESTAMP = - Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?"); + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?"); + private static final Pattern TIMESTAMPNS = + Pattern.compile("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?"); private static final Pattern TIMESTAMPTZ = Pattern.compile( - "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,9})?)?([-+]\\d{2}:\\d{2}|Z)"); + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{1,6})?)?([-+]\\d{2}:\\d{2}|Z)"); + private static final Pattern TIMESTAMPTZNS = + Pattern.compile( + "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)"); static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10; private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5; @@ -247,13 +253,12 @@ public static UnboundTerm unbind(Term term) { private static class ExpressionSanitizer extends ExpressionVisitors.ExpressionVisitor { - private final long now; + private final long nowMillis; private final int today; private ExpressionSanitizer() { - long nowMillis = System.currentTimeMillis(); - OffsetDateTime nowDateTime = Instant.ofEpochMilli(nowMillis).atOffset(ZoneOffset.UTC); - this.now = nowMillis * 1000; + this.nowMillis = System.currentTimeMillis(); + OffsetDateTime nowDateTime = Instant.ofEpochMilli(this.nowMillis).atOffset(ZoneOffset.UTC); this.today = (int) ChronoUnit.DAYS.between(EPOCH, nowDateTime); } @@ -293,13 +298,13 @@ public Expression predicate(BoundPredicate pred) { return new UnboundPredicate<>( pred.op(), unbind(pred.term()), - (T) sanitize(bound.term().type(), bound.literal(), now, today)); + (T) sanitize(bound.term().type(), bound.literal(), nowMillis, today)); } else if (pred.isSetPredicate()) { BoundSetPredicate bound = (BoundSetPredicate) pred; Iterable iter = () -> bound.literalSet().stream() - .map(lit -> (T) sanitize(bound.term().type(), lit, now, today)) + .map(lit -> (T) sanitize(bound.term().type(), lit, nowMillis, today)) .iterator(); return new UnboundPredicate<>(pred.op(), unbind(pred.term()), iter); } @@ -326,11 +331,11 @@ public Expression predicate(UnboundPredicate pred) { case STARTS_WITH: case NOT_STARTS_WITH: return new UnboundPredicate<>( - 
pred.op(), pred.term(), (T) sanitize(pred.literal(), now, today)); + pred.op(), pred.term(), (T) sanitize(pred.literal(), nowMillis, today)); case IN: case NOT_IN: Iterable iter = - () -> pred.literals().stream().map(lit -> sanitize(lit, now, today)).iterator(); + () -> pred.literals().stream().map(lit -> sanitize(lit, nowMillis, today)).iterator(); return new UnboundPredicate<>(pred.op(), pred.term(), (Iterable) iter); default: throw new UnsupportedOperationException( @@ -340,13 +345,12 @@ public Expression predicate(UnboundPredicate pred) { } private static class StringSanitizer extends ExpressionVisitors.ExpressionVisitor { - private final long nowMicros; + private final long nowMillis; private final int today; private StringSanitizer() { - long nowMillis = System.currentTimeMillis(); - OffsetDateTime nowDateTime = Instant.ofEpochMilli(nowMillis).atOffset(ZoneOffset.UTC); - this.nowMicros = nowMillis * 1000; + this.nowMillis = System.currentTimeMillis(); + OffsetDateTime nowDateTime = Instant.ofEpochMilli(this.nowMillis).atOffset(ZoneOffset.UTC); this.today = (int) ChronoUnit.DAYS.between(EPOCH, nowDateTime); } @@ -376,7 +380,7 @@ public String or(String leftResult, String rightResult) { } private String value(BoundLiteralPredicate pred) { - return sanitize(pred.term().type(), pred.literal().value(), nowMicros, today); + return sanitize(pred.term().type(), pred.literal().value(), nowMillis, today); } @Override @@ -408,7 +412,7 @@ public String predicate(BoundPredicate pred) { + " IN " + abbreviateValues( pred.asSetPredicate().literalSet().stream() - .map(lit -> sanitize(pred.term().type(), lit, nowMicros, today)) + .map(lit -> sanitize(pred.term().type(), lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -417,7 +421,7 @@ public String predicate(BoundPredicate pred) { + " NOT IN " + abbreviateValues( pred.asSetPredicate().literalSet().stream() - .map(lit -> sanitize(pred.term().type(), lit, nowMicros, today)) + .map(lit -> sanitize(pred.term().type(), lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -444,23 +448,23 @@ public String predicate(UnboundPredicate pred) { case NOT_NAN: return "not_nan(" + term + ")"; case LT: - return term + " < " + sanitize(pred.literal(), nowMicros, today); + return term + " < " + sanitize(pred.literal(), nowMillis, today); case LT_EQ: - return term + " <= " + sanitize(pred.literal(), nowMicros, today); + return term + " <= " + sanitize(pred.literal(), nowMillis, today); case GT: - return term + " > " + sanitize(pred.literal(), nowMicros, today); + return term + " > " + sanitize(pred.literal(), nowMillis, today); case GT_EQ: - return term + " >= " + sanitize(pred.literal(), nowMicros, today); + return term + " >= " + sanitize(pred.literal(), nowMillis, today); case EQ: - return term + " = " + sanitize(pred.literal(), nowMicros, today); + return term + " = " + sanitize(pred.literal(), nowMillis, today); case NOT_EQ: - return term + " != " + sanitize(pred.literal(), nowMicros, today); + return term + " != " + sanitize(pred.literal(), nowMillis, today); case IN: return term + " IN " + abbreviateValues( pred.literals().stream() - .map(lit -> sanitize(lit, nowMicros, today)) + .map(lit -> sanitize(lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -469,14 +473,14 @@ public String predicate(UnboundPredicate pred) { + " NOT IN " + abbreviateValues( pred.literals().stream() - 
.map(lit -> sanitize(lit, nowMicros, today))
+                    .map(lit -> sanitize(lit, nowMillis, today))
                     .collect(Collectors.toList()))
                 .stream()
                 .collect(Collectors.joining(", ", "(", ")"));
       case STARTS_WITH:
-        return term + " STARTS WITH " + sanitize(pred.literal(), nowMicros, today);
+        return term + " STARTS WITH " + sanitize(pred.literal(), nowMillis, today);
       case NOT_STARTS_WITH:
-        return term + " NOT STARTS WITH " + sanitize(pred.literal(), nowMicros, today);
+        return term + " NOT STARTS WITH " + sanitize(pred.literal(), nowMillis, today);
       default:
         throw new UnsupportedOperationException(
             "Cannot sanitize unsupported predicate type: " + pred.op());
@@ -501,7 +505,7 @@ private static List abbreviateValues(List sanitizedValues) {
     return sanitizedValues;
   }

-  private static String sanitize(Type type, Object value, long now, int today) {
+  private static String sanitize(Type type, Object value, long nowMillis, int today) {
     switch (type.typeId()) {
       case INTEGER:
       case LONG:
@@ -514,9 +518,9 @@ private static String sanitize(Type type, Object value, long now, int today) {
       case TIME:
         return "(time)";
       case TIMESTAMP:
-        return sanitizeTimestamp((long) value, now);
+        return sanitizeTimestamp(((Types.TimestampType) type).unit(), (long) value, nowMillis);
       case STRING:
-        return sanitizeString((CharSequence) value, now, today);
+        return sanitizeString((CharSequence) value, nowMillis, today);
       case BOOLEAN:
       case UUID:
       case DECIMAL:
@@ -529,13 +533,14 @@ private static String sanitize(Type type, Object value, long now, int today) {
             String.format("Cannot sanitize value for unsupported type %s: %s", type, value));
   }

-  private static String sanitize(Literal literal, long now, int today) {
+  private static String sanitize(Literal literal, long nowMillis, int today) {
     if (literal instanceof Literals.StringLiteral) {
-      return sanitizeString(((Literals.StringLiteral) literal).value(), now, today);
+      return sanitizeString(((Literals.StringLiteral) literal).value(), nowMillis, today);
     } else if (literal instanceof Literals.DateLiteral) {
       return sanitizeDate(((Literals.DateLiteral) literal).value(), today);
     } else if (literal instanceof Literals.TimestampLiteral) {
-      return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now);
+      Literals.TimestampLiteral tsLiteral = ((Literals.TimestampLiteral) literal);
+      return sanitizeTimestamp(tsLiteral.unit(), tsLiteral.value(), nowMillis);
     } else if (literal instanceof Literals.TimeLiteral) {
       return "(time)";
     } else if (literal instanceof Literals.IntegerLiteral) {
@@ -564,14 +569,26 @@ private static String sanitizeDate(int days, int today) {
     return "(date)";
   }

-  private static String sanitizeTimestamp(long micros, long now) {
-    String isPast = now > micros ? "ago" : "from-now";
-    long diff = Math.abs(now - micros);
-    if (diff < FIVE_MINUTES_IN_MICROS) {
+  private static String sanitizeTimestamp(ChronoUnit unit, long timeUnits, long nowMillis) {
+    long timeMillis;
+    switch (unit) {
+      case MICROS:
+        timeMillis = DateTimeUtil.microsToMillis(timeUnits);
+        break;
+      case NANOS:
+        timeMillis = DateTimeUtil.nanosToMillis(timeUnits);
+        break;
+      default:
+        throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit);
+    }
+
+    long diff = Math.abs(nowMillis - timeMillis);
+    if (diff < FIVE_MINUTES_IN_MILLIS) {
       return "(timestamp-about-now)";
     }

-    long hours = TimeUnit.MICROSECONDS.toHours(diff);
+    String isPast = nowMillis > timeMillis ? "ago" : "from-now";
+    long hours = DateTimeUtil.millisToHours(diff);
     if (hours <= THREE_DAYS_IN_HOURS) {
       return "(timestamp-" + hours + "-hours-" + isPast + ")";
     } else if (hours < NINETY_DAYS_IN_HOURS) {
@@ -589,17 +606,23 @@ private static String sanitizeNumber(Number value, String type) {
     return "(" + numDigits + "-digit-" + type + ")";
   }

-  private static String sanitizeString(CharSequence value, long now, int today) {
+  private static String sanitizeString(CharSequence value, long nowMillis, int today) {
     try {
       if (DATE.matcher(value).matches()) {
         Literal date = Literal.of(value).to(Types.DateType.get());
         return sanitizeDate(date.value(), today);
       } else if (TIMESTAMP.matcher(value).matches()) {
-        Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone());
-        return sanitizeTimestamp(ts.value(), now);
+        Literal ts = Literal.of(value).to(Types.TimestampType.microsWithoutZone());
+        return sanitizeTimestamp(ChronoUnit.MICROS, ts.value(), nowMillis);
+      } else if (TIMESTAMPNS.matcher(value).matches()) {
+        Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithoutZone());
+        return sanitizeTimestamp(ChronoUnit.NANOS, ts.value(), nowMillis);
       } else if (TIMESTAMPTZ.matcher(value).matches()) {
-        Literal ts = Literal.of(value).to(Types.TimestampType.withZone());
-        return sanitizeTimestamp(ts.value(), now);
+        Literal ts = Literal.of(value).to(Types.TimestampType.microsWithZone());
+        return sanitizeTimestamp(ChronoUnit.MICROS, ts.value(), nowMillis);
+      } else if (TIMESTAMPTZNS.matcher(value).matches()) {
+        Literal ts = Literal.of(value).to(Types.TimestampType.nanosWithZone());
+        return sanitizeTimestamp(ChronoUnit.NANOS, ts.value(), nowMillis);
       } else if (TIME.matcher(value).matches()) {
         return "(time)";
       } else {
diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java
index 79d7190c49df..c0ba36ec0c30 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java
@@ -39,7 +39,9 @@
 import org.apache.iceberg.types.Conversions;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.TimestampType;
 import org.apache.iceberg.util.ByteBuffers;
+import org.apache.iceberg.util.DateTimeUtil;
 import org.apache.iceberg.util.NaNUtil;

 class Literals {
@@ -298,7 +300,7 @@ public Literal to(Type type) {
         case TIME:
           return (Literal) new TimeLiteral(value());
         case TIMESTAMP:
-          return (Literal) new TimestampLiteral(value());
+          return (Literal) new TimestampLiteral(((TimestampType) type).unit(), value());
         case DATE:
           if ((long) Integer.MAX_VALUE < value()) {
             return aboveMax();
@@ -426,8 +428,11 @@ protected Type.TypeID typeId() {
   }

   static class TimestampLiteral extends ComparableLiteral {
-    TimestampLiteral(Long value) {
+    private final ChronoUnit unit;
+
+    TimestampLiteral(ChronoUnit unit, Long value) {
       super(value);
+      this.unit = unit;
     }

     @Override
@@ -435,7 +440,28 @@ static class TimestampLiteral extends ComparableLiteral {
     public Literal to(Type type) {
       switch (type.typeId()) {
         case TIMESTAMP:
-          return (Literal) this;
+          ChronoUnit toUnit = ((TimestampType) type).unit();
+          switch (unit) {
+            case MICROS:
+              switch (toUnit) {
+                case MICROS:
+                  return (Literal) this;
+                case NANOS:
+                  return (Literal)
+                      new TimestampLiteral(toUnit, DateTimeUtil.microsToNanos(value()));
+              }
+              break;
+            case NANOS:
+              switch (toUnit) {
+                case MICROS:
+                  return (Literal)
+                      new TimestampLiteral(toUnit,
DateTimeUtil.nanosToMicros(value())); + case NANOS: + return (Literal) this; + } + break; + } + break; case DATE: return (Literal) new DateLiteral( @@ -451,6 +477,10 @@ public Literal to(Type type) { protected Type.TypeID typeId() { return Type.TypeID.TIMESTAMP; } + + protected ChronoUnit unit() { + return unit; + } } static class DecimalLiteral extends ComparableLiteral { @@ -501,18 +531,22 @@ public Literal to(Type type) { return (Literal) new TimeLiteral(timeMicros); case TIMESTAMP: - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); - return (Literal) new TimestampLiteral(timestampMicros); + TimestampType tsType = (TimestampType) type; + if (tsType.shouldAdjustToUTC()) { + long timestampUnits = + tsType + .unit() + .between(EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + return (Literal) new TimestampLiteral(tsType.unit(), timestampUnits); } else { - long timestampMicros = - ChronoUnit.MICROS.between( - EPOCH, - LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) - .atOffset(ZoneOffset.UTC)); - return (Literal) new TimestampLiteral(timestampMicros); + long timestampUnits = + tsType + .unit() + .between( + EPOCH, + LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .atOffset(ZoneOffset.UTC)); + return (Literal) new TimestampLiteral(tsType.unit(), timestampUnits); } case STRING: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index f69d5d6110ed..b4dee3749604 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,7 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - return (Transform) Timestamps.DAY; + return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.DAYS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -55,14 +56,15 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.DAY.satisfiesOrderOf(other); + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.DAYS + || otherResultTypeUnit == ChronoUnit.MONTHS + || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); - } else if (other instanceof Days || other instanceof Months || other instanceof Years) { - return true; + } else { + return other instanceof Days || other instanceof Months || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index afc14516f3cd..3ceeec9417a7 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -34,7 +35,7 @@ static Hours get() 
{ @SuppressWarnings("unchecked") protected Transform toEnum(Type type) { if (type.typeId() == Type.TypeID.TIMESTAMP) { - return (Transform) Timestamps.HOUR; + return (Transform) Timestamps.get((Types.TimestampType) type, ChronoUnit.HOURS); } throw new IllegalArgumentException("Unsupported type: " + type); @@ -57,15 +58,17 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return other == Timestamps.HOUR; - } else if (other instanceof Hours - || other instanceof Days - || other instanceof Months - || other instanceof Years) { - return true; + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.HOURS + || otherResultTypeUnit == ChronoUnit.DAYS + || otherResultTypeUnit == ChronoUnit.MONTHS + || otherResultTypeUnit == ChronoUnit.YEARS; + } else { + return other instanceof Hours + || other instanceof Days + || other instanceof Months + || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 8fa4d42385f7..cbdee19e03ea 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,8 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - return (Transform) Timestamps.MONTH; + return (Transform) + Timestamps.get((Types.TimestampType) type, ChronoUnit.MONTHS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -55,14 +57,13 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.MONTH.satisfiesOrderOf(other); + ChronoUnit otherResultTypeUnit = ((Timestamps) other).getResultTypeUnit(); + return otherResultTypeUnit == ChronoUnit.MONTHS || otherResultTypeUnit == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); - } else if (other instanceof Months || other instanceof Years) { - return true; + } else { + return other instanceof Months || other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index e4796478bf28..b3f8d600bd38 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -122,16 +122,23 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor int width = ((Truncate) transform).width(); return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR - || transform == Timestamps.YEAR + || transform == Timestamps.YEAR_FROM_MICROS + || transform == Timestamps.YEAR_FROM_NANOS || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH - || transform == Timestamps.MONTH + || transform == Timestamps.MONTH_FROM_MICROS + || transform == Timestamps.MONTH_FROM_NANOS || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); - } 
else if (transform == Dates.DAY || transform == Timestamps.DAY || transform instanceof Days) {
+    } else if (transform == Dates.DAY
+        || transform == Timestamps.DAY_FROM_MICROS
+        || transform == Timestamps.DAY_FROM_NANOS
+        || transform instanceof Days) {
       return visitor.day(field.fieldId(), sourceName, field.sourceId());
-    } else if (transform == Timestamps.HOUR || transform instanceof Hours) {
+    } else if (transform == Timestamps.HOUR_FROM_MICROS
+        || transform == Timestamps.HOUR_FROM_NANOS
+        || transform instanceof Hours) {
       return visitor.hour(field.fieldId(), sourceName, field.sourceId());
     } else if (transform instanceof VoidTransform) {
       return visitor.alwaysNull(field.fieldId(), sourceName, field.sourceId());
diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java
index 680e095270fb..4712fee60049 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java
@@ -84,22 +84,16 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) {
         results.add(
             visitor.truncate(
                 sourceName, field.sourceId(), width, field.direction(), field.nullOrder()));
-      } else if (transform == Dates.YEAR
-          || transform == Timestamps.YEAR
-          || transform instanceof Years) {
+      } else if ("year".equalsIgnoreCase(transform.toString())) {
         results.add(
             visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
-      } else if (transform == Dates.MONTH
-          || transform == Timestamps.MONTH
-          || transform instanceof Months) {
+      } else if ("month".equalsIgnoreCase(transform.toString())) {
         results.add(
             visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
-      } else if (transform == Dates.DAY
-          || transform == Timestamps.DAY
-          || transform instanceof Days) {
+      } else if ("day".equalsIgnoreCase(transform.toString())) {
         results.add(
             visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
-      } else if (transform == Timestamps.HOUR || transform instanceof Hours) {
+      } else if ("hour".equalsIgnoreCase(transform.toString())) {
         results.add(
             visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder()));
       } else if (transform instanceof UnknownTransform) {
diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
index b5b50e9d42b2..bf203262afcc 100644
--- a/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
+++ b/api/src/main/java/org/apache/iceberg/transforms/Timestamps.java
@@ -20,6 +20,7 @@
 import com.google.errorprone.annotations.Immutable;
 import java.time.temporal.ChronoUnit;
+import java.util.Locale;
 import org.apache.iceberg.expressions.BoundPredicate;
 import org.apache.iceberg.expressions.BoundTransform;
 import org.apache.iceberg.expressions.Expression;
@@ -28,57 +29,131 @@
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.types.Types.TimestampType;
 import org.apache.iceberg.util.DateTimeUtil;
 import org.apache.iceberg.util.SerializableFunction;

-enum Timestamps implements Transform {
-  YEAR(ChronoUnit.YEARS, "year"),
-  MONTH(ChronoUnit.MONTHS, "month"),
-  DAY(ChronoUnit.DAYS, "day"),
-  HOUR(ChronoUnit.HOURS, "hour");
+class Timestamps implements Transform {
+
+  static final Timestamps YEAR_FROM_MICROS = new
Timestamps(ChronoUnit.MICROS, ChronoUnit.YEARS); + static final Timestamps MONTH_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.MONTHS); + static final Timestamps DAY_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.DAYS); + static final Timestamps HOUR_FROM_MICROS = new Timestamps(ChronoUnit.MICROS, ChronoUnit.HOURS); + static final Timestamps YEAR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.YEARS); + static final Timestamps MONTH_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.MONTHS); + static final Timestamps DAY_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.DAYS); + static final Timestamps HOUR_FROM_NANOS = new Timestamps(ChronoUnit.NANOS, ChronoUnit.HOURS); + + static Timestamps get(TimestampType type, String resultTypeUnit) { + switch (resultTypeUnit.toLowerCase(Locale.ENGLISH)) { + case "year": + return get(type, ChronoUnit.YEARS); + case "month": + return get(type, ChronoUnit.MONTHS); + case "day": + return get(type, ChronoUnit.DAYS); + case "hour": + return get(type, ChronoUnit.HOURS); + default: + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } + } + + static Timestamps get(TimestampType type, ChronoUnit resultTypeUnit) { + switch (type.unit()) { + case MICROS: + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_MICROS; + case MONTHS: + return MONTH_FROM_MICROS; + case DAYS: + return DAY_FROM_MICROS; + case HOURS: + return HOUR_FROM_MICROS; + } + break; + case NANOS: + switch (resultTypeUnit) { + case YEARS: + return YEAR_FROM_NANOS; + case MONTHS: + return MONTH_FROM_NANOS; + case DAYS: + return DAY_FROM_NANOS; + case HOURS: + return HOUR_FROM_NANOS; + } + break; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + type.unit()); + } + throw new IllegalArgumentException( + "Unsupported source/result type units: " + type + "->" + resultTypeUnit); + } @Immutable static class Apply implements SerializableFunction { - private final ChronoUnit granularity; + private final ChronoUnit sourceTypeUnit; + private final ChronoUnit resultTypeUnit; - Apply(ChronoUnit granularity) { - this.granularity = granularity; + Apply(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + this.sourceTypeUnit = sourceTypeUnit; + this.resultTypeUnit = resultTypeUnit; } @Override - public Integer apply(Long timestampMicros) { - if (timestampMicros == null) { + public Integer apply(Long timestampUnits) { + if (timestampUnits == null) { return null; } - switch (granularity) { - case YEARS: - return DateTimeUtil.microsToYears(timestampMicros); - case MONTHS: - return DateTimeUtil.microsToMonths(timestampMicros); - case DAYS: - return DateTimeUtil.microsToDays(timestampMicros); - case HOURS: - return DateTimeUtil.microsToHours(timestampMicros); + switch (sourceTypeUnit) { + case MICROS: + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.microsToYears(timestampUnits); + case MONTHS: + return DateTimeUtil.microsToMonths(timestampUnits); + case DAYS: + return DateTimeUtil.microsToDays(timestampUnits); + case HOURS: + return DateTimeUtil.microsToHours(timestampUnits); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } + case NANOS: + switch (resultTypeUnit) { + case YEARS: + return DateTimeUtil.nanosToYears(timestampUnits); + case MONTHS: + return DateTimeUtil.nanosToMonths(timestampUnits); + case DAYS: + return DateTimeUtil.nanosToDays(timestampUnits); + case HOURS: + return 
DateTimeUtil.nanosToHours(timestampUnits); + default: + throw new UnsupportedOperationException( + "Unsupported result type unit: " + resultTypeUnit); + } default: - throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + throw new UnsupportedOperationException( + "Unsupported source type unit: " + sourceTypeUnit); } } } - private final ChronoUnit granularity; - private final String name; private final Apply apply; - Timestamps(ChronoUnit granularity, String name) { - this.granularity = granularity; - this.name = name; - this.apply = new Apply(granularity); + Timestamps(ChronoUnit sourceTypeUnit, ChronoUnit resultTypeUnit) { + this.apply = new Apply(sourceTypeUnit, resultTypeUnit); } @Override - public Integer apply(Long timestampMicros) { - return apply.apply(timestampMicros); + public Integer apply(Long timestampUnits) { + return apply.apply(timestampUnits); } @Override @@ -94,12 +169,16 @@ public boolean canTransform(Type type) { @Override public Type getResultType(Type sourceType) { - if (granularity == ChronoUnit.DAYS) { + if (apply.resultTypeUnit == ChronoUnit.DAYS) { return Types.DateType.get(); } return Types.IntegerType.get(); } + public ChronoUnit getResultTypeUnit() { + return apply.resultTypeUnit; + } + @Override public boolean preservesOrder() { return true; @@ -112,11 +191,11 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies - // the order of day + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and + // hour satisfies the order of day Timestamps otherTransform = (Timestamps) other; - return granularity.getDuration().toHours() - <= otherTransform.granularity.getDuration().toHours(); + return apply.resultTypeUnit.getDuration().toHours() + <= otherTransform.apply.resultTypeUnit.getDuration().toHours(); } return false; @@ -174,7 +253,7 @@ public String toHumanString(Type outputType, Integer value) { return "null"; } - switch (granularity) { + switch (apply.resultTypeUnit) { case YEARS: return TransformUtil.humanYear(value); case MONTHS: @@ -184,13 +263,25 @@ public String toHumanString(Type outputType, Integer value) { case HOURS: return TransformUtil.humanHour(value); default: - throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + throw new UnsupportedOperationException("Unsupported time unit: " + apply.resultTypeUnit); } } @Override public String toString() { - return name; + switch (apply.resultTypeUnit) { + case YEARS: + return "year"; + case MONTHS: + return "month"; + case DAYS: + return "day"; + case HOURS: + return "hour"; + default: + throw new UnsupportedOperationException( + "Unsupported result time unit: " + apply.resultTypeUnit); + } } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 5a56b672b1b1..0c5e7dd77d0f 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -24,7 +24,7 @@ import org.apache.iceberg.expressions.BoundPredicate; import org.apache.iceberg.expressions.UnboundPredicate; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.TimestampType; import org.apache.iceberg.util.SerializableFunction; /** @@ -176,11 +176,7 @@ default String toHumanString(Type type, T value) { case 
TIME: return TransformUtil.humanTime((Long) value); case TIMESTAMP: - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - return TransformUtil.humanTimestampWithZone((Long) value); - } else { - return TransformUtil.humanTimestampWithoutZone((Long) value); - } + return TransformUtil.humanTimestamp((TimestampType) type, (Long) value); case FIXED: case BINARY: if (value instanceof ByteBuffer) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 53bc23a49888..3679628a3b36 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -26,6 +26,7 @@ import java.time.ZoneOffset; import java.time.temporal.ChronoUnit; import java.util.Base64; +import org.apache.iceberg.types.Types; class TransformUtil { @@ -54,12 +55,26 @@ static String humanTime(Long microsFromMidnight) { return LocalTime.ofNanoOfDay(microsFromMidnight * 1000).toString(); } - static String humanTimestampWithZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); - } - - static String humanTimestampWithoutZone(Long timestampMicros) { - return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); + public static String humanTimestamp(Types.TimestampType tsType, Long value) { + if (tsType.shouldAdjustToUTC()) { + switch (tsType.unit()) { + case MICROS: + return ChronoUnit.MICROS.addTo(EPOCH, value).toString(); + case NANOS: + return ChronoUnit.NANOS.addTo(EPOCH, value).toString(); + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); + } + } else { + switch (tsType.unit()) { + case MICROS: + return ChronoUnit.MICROS.addTo(EPOCH, value).toLocalDateTime().toString(); + case NANOS: + return ChronoUnit.NANOS.addTo(EPOCH, value).toLocalDateTime().toString(); + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + tsType.unit()); + } + } } static String humanHour(int hourOrdinal) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index a1ce33ddd6da..d0893757ae74 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg.transforms; +import java.time.temporal.ChronoUnit; import java.util.Locale; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -25,6 +26,7 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types.TimestampType; /** * Factory methods for transforms. 
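Review note: the hunk below swaps the old `Timestamps.valueOf(name)` enum lookup for the unit-aware `Timestamps.get((TimestampType) type, transform)` factory introduced earlier in this diff. A minimal sketch of the intended behavior, assuming it lives in the `org.apache.iceberg.transforms` package (the `Timestamps` class is package-private) and using a hypothetical class name:

```java
package org.apache.iceberg.transforms;

import java.time.temporal.ChronoUnit;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.DateTimeUtil;

// Hypothetical sanity check, not part of this diff: the same instant should land in the
// same day partition whether the source column is microsecond- or nanosecond-precision.
public class TimestampsUnitCheck {
  public static void main(String[] args) {
    long micros = DateTimeUtil.isoTimestampToMicros("2017-12-01T10:12:55.038194");
    long nanos = DateTimeUtil.microsToNanos(micros);

    // Unit-aware singletons replace the old YEAR/MONTH/DAY/HOUR enum constants.
    Timestamps dayFromMicros =
        Timestamps.get(Types.TimestampType.microsWithoutZone(), ChronoUnit.DAYS);
    Timestamps dayFromNanos =
        Timestamps.get(Types.TimestampType.nanosWithoutZone(), ChronoUnit.DAYS);

    // Both produce the same day ordinal (days since 1970-01-01), so this prints "true".
    System.out.println(dayFromMicros.apply(micros).equals(dayFromNanos.apply(nanos)));
  }
}
```

This is also why `satisfiesOrderOf` in Days, Hours, Months, and Years now compares `getResultTypeUnit()` instead of testing identity against a single enum constant.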
@@ -86,8 +88,9 @@ private Transforms() {} try { if (type.typeId() == Type.TypeID.TIMESTAMP) { - return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH)); - } else if (type.typeId() == Type.TypeID.DATE) { + return Timestamps.get((TimestampType) type, transform); + } + if (type.typeId() == Type.TypeID.DATE) { return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } } catch (IllegalArgumentException ignored) { @@ -129,7 +132,15 @@ public static Transform year(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - return (Transform) Timestamps.YEAR; + ChronoUnit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.YEAR_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.YEAR_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } default: throw new IllegalArgumentException("Cannot partition type " + type + " by year"); } @@ -150,7 +161,15 @@ public static Transform month(Type type) { case DATE: return (Transform) Dates.MONTH; case TIMESTAMP: - return (Transform) Timestamps.MONTH; + ChronoUnit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.MONTH_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.MONTH_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } default: throw new IllegalArgumentException("Cannot partition type " + type + " by month"); } @@ -171,7 +190,15 @@ public static Transform day(Type type) { case DATE: return (Transform) Dates.DAY; case TIMESTAMP: - return (Transform) Timestamps.DAY; + ChronoUnit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.DAY_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.DAY_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } default: throw new IllegalArgumentException("Cannot partition type " + type + " by day"); } @@ -188,9 +215,19 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - Preconditions.checkArgument( - type.typeId() == Type.TypeID.TIMESTAMP, "Cannot partition type %s by hour", type); - return (Transform) Timestamps.HOUR; + if (Preconditions.checkNotNull(type.typeId(), "Type ID cannot be null") + == Type.TypeID.TIMESTAMP) { + ChronoUnit unit = ((TimestampType) type).unit(); + switch (unit) { + case MICROS: + return (Transform) Timestamps.HOUR_FROM_MICROS; + case NANOS: + return (Transform) Timestamps.HOUR_FROM_NANOS; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } + } + throw new IllegalArgumentException("Cannot partition type " + type + " by hour"); } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 6c1eee578506..de81fabf7ec8 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -19,6 +19,7 @@ package org.apache.iceberg.transforms; import java.io.ObjectStreamException; +import java.time.temporal.ChronoUnit; import org.apache.iceberg.types.Type; import org.apache.iceberg.types.Types; @@ -37,7 +38,7 @@ protected Transform toEnum(Type type) { case DATE: return (Transform) Dates.YEAR; case TIMESTAMP: - return (Transform) Timestamps.YEAR; + return (Transform) 
Timestamps.get((Types.TimestampType) type, ChronoUnit.YEARS); default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -55,14 +56,12 @@ public boolean satisfiesOrderOf(Transform other) { } if (other instanceof Timestamps) { - return Timestamps.YEAR.satisfiesOrderOf(other); + return ((Timestamps) other).getResultTypeUnit() == ChronoUnit.YEARS; } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); - } else if (other instanceof Years) { - return true; + } else { + return other instanceof Years; } - - return false; } @Override diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index d09d9f5395ce..ddc52446e041 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -39,8 +39,10 @@ private Comparators() {} .put(Types.DoubleType.get(), Comparator.naturalOrder()) .put(Types.DateType.get(), Comparator.naturalOrder()) .put(Types.TimeType.get(), Comparator.naturalOrder()) - .put(Types.TimestampType.withZone(), Comparator.naturalOrder()) - .put(Types.TimestampType.withoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.microsWithZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.microsWithoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.nanosWithZone(), Comparator.naturalOrder()) + .put(Types.TimestampType.nanosWithoutZone(), Comparator.naturalOrder()) .put(Types.StringType.get(), Comparators.charSequences()) .put(Types.UUIDType.get(), Comparator.naturalOrder()) .put(Types.BinaryType.get(), Comparators.unsignedBytes()) diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index da70dd9ac6ab..3e29cb31203b 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -19,6 +19,7 @@ package org.apache.iceberg.types; import java.io.Serializable; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.List; import java.util.Locale; @@ -46,8 +47,10 @@ private Types() {} .put(DoubleType.get().toString(), DoubleType.get()) .put(DateType.get().toString(), DateType.get()) .put(TimeType.get().toString(), TimeType.get()) - .put(TimestampType.withZone().toString(), TimestampType.withZone()) - .put(TimestampType.withoutZone().toString(), TimestampType.withoutZone()) + .put(TimestampType.microsWithZone().toString(), TimestampType.microsWithZone()) + .put(TimestampType.microsWithoutZone().toString(), TimestampType.microsWithoutZone()) + .put(TimestampType.nanosWithZone().toString(), TimestampType.nanosWithZone()) + .put(TimestampType.nanosWithoutZone().toString(), TimestampType.nanosWithoutZone()) .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) @@ -205,27 +208,60 @@ public String toString() { } public static class TimestampType extends PrimitiveType { - private static final TimestampType INSTANCE_WITH_ZONE = new TimestampType(true); - private static final TimestampType INSTANCE_WITHOUT_ZONE = new TimestampType(false); + private static final TimestampType INSTANCE_MICROS_WITH_ZONE = + new TimestampType(true, ChronoUnit.MICROS); + private static final TimestampType INSTANCE_MICROS_WITHOUT_ZONE = + new TimestampType(false, ChronoUnit.MICROS); + private static final TimestampType INSTANCE_NANOS_WITH_ZONE = 
+ new TimestampType(true, ChronoUnit.NANOS); + private static final TimestampType INSTANCE_NANOS_WITHOUT_ZONE = + new TimestampType(false, ChronoUnit.NANOS); + + /** @deprecated use {@link #microsWithZone()} instead. */ + @Deprecated public static TimestampType withZone() { - return INSTANCE_WITH_ZONE; + return INSTANCE_MICROS_WITH_ZONE; } + /** @deprecated use {@link #microsWithoutZone()} instead. */ + @Deprecated public static TimestampType withoutZone() { - return INSTANCE_WITHOUT_ZONE; + return INSTANCE_MICROS_WITHOUT_ZONE; + } + + public static TimestampType microsWithZone() { + return INSTANCE_MICROS_WITH_ZONE; + } + + public static TimestampType microsWithoutZone() { + return INSTANCE_MICROS_WITHOUT_ZONE; + } + + public static TimestampType nanosWithZone() { + return INSTANCE_NANOS_WITH_ZONE; + } + + public static TimestampType nanosWithoutZone() { + return INSTANCE_NANOS_WITHOUT_ZONE; } private final boolean adjustToUTC; + private final ChronoUnit unit; - private TimestampType(boolean adjustToUTC) { + private TimestampType(boolean adjustToUTC, ChronoUnit unit) { this.adjustToUTC = adjustToUTC; + this.unit = unit; } public boolean shouldAdjustToUTC() { return adjustToUTC; } + public ChronoUnit unit() { + return unit; + } + @Override public TypeID typeId() { return TypeID.TIMESTAMP; @@ -233,10 +269,13 @@ public TypeID typeId() { @Override public String toString() { - if (shouldAdjustToUTC()) { - return "timestamptz"; - } else { - return "timestamp"; + switch (unit) { + case MICROS: + return shouldAdjustToUTC() ? "timestamptz" : "timestamp"; + case NANOS: + return shouldAdjustToUTC() ? "timestamptz_ns" : "timestamp_ns"; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); } } @@ -249,12 +288,12 @@ public boolean equals(Object o) { } TimestampType timestampType = (TimestampType) o; - return adjustToUTC == timestampType.adjustToUTC; + return adjustToUTC == timestampType.adjustToUTC && unit == timestampType.unit; } @Override public int hashCode() { - return Objects.hash(TimestampType.class, adjustToUTC); + return Objects.hash(TimestampType.class, adjustToUTC, unit); } } diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index a2f5301f44a9..f72d62a361d2 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -33,8 +33,12 @@ private DateTimeUtil() {} public static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); public static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); - public static final long MICROS_PER_MILLIS = 1000L; + public static final long MICROS_PER_MILLIS = 1_000L; + public static final long MILLIS_PER_SECOND = 1_000L; public static final long MICROS_PER_SECOND = 1_000_000L; + public static final long NANOS_PER_SECOND = 1_000_000_000L; + public static final long NANOS_PER_MILLI = 1_000_000L; + public static final long NANOS_PER_MICRO = 1_000L; public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); @@ -60,14 +64,26 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return 
ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } + public static long nanosFromInstant(Instant instant) { + return ChronoUnit.NANOS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); + } + public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. @@ -75,14 +91,38 @@ public static long microsToMillis(long micros) { return Math.floorDiv(micros, MICROS_PER_MILLIS); } + public static long nanosToMillis(long nanos) { + return Math.floorDiv(nanos, NANOS_PER_MILLI); + } + + public static long nanosToMicros(long nanos) { + return Math.floorDiv(nanos, NANOS_PER_MICRO); + } + + public static long microsToNanos(long micros) { + return Math.multiplyExact(micros, NANOS_PER_MICRO); + } + + public static long millisToNanos(long millis) { + return Math.multiplyExact(millis, NANOS_PER_MILLI); + } + public static OffsetDateTime timestamptzFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch); } + public static OffsetDateTime timestamptzFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch); + } + public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -106,11 +146,27 @@ public static String microsToIsoTimestamptz(long micros) { return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); } + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + DateTimeFormatter zeroOffsetFormatter = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); + return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); + } + public static String microsToIsoTimestamp(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -124,6 +180,11 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } + public static long isoTimestamptzToNanos(String timestampString) { + return nanosFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); + } + public static boolean isUTCTimestamptz(String timestampString) { OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); @@ -135,6 +196,11 
@@ public static long isoTimestampToMicros(String timestampString) { LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } + public static long isoTimestampToNanos(String timestampString) { + return nanosFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + } + public static int daysToYears(int days) { return convertDays(days, ChronoUnit.YEARS); } @@ -159,28 +225,76 @@ public static int microsToYears(long micros) { return convertMicros(micros, ChronoUnit.YEARS); } + public static int nanosToYears(long nanos) { + return convertNanos(nanos, ChronoUnit.YEARS); + } + public static int microsToMonths(long micros) { return convertMicros(micros, ChronoUnit.MONTHS); } + public static int nanosToMonths(long nanos) { + return convertNanos(nanos, ChronoUnit.MONTHS); + } + public static int microsToDays(long micros) { return convertMicros(micros, ChronoUnit.DAYS); } + public static int nanosToDays(long nanos) { + return convertNanos(nanos, ChronoUnit.DAYS); + } + + public static int millisToHours(long millis) { + return convertMillis(millis, ChronoUnit.HOURS); + } + public static int microsToHours(long micros) { return convertMicros(micros, ChronoUnit.HOURS); } + public static int nanosToHours(long nanos) { + return convertNanos(nanos, ChronoUnit.HOURS); + } + + private static int convertMillis(long millis, ChronoUnit granularity) { + if (millis >= 0) { + long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); + long nanoAdjustment = Math.floorMod(millis, MILLIS_PER_SECOND) * NANOS_PER_MILLI; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 milli to the value to account for the case where there is exactly 1 unit between + // the timestamp and epoch because the result will always be decremented. + long epochSecond = Math.floorDiv(millis, MILLIS_PER_SECOND); + long nanoAdjustment = Math.floorMod(millis + 1, MILLIS_PER_SECOND) * NANOS_PER_MILLI; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + private static int convertMicros(long micros, ChronoUnit granularity) { if (micros >= 0) { long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * 1000; + long nanoAdjustment = Math.floorMod(micros, MICROS_PER_SECOND) * NANOS_PER_MICRO; return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); } else { // add 1 micro to the value to account for the case where there is exactly 1 unit between // the timestamp and epoch because the result will always be decremented. long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); - long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * 1000; + long nanoAdjustment = Math.floorMod(micros + 1, MICROS_PER_SECOND) * NANOS_PER_MICRO; + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + + private static int convertNanos(long nanos, ChronoUnit granularity) { + if (nanos >= 0) { + long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 nano to the value to account for the case where there is exactly 1 unit between + // the timestamp and epoch because the result will always be decremented. 
+ long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos + 1, NANOS_PER_SECOND); return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; } } diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java index 5e4ca1fb11be..369a3a842224 100644 --- a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -29,12 +29,13 @@ public class PartitionSpecTestBase { Types.NestedField.required(2, "l", Types.LongType.get()), Types.NestedField.required(3, "d", Types.DateType.get()), Types.NestedField.required(4, "t", Types.TimeType.get()), - Types.NestedField.required(5, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.required(5, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.required(6, "dec", Types.DecimalType.of(9, 2)), Types.NestedField.required(7, "s", Types.StringType.get()), Types.NestedField.required(8, "u", Types.UUIDType.get()), Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), - Types.NestedField.required(10, "b", Types.BinaryType.get())); + Types.NestedField.required(10, "b", Types.BinaryType.get()), + Types.NestedField.required(11, "tsn", Types.TimestampType.nanosWithoutZone())); // a spec with all of the allowed transform/type pairs public static final PartitionSpec[] SPECS = @@ -49,6 +50,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).identity("u").build(), PartitionSpec.builderFor(SCHEMA).identity("f").build(), PartitionSpec.builderFor(SCHEMA).identity("b").build(), + PartitionSpec.builderFor(SCHEMA).identity("tsn").build(), PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(), @@ -59,6 +61,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("tsn", 128).build(), PartitionSpec.builderFor(SCHEMA).year("d").build(), PartitionSpec.builderFor(SCHEMA).month("d").build(), PartitionSpec.builderFor(SCHEMA).day("d").build(), @@ -66,6 +69,10 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).month("ts").build(), PartitionSpec.builderFor(SCHEMA).day("ts").build(), PartitionSpec.builderFor(SCHEMA).hour("ts").build(), + PartitionSpec.builderFor(SCHEMA).year("tsn").build(), + PartitionSpec.builderFor(SCHEMA).month("tsn").build(), + PartitionSpec.builderFor(SCHEMA).day("tsn").build(), + PartitionSpec.builderFor(SCHEMA).hour("tsn").build(), PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(), diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 332556e474c7..233c8c508239 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -178,8 +178,10 @@ public void testTime() { @Test public void testTimestamp() { - assertAccessorReturns(Types.TimestampType.withoutZone(), 123L); - assertAccessorReturns(Types.TimestampType.withZone(), 123L); + 
assertAccessorReturns(Types.TimestampType.microsWithoutZone(), 123L); + assertAccessorReturns(Types.TimestampType.microsWithZone(), 123L); + assertAccessorReturns(Types.TimestampType.nanosWithoutZone(), 123L); + assertAccessorReturns(Types.TimestampType.nanosWithZone(), 123L); } @Test diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java index 2fda247a33c8..5a98806cf291 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionPaths.java @@ -32,7 +32,7 @@ public class TestPartitionPaths { new Schema( Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()), - Types.NestedField.optional(3, "ts", Types.TimestampType.withoutZone())); + Types.NestedField.optional(3, "ts", Types.TimestampType.microsWithoutZone())); @Test public void testPartitionPath() { @@ -42,8 +42,8 @@ public void testPartitionPath() { Transform bucket = Transforms.bucket(10); Literal ts = - Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.withoutZone()); - Object tsHour = hour.bind(Types.TimestampType.withoutZone()).apply(ts.value()); + Literal.of("2017-12-01T10:12:55.038194").to(Types.TimestampType.microsWithoutZone()); + Object tsHour = hour.bind(Types.TimestampType.microsWithoutZone()).apply(ts.value()); Object idBucket = bucket.bind(Types.IntegerType.get()).apply(1); Row partition = Row.of(tsHour, idBucket); diff --git a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java index eb0e74164688..a621ccab81e5 100644 --- a/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java +++ b/api/src/test/java/org/apache/iceberg/TestPartitionSpecValidation.java @@ -30,10 +30,10 @@ public class TestPartitionSpecValidation { private static final Schema SCHEMA = new Schema( NestedField.required(1, "id", Types.LongType.get()), - NestedField.required(2, "ts", Types.TimestampType.withZone()), - NestedField.required(3, "another_ts", Types.TimestampType.withZone()), - NestedField.required(4, "d", Types.TimestampType.withZone()), - NestedField.required(5, "another_d", Types.TimestampType.withZone()), + NestedField.required(2, "ts", Types.TimestampType.microsWithZone()), + NestedField.required(3, "another_ts", Types.TimestampType.microsWithZone()), + NestedField.required(4, "d", Types.TimestampType.microsWithZone()), + NestedField.required(5, "another_d", Types.TimestampType.microsWithZone()), NestedField.required(6, "s", Types.StringType.get())); @Test diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java index 9a27830543ad..5638ef6c31b5 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java @@ -42,12 +42,13 @@ public class TestExpressionUtil { Types.NestedField.required(1, "id", Types.LongType.get()), Types.NestedField.required(2, "val", Types.IntegerType.get()), Types.NestedField.required(3, "val2", Types.IntegerType.get()), - Types.NestedField.required(4, "ts", Types.TimestampType.withoutZone()), - Types.NestedField.required(5, "date", Types.DateType.get()), - Types.NestedField.required(6, "time", Types.DateType.get()), - Types.NestedField.optional(7, "data", Types.StringType.get()), - 
Types.NestedField.optional(8, "measurement", Types.DoubleType.get()), - Types.NestedField.optional(9, "test", Types.IntegerType.get())); + Types.NestedField.required(4, "ts", Types.TimestampType.microsWithoutZone()), + Types.NestedField.required(5, "tsns", Types.TimestampType.nanosWithoutZone()), + Types.NestedField.required(6, "date", Types.DateType.get()), + Types.NestedField.required(7, "time", Types.DateType.get()), + Types.NestedField.optional(8, "data", Types.StringType.get()), + Types.NestedField.optional(9, "measurement", Types.DoubleType.get()), + Types.NestedField.optional(10, "test", Types.IntegerType.get())); private static final Types.StructType STRUCT = SCHEMA.asStruct(); @@ -461,7 +462,9 @@ public void testSanitizeTimestamp() { "2022-04-29T23:49:51", "2022-04-29T23:49:51.123456", "2022-04-29T23:49:51-07:00", - "2022-04-29T23:49:51.123456+01:00")) { + "2022-04-29T23:49:51.123456+01:00", + "2022-04-29T23:49:51.123456789", + "2022-04-29T23:49:51.123456789+01:00")) { assertEquals( Expressions.equal("test", "(timestamp)"), ExpressionUtil.sanitize(Expressions.equal("test", timestamp))); @@ -496,7 +499,14 @@ public void testSanitizeTimestampAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(nowLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -522,7 +532,14 @@ public void testSanitizeTimestampPast() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -548,7 +565,14 @@ public void testSanitizeTimestampLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(lastWeekLocal).to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekLocal))) .as("Sanitized string should be identical except for descriptive literal") @@ -574,7 +598,15 @@ public void testSanitizeTimestampFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.withoutZone())))); + Literal.of(ninetyMinutesFromNowLocal) + .to(Types.TimestampType.microsWithoutZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + 
Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesFromNowLocal).to(Types.TimestampType.nanosWithoutZone())))); assertThat( ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowLocal))) @@ -597,7 +629,14 @@ public void testSanitizeTimestamptzAboutNow() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(nowUtc).to(Types.TimestampType.withZone())))); + Literal.of(nowUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-about-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(nowUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", nowUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -618,7 +657,14 @@ public void testSanitizeTimestamptzPast() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.withZone())))); + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesAgoUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesAgoUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -639,7 +685,14 @@ public void testSanitizeTimestamptzLastWeek() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(lastWeekUtc).to(Types.TimestampType.withZone())))); + Literal.of(lastWeekUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-7-days-ago)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(lastWeekUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", lastWeekUtc))) .as("Sanitized string should be identical except for descriptive literal") @@ -660,7 +713,14 @@ public void testSanitizeTimestamptzFuture() { Expressions.predicate( Expression.Operation.EQ, "test", - Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.withZone())))); + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.microsWithZone())))); + assertEquals( + Expressions.equal("test", "(timestamp-1-hours-from-now)"), + ExpressionUtil.sanitize( + Expressions.predicate( + Expression.Operation.EQ, + "test", + Literal.of(ninetyMinutesFromNowUtc).to(Types.TimestampType.nanosWithZone())))); assertThat(ExpressionUtil.toSanitizedString(Expressions.equal("test", ninetyMinutesFromNowUtc))) .as("Sanitized string should be identical except for descriptive literal") diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java index d5aa251ffb50..fcb031c27aa8 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java @@ -38,8 +38,10 @@ public void testLiterals() throws Exception { Literal.of(8.75D), Literal.of("2017-11-29").to(Types.DateType.get()), Literal.of("11:30:07").to(Types.TimeType.get()), - Literal.of("2017-11-29T11:30:07.123").to(Types.TimestampType.withoutZone()), - 
Literal.of("2017-11-29T11:30:07.123+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123456").to(Types.TimestampType.microsWithoutZone()), + Literal.of("2017-11-29T11:30:07.123456+01:00").to(Types.TimestampType.microsWithZone()), + Literal.of("2017-11-29T11:30:07.123456789").to(Types.TimestampType.nanosWithoutZone()), + Literal.of("2017-11-29T11:30:07.123456789+01:00").to(Types.TimestampType.nanosWithZone()), Literal.of("abc"), Literal.of(UUID.randomUUID()), Literal.of(new byte[] {1, 2, 3}).to(Types.FixedType.ofLength(3)), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index f8d2cd49d969..4c576d9a41fc 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -42,7 +42,11 @@ public void testIdentityConversions() { Pair.of(Literal.of("34.55"), Types.DecimalType.of(9, 2)), Pair.of(Literal.of("2017-08-18"), Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), - Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()), + Pair.of( + Literal.of("2017-08-18T14:21:01.919432"), Types.TimestampType.microsWithoutZone()), + Pair.of( + Literal.of("2017-08-18T14:21:01.919432755"), + Types.TimestampType.nanosWithoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), @@ -99,8 +103,10 @@ public void testInvalidBooleanConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -114,8 +120,10 @@ public void testInvalidIntegerConversions() { Literal.of(34), Types.BooleanType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -142,8 +150,10 @@ public void testInvalidFloatConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -159,8 +169,10 @@ public void testInvalidDoubleConversions() { Types.LongType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -177,8 +189,10 @@ public void testInvalidDateConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.TimeType.get(), - 
Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -196,8 +210,10 @@ public void testInvalidTimeConversions() { Types.FloatType.get(), Types.DoubleType.get(), Types.DateType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -206,9 +222,26 @@ public void testInvalidTimeConversions() { } @Test - public void testInvalidTimestampConversions() { + public void testInvalidTimestampMicrosConversions() { testInvalidConversions( - Literal.of("2017-08-18T14:21:01.919").to(Types.TimestampType.withoutZone()), + Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithoutZone()), + Types.BooleanType.get(), + Types.IntegerType.get(), + Types.LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.TimeType.get(), + Types.DecimalType.of(9, 4), + Types.StringType.get(), + Types.UUIDType.get(), + Types.FixedType.ofLength(1), + Types.BinaryType.get()); + } + + @Test + public void testInvalidTimestampNanosConversions() { + testInvalidConversions( + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithoutZone()), Types.BooleanType.get(), Types.IntegerType.get(), Types.LongType.get(), @@ -233,8 +266,10 @@ public void testInvalidDecimalConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -267,8 +302,10 @@ public void testInvalidUUIDConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.FixedType.ofLength(1), @@ -286,8 +323,10 @@ public void testInvalidFixedConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -305,8 +344,10 @@ public void testInvalidBinaryConversions() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java 
b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index f35b274eb3d5..0c6348f8af94 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -101,7 +101,7 @@ public void testStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 14, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -112,7 +112,7 @@ public void testStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("2017-08-18T14:21:01.919"); - timestamp = timestampStr.to(Types.TimestampType.withoutZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -120,7 +120,7 @@ public void testStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("2017-08-18T14:21:01.919-07:00"); - timestamp = timestampStr.to(Types.TimestampType.withZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); avroValue = avroConversion.toLong( LocalDateTime.of(2017, 8, 18, 21, 21, 1, 919 * 1000000).toInstant(ZoneOffset.UTC), @@ -141,7 +141,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with explicit UTC offset, +00:00 Literal timestampStr = Literal.of("1969-12-31T23:59:58.999999+00:00"); - Literal timestamp = timestampStr.to(Types.TimestampType.withZone()); + Literal timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); long avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -156,7 +156,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp without an explicit zone should be UTC (equal to the previous converted value) timestampStr = Literal.of("1969-12-31T23:59:58.999999"); - timestamp = timestampStr.to(Types.TimestampType.withoutZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithoutZone()); assertThat((long) timestamp.value()) .as("Timestamp without zone should match UTC") @@ -164,7 +164,7 @@ public void testNegativeStringToTimestampLiteral() { // Timestamp with an explicit offset should be adjusted to UTC timestampStr = Literal.of("1969-12-31T16:59:58.999999-07:00"); - timestamp = timestampStr.to(Types.TimestampType.withZone()); + timestamp = timestampStr.to(Types.TimestampType.microsWithZone()); avroValue = avroConversion.toLong( LocalDateTime.of(1969, 12, 31, 23, 59, 58, 999999 * 1_000).toInstant(ZoneOffset.UTC), @@ -181,8 +181,13 @@ public void testNegativeStringToTimestampLiteral() { @Test public void testTimestampWithZoneWithoutZoneInLiteral() { // Zone must be present in literals when converting to timestamp with zone - Literal timestampStr = Literal.of("2017-08-18T14:21:01.919"); - Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withZone())) + Assertions.assertThatThrownBy( + () -> Literal.of("2017-08-18T14:21:01.919123").to(Types.TimestampType.microsWithZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be 
parsed"); + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampType.nanosWithZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } @@ -190,8 +195,16 @@ public void testTimestampWithZoneWithoutZoneInLiteral() { @Test public void testTimestampWithoutZoneWithZoneInLiteral() { // Zone must not be present in literals when converting to timestamp without zone - Literal timestampStr = Literal.of("2017-08-18T14:21:01.919+07:00"); - Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampType.withoutZone())) + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123+07:00") + .to(Types.TimestampType.microsWithoutZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + Assertions.assertThatThrownBy( + () -> + Literal.of("2017-08-18T14:21:01.919123456+07:00") + .to(Types.TimestampType.nanosWithoutZone())) .isInstanceOf(DateTimeException.class) .hasMessageContaining("could not be parsed"); } diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java index b8a0e40c1110..c76ac98612b9 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestBucketing.java @@ -107,17 +107,65 @@ public void testSpecValues() { .isEqualTo(-662762989); Literal timestampVal = - Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.withoutZone()); + Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.microsWithoutZone()); assertThat(BucketUtil.hash(timestampVal.value())) .as("Spec example: hash(2017-11-16T22:31:08) = -2047944441") .isEqualTo(-2047944441); + timestampVal = + Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.microsWithoutZone()); + assertThat(BucketUtil.hash(timestampVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000001) = -1207196810") + .isEqualTo(-1207196810); + Literal timestamptzVal = - Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.withZone()); + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.microsWithZone()); assertThat(BucketUtil.hash(timestamptzVal.value())) .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -2047944441") .isEqualTo(-2047944441); + timestamptzVal = + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.microsWithZone()); + assertThat(BucketUtil.hash(timestamptzVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -1207196810") + .isEqualTo(-1207196810); + + Literal timestampNsVal = + Literal.of("2017-11-16T22:31:08").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08) = -737750069") + .isEqualTo(-737750069); + + timestampNsVal = + Literal.of("2017-11-16T22:31:08.000001").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000001) = -976603392") + .isEqualTo(-976603392); + + timestampNsVal = + Literal.of("2017-11-16T22:31:08.000000001").to(Types.TimestampType.nanosWithoutZone()); + assertThat(BucketUtil.hash(timestampNsVal.value())) + .as("Spec example: hash(2017-11-16T22:31:08.000000001) = -160215926") + .isEqualTo(-160215926); + + Literal timestamptzNsVal = + Literal.of("2017-11-16T14:31:08-08:00").to(Types.TimestampType.nanosWithZone()); + 
assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08-08:00) = -737750069") + .isEqualTo(-737750069); + + timestamptzNsVal = + Literal.of("2017-11-16T14:31:08.000001-08:00").to(Types.TimestampType.nanosWithZone()); + assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000001-08:00) = -976603392") + .isEqualTo(-976603392); + + timestamptzNsVal = + Literal.of("2017-11-16T14:31:08.000000001-08:00").to(Types.TimestampType.nanosWithZone()); + assertThat(BucketUtil.hash(timestamptzNsVal.value())) + .as("Spec example: hash(2017-11-16T14:31:08.000000001-08:00) = -160215926") + .isEqualTo(-160215926); + assertThat(BucketUtil.hash("iceberg")) .as("Spec example: hash(\"iceberg\") = 1210000089") .isEqualTo(1210000089); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java index 6101fdf0986d..8d651bf618dd 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestIdentity.java @@ -98,7 +98,7 @@ public void testTimeHumanString() { @Test public void testTimestampWithZoneHumanString() { - Types.TimestampType timestamptz = Types.TimestampType.withZone(); + Types.TimestampType timestamptz = Types.TimestampType.microsWithZone(); Transform identity = Transforms.identity(); Literal ts = Literal.of("2017-12-01T10:12:55.038194-08:00").to(timestamptz); @@ -111,7 +111,7 @@ public void testTimestampWithZoneHumanString() { @Test public void testTimestampWithoutZoneHumanString() { - Types.TimestampType timestamp = Types.TimestampType.withoutZone(); + Types.TimestampType timestamp = Types.TimestampType.microsWithoutZone(); Transform identity = Transforms.identity(); String tsString = "2017-12-01T10:12:55.038194"; diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java index ccfda895f9f1..ffc48fc0e9aa 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestProjection.java @@ -268,10 +268,10 @@ public void testBadSparkPartitionFilter() { public void testProjectionNames() { final Schema schema = new Schema( - required(1, "timestamp1", Types.TimestampType.withoutZone()), - optional(2, "timestamp2", Types.TimestampType.withoutZone()), - optional(3, "timestamp3", Types.TimestampType.withoutZone()), - optional(4, "timestamp4", Types.TimestampType.withoutZone()), + required(1, "timestamp1", Types.TimestampType.microsWithoutZone()), + optional(2, "timestamp2", Types.TimestampType.microsWithoutZone()), + optional(3, "timestamp3", Types.TimestampType.microsWithoutZone()), + optional(4, "timestamp4", Types.TimestampType.microsWithoutZone()), optional(5, "date1", Types.DateType.get()), optional(6, "date2", Types.DateType.get()), optional(7, "date3", Types.DateType.get()), diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java index fa3436e5701b..87bdd6944a73 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestResiduals.java @@ -199,19 +199,23 @@ public void testIn() { public void testInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), + 
Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-01T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); Long date20191202 = (Long) - Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-02T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.microsWithoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = in("ts", date20191201, date20191202); @@ -307,19 +311,23 @@ public void testNotNaN() { public void testNotInTimestamp() { Schema schema = new Schema( - Types.NestedField.optional(50, "ts", Types.TimestampType.withoutZone()), + Types.NestedField.optional(50, "ts", Types.TimestampType.microsWithoutZone()), Types.NestedField.optional(51, "dateint", Types.IntegerType.get())); Long date20191201 = (Long) - Literal.of("2019-12-01T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-01T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); Long date20191202 = (Long) - Literal.of("2019-12-02T00:00:00.00000").to(Types.TimestampType.withoutZone()).value(); + Literal.of("2019-12-02T00:00:00.00000") + .to(Types.TimestampType.microsWithoutZone()) + .value(); PartitionSpec spec = PartitionSpec.builderFor(schema).day("ts").build(); - Function day = Transforms.day().bind(Types.TimestampType.withoutZone()); + Function day = Transforms.day().bind(Types.TimestampType.microsWithoutZone()); Integer tsDay = day.apply(date20191201); Expression pred = notIn("ts", date20191201, date20191202); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java index 3c37e643eb95..dee5d8265b04 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestamps.java @@ -29,7 +29,7 @@ public class TestTimestamps { @Test @SuppressWarnings("deprecation") public void testDeprecatedTimestampTransform() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -61,7 +61,7 @@ public void testDeprecatedTimestampTransform() { @Test public void testTimestampTransform() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal ts = Literal.of("2017-12-01T10:12:55.038194").to(type); Literal pts = Literal.of("1970-01-01T00:00:01.000001").to(type); Literal nts = Literal.of("1969-12-31T23:59:58.999999").to(type); @@ -105,7 +105,7 @@ public void testTimestampTransform() { @Test public void testTimestampWithoutZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = 
Literal.of("2017-12-01T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -125,7 +125,7 @@ public void testTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-30T10:12:55.038194").to(type); Transform year = Transforms.year(); @@ -145,7 +145,7 @@ public void testNegativeTimestampWithoutZoneToHumanString() { @Test public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-30T00:00:00.000000").to(type); Transform year = Transforms.year(); @@ -165,7 +165,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringLowerBound() { @Test public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { - Types.TimestampType type = Types.TimestampType.withoutZone(); + Types.TimestampType type = Types.TimestampType.microsWithoutZone(); Literal date = Literal.of("1969-12-31T23:59:59.999999").to(type); Transform year = Transforms.year(); @@ -185,7 +185,7 @@ public void testNegativeTimestampWithoutZoneToHumanStringUpperBound() { @Test public void testTimestampWithZoneToHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); Literal date = Literal.of("2017-12-01T10:12:55.038194-08:00").to(type); Transform year = Transforms.year(); @@ -206,7 +206,7 @@ public void testTimestampWithZoneToHumanString() { @Test public void testNullHumanString() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); assertThat(Transforms.year().toHumanString(type, null)) .as("Should produce \"null\" for null") .isEqualTo("null"); @@ -223,7 +223,7 @@ public void testNullHumanString() { @Test public void testTimestampsReturnType() { - Types.TimestampType type = Types.TimestampType.withZone(); + Types.TimestampType type = Types.TimestampType.microsWithZone(); Transform year = Transforms.year(); Type yearResultType = year.getResultType(type); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java index cd20868a06eb..dc0f199db132 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampsProjection.java @@ -44,7 +44,7 @@ import org.junit.jupiter.api.Test; public class TestTimestampsProjection { - private static final Types.TimestampType TYPE = Types.TimestampType.withoutZone(); + private static final Types.TimestampType TYPE = Types.TimestampType.microsWithoutZone(); private static final Schema SCHEMA = new Schema(optional(1, "timestamp", TYPE)); @SuppressWarnings("unchecked") diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java index c2330247fa9d..70b5a16e3bb3 100644 --- a/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTransformSerialization.java @@ -39,8 +39,8 @@ public void testFunctionSerialization() throws Exception { 
Types.StringType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithoutZone(), Types.BinaryType.get(), Types.FixedType.ofLength(4), Types.DecimalType.of(9, 4), diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 165d96c029cc..a04b039e5d65 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -75,8 +75,10 @@ public void testTime() { @Test public void testTimestamp() { - assertComparesCorrectly(Comparators.forType(Types.TimestampType.withoutZone()), 111, 222); - assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.microsWithZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampType.nanosWithZone()), 111, 222); } @Test diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 6c7a884a5839..2b160f01acb0 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -93,15 +93,26 @@ public void testByteBufferConversions() { assertThat(Literal.of(10000L).to(TimeType.get()).toByteBuffer().array()) .isEqualTo(new byte[] {16, 39, 0, 0, 0, 0, 0, 0}); - // timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte + // timestamps are stored as micro|nanoseconds from 1970-01-01 00:00:00 in an + // 8-byte // little-endian long // 400000L is 0...110|00011010|10000000 in binary // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... 
, 00000000 -> 0 - assertConversion(400000L, TimestampType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertConversion(400000L, TimestampType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.withoutZone()).toByteBuffer().array()) + assertConversion( + 400000L, TimestampType.microsWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.microsWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.microsWithoutZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.microsWithZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.nanosWithoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion( + 400000L, TimestampType.nanosWithZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampType.nanosWithoutZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); - assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) + assertThat(Literal.of(400000L).to(TimestampType.nanosWithZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); // strings are stored as UTF-8 bytes (without length) diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 7f5948bd5838..56a011263995 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -37,8 +37,10 @@ public class TestReadabilityChecks { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(3), diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index d981b5a26789..52cb95dcba03 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -39,8 +39,10 @@ public void testIdentityTypes() throws Exception { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withoutZone(), - Types.TimestampType.withZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.nanosWithoutZone(), + Types.TimestampType.nanosWithZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index ca5c6edce16b..088c042de550 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -29,7 +29,9 @@ public void fromPrimitiveString() { Assertions.assertThat(Types.fromPrimitiveString("BooLean")).isSameAs(Types.BooleanType.get()); Assertions.assertThat(Types.fromPrimitiveString("timestamp")) - 
.isSameAs(Types.TimestampType.withoutZone()); + .isSameAs(Types.TimestampType.microsWithoutZone()); + Assertions.assertThat(Types.fromPrimitiveString("timestamp_ns")) + .isSameAs(Types.TimestampType.nanosWithoutZone()); Assertions.assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")) .isEqualTo(Types.FixedType.ofLength(3)); diff --git a/core/src/main/java/org/apache/iceberg/HistoryTable.java b/core/src/main/java/org/apache/iceberg/HistoryTable.java index c2bd01ea4a8e..f23309c468e4 100644 --- a/core/src/main/java/org/apache/iceberg/HistoryTable.java +++ b/core/src/main/java/org/apache/iceberg/HistoryTable.java @@ -36,7 +36,7 @@ public class HistoryTable extends BaseMetadataTable { private static final Schema HISTORY_SCHEMA = new Schema( - Types.NestedField.required(1, "made_current_at", Types.TimestampType.withZone()), + Types.NestedField.required(1, "made_current_at", Types.TimestampType.microsWithZone()), Types.NestedField.required(2, "snapshot_id", Types.LongType.get()), Types.NestedField.optional(3, "parent_id", Types.LongType.get()), Types.NestedField.required(4, "is_current_ancestor", Types.BooleanType.get())); diff --git a/core/src/main/java/org/apache/iceberg/MetadataLogEntriesTable.java b/core/src/main/java/org/apache/iceberg/MetadataLogEntriesTable.java index 3cffee37dc50..b2c1780083c0 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataLogEntriesTable.java +++ b/core/src/main/java/org/apache/iceberg/MetadataLogEntriesTable.java @@ -28,7 +28,7 @@ public class MetadataLogEntriesTable extends BaseMetadataTable { private static final Schema METADATA_LOG_ENTRIES_SCHEMA = new Schema( - Types.NestedField.required(1, "timestamp", Types.TimestampType.withZone()), + Types.NestedField.required(1, "timestamp", Types.TimestampType.microsWithZone()), Types.NestedField.required(2, "file", Types.StringType.get()), Types.NestedField.optional(3, "latest_snapshot_id", Types.LongType.get()), Types.NestedField.optional(4, "latest_schema_id", Types.IntegerType.get()), diff --git a/core/src/main/java/org/apache/iceberg/PartitionsTable.java b/core/src/main/java/org/apache/iceberg/PartitionsTable.java index d93200c7cfca..17d515174883 100644 --- a/core/src/main/java/org/apache/iceberg/PartitionsTable.java +++ b/core/src/main/java/org/apache/iceberg/PartitionsTable.java @@ -81,7 +81,7 @@ public class PartitionsTable extends BaseMetadataTable { Types.NestedField.optional( 9, "last_updated_at", - Types.TimestampType.withZone(), + Types.TimestampType.microsWithZone(), "Commit time of snapshot that last updated this partition"), Types.NestedField.optional( 10, diff --git a/core/src/main/java/org/apache/iceberg/ScanSummary.java b/core/src/main/java/org/apache/iceberg/ScanSummary.java index f975ef1636df..e5298387027a 100644 --- a/core/src/main/java/org/apache/iceberg/ScanSummary.java +++ b/core/src/main/java/org/apache/iceberg/ScanSummary.java @@ -91,7 +91,7 @@ private void addTimestampFilter(UnboundPredicate filter) { } public Builder after(String timestamp) { - Literal tsLiteral = Literal.of(timestamp).to(Types.TimestampType.withoutZone()); + Literal tsLiteral = Literal.of(timestamp).to(Types.TimestampType.microsWithoutZone()); return after(tsLiteral.value() / 1000); } @@ -101,7 +101,7 @@ public Builder after(long timestampMillis) { } public Builder before(String timestamp) { - Literal tsLiteral = Literal.of(timestamp).to(Types.TimestampType.withoutZone()); + Literal tsLiteral = Literal.of(timestamp).to(Types.TimestampType.microsWithoutZone()); return before(tsLiteral.value() / 1000); } @@ -133,7 +133,7 
@@ private void removeTimeFilters(List expressions, Expression expressi NamedReference ref = (NamedReference) pred.term(); Literal lit = pred.literal(); if (TIMESTAMP_NAMES.contains(ref.name())) { - Literal tsLiteral = lit.to(Types.TimestampType.withoutZone()); + Literal tsLiteral = lit.to(Types.TimestampType.microsWithoutZone()); long millis = toMillis(tsLiteral.value()); addTimestampFilter(Expressions.predicate(pred.op(), "timestamp_ms", millis)); return; diff --git a/core/src/main/java/org/apache/iceberg/SingleValueParser.java b/core/src/main/java/org/apache/iceberg/SingleValueParser.java index 3de6a0bcc663..287504af9798 100644 --- a/core/src/main/java/org/apache/iceberg/SingleValueParser.java +++ b/core/src/main/java/org/apache/iceberg/SingleValueParser.java @@ -46,6 +46,7 @@ private SingleValueParser() {} private static final String KEYS = "keys"; private static final String VALUES = "values"; + @SuppressWarnings("checkstyle:CyclomaticComplexity") public static Object fromJson(Type type, JsonNode defaultValue) { if (defaultValue == null || defaultValue.isNull()) { return null; @@ -129,16 +130,29 @@ public static Object fromJson(Type type, JsonNode defaultValue) { case TIMESTAMP: Preconditions.checkArgument( defaultValue.isTextual(), "Cannot parse default as a %s value: %s", type, defaultValue); - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - String timestampTz = defaultValue.textValue(); + Types.TimestampType timestamp = (Types.TimestampType) type; + String timestampText = defaultValue.textValue(); + if (timestamp.shouldAdjustToUTC()) { Preconditions.checkArgument( - DateTimeUtil.isUTCTimestamptz(timestampTz), + DateTimeUtil.isUTCTimestamptz(timestampText), "Cannot parse default as a %s value: %s, offset must be +00:00", - type, + timestamp, defaultValue); - return DateTimeUtil.isoTimestamptzToMicros(timestampTz); - } else { - return DateTimeUtil.isoTimestampToMicros(defaultValue.textValue()); + } + switch (timestamp.unit()) { + case MICROS: + if (timestamp.shouldAdjustToUTC()) { + return DateTimeUtil.isoTimestamptzToMicros(timestampText); + } + return DateTimeUtil.isoTimestampToMicros(timestampText); + case NANOS: + if (timestamp.shouldAdjustToUTC()) { + return DateTimeUtil.isoTimestamptzToNanos(timestampText); + } + return DateTimeUtil.isoTimestampToNanos(timestampText); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestamp.unit()); } case FIXED: Preconditions.checkArgument( @@ -236,7 +250,7 @@ public static String toJson(Type type, Object defaultValue, boolean pretty) { return JsonUtil.generate(gen -> toJson(type, defaultValue, gen), pretty); } - @SuppressWarnings("checkstyle:MethodLength") + @SuppressWarnings({"checkstyle:MethodLength", "checkstyle:CyclomaticComplexity"}) public static void toJson(Type type, Object defaultValue, JsonGenerator generator) throws IOException { if (defaultValue == null) { @@ -283,10 +297,25 @@ public static void toJson(Type type, Object defaultValue, JsonGenerator generato case TIMESTAMP: Preconditions.checkArgument( defaultValue instanceof Long, "Invalid default %s value: %s", type, defaultValue); - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - generator.writeString(DateTimeUtil.microsToIsoTimestamptz((Long) defaultValue)); - } else { - generator.writeString(DateTimeUtil.microsToIsoTimestamp((Long) defaultValue)); + Types.TimestampType timestamp = (Types.TimestampType) type; + switch (timestamp.unit()) { + case MICROS: + if (timestamp.shouldAdjustToUTC()) { + 
generator.writeString(DateTimeUtil.microsToIsoTimestamptz((Long) defaultValue)); + } else { + generator.writeString(DateTimeUtil.microsToIsoTimestamp((Long) defaultValue)); + } + break; + case NANOS: + if (timestamp.shouldAdjustToUTC()) { + generator.writeString(DateTimeUtil.nanosToIsoTimestamptz((Long) defaultValue)); + } else { + generator.writeString(DateTimeUtil.nanosToIsoTimestamp((Long) defaultValue)); + } + break; + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestamp.unit()); } break; case STRING: diff --git a/core/src/main/java/org/apache/iceberg/SnapshotsTable.java b/core/src/main/java/org/apache/iceberg/SnapshotsTable.java index f948c5578345..d5543f76eedb 100644 --- a/core/src/main/java/org/apache/iceberg/SnapshotsTable.java +++ b/core/src/main/java/org/apache/iceberg/SnapshotsTable.java @@ -29,7 +29,7 @@ public class SnapshotsTable extends BaseMetadataTable { private static final Schema SNAPSHOT_SCHEMA = new Schema( - Types.NestedField.required(1, "committed_at", Types.TimestampType.withZone()), + Types.NestedField.required(1, "committed_at", Types.TimestampType.microsWithZone()), Types.NestedField.required(2, "snapshot_id", Types.LongType.get()), Types.NestedField.optional(3, "parent_id", Types.LongType.get()), Types.NestedField.optional(4, "operation", Types.StringType.get()), diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java index 46c17722f8f7..bc14cd5236a6 100644 --- a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java +++ b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java @@ -129,7 +129,8 @@ public static Schema buildAvroProjection( public static boolean isTimestamptz(Schema schema) { LogicalType logicalType = schema.getLogicalType(); if (logicalType instanceof LogicalTypes.TimestampMillis - || logicalType instanceof LogicalTypes.TimestampMicros) { + || logicalType instanceof LogicalTypes.TimestampMicros + || logicalType instanceof IcebergLogicalTypes.TimestampNanos) { // timestamptz is adjusted to UTC Object value = schema.getObjectProp(ADJUST_TO_UTC_PROP); diff --git a/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java b/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java index 0fa2e795811b..271e6287a78d 100644 --- a/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java +++ b/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java @@ -144,6 +144,9 @@ public ValueReader primitive(Schema primitive) { // Spark uses the same representation return ValueReaders.longs(); + case "timestamp-nanos": + return ValueReaders.longs(); + case "decimal": return ValueReaders.decimal( ValueReaders.decimalBytesReader(primitive), diff --git a/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java b/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java index 421bfc9dc462..5b16f9c9276f 100644 --- a/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java +++ b/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java @@ -107,6 +107,9 @@ public ValueWriter primitive(Schema primitive) { case "timestamp-micros": return ValueWriters.longs(); + case "timestamp-nanos": + return ValueWriters.longs(); + case "decimal": LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; return ValueWriters.decimal(decimal.getPrecision(), decimal.getScale()); diff --git a/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java 
b/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java new file mode 100644 index 000000000000..a81288a85d24 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.avro; + +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; + +public class IcebergLogicalTypes { + + private IcebergLogicalTypes() {} + + private static final String TIMESTAMP_NANOS = "timestamp-nanos"; + + private static final TimestampNanos TIMESTAMP_NANOS_TYPE = new TimestampNanos(); + + public static TimestampNanos timestampNanos() { + return TIMESTAMP_NANOS_TYPE; + } + + /** TimestampNanos represents a date and time in nanoseconds */ + public static class TimestampNanos extends LogicalType { + private TimestampNanos() { + super(TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException( + "Timestamp (nanos) can only be used with an underlying long type"); + } + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java index 174d63975195..8828ff17feeb 100644 --- a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java +++ b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java @@ -162,6 +162,7 @@ public Type map(Schema map, Type valueType) { } @Override + @SuppressWarnings("checkstyle:CyclomaticComplexity") public Type primitive(Schema primitive) { // first check supported logical types LogicalType logical = primitive.getLogicalType(); @@ -182,9 +183,16 @@ public Type primitive(Schema primitive) { } else if (logical instanceof LogicalTypes.TimestampMillis || logical instanceof LogicalTypes.TimestampMicros) { if (AvroSchemaUtil.isTimestamptz(primitive)) { - return Types.TimestampType.withZone(); + return Types.TimestampType.microsWithZone(); } else { - return Types.TimestampType.withoutZone(); + return Types.TimestampType.microsWithoutZone(); + } + + } else if (logical instanceof IcebergLogicalTypes.TimestampNanos) { + if (AvroSchemaUtil.isTimestamptz(primitive)) { + return Types.TimestampType.nanosWithZone(); + } else { + return Types.TimestampType.nanosWithoutZone(); } } else if (LogicalTypes.uuid().getName().equals(name)) { diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java index bc2847e1b4ba..b48f6c20a7e8 100644 --- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java +++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java @@ -44,6 +44,10 @@ class TypeToSchema extends 
TypeUtil.SchemaVisitor { LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); private static final Schema TIMESTAMPTZ_SCHEMA = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + private static final Schema TIMESTAMPNS_SCHEMA = + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + private static final Schema TIMESTAMPTZNS_SCHEMA = + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING); private static final Schema UUID_SCHEMA = LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)); @@ -52,6 +56,8 @@ class TypeToSchema extends TypeUtil.SchemaVisitor { static { TIMESTAMP_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); TIMESTAMPTZ_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, true); + TIMESTAMPNS_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); + TIMESTAMPTZNS_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, true); } private final Deque fieldIds = Lists.newLinkedList(); @@ -204,10 +210,18 @@ public Schema primitive(Type.PrimitiveType primitive) { primitiveSchema = TIME_SCHEMA; break; case TIMESTAMP: - if (((Types.TimestampType) primitive).shouldAdjustToUTC()) { - primitiveSchema = TIMESTAMPTZ_SCHEMA; - } else { - primitiveSchema = TIMESTAMP_SCHEMA; + Types.TimestampType timestamp = (Types.TimestampType) primitive; + switch (timestamp.unit()) { + case MICROS: + primitiveSchema = timestamp.shouldAdjustToUTC() ? TIMESTAMPTZ_SCHEMA : TIMESTAMP_SCHEMA; + break; + case NANOS: + primitiveSchema = + timestamp.shouldAdjustToUTC() ? TIMESTAMPTZNS_SCHEMA : TIMESTAMPNS_SCHEMA; + break; + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestamp.unit()); } break; case STRING: diff --git a/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java b/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java index 4cb41263152d..7895209d6a8b 100644 --- a/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java +++ b/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java @@ -40,10 +40,19 @@ public static Object convertConstant(Type type, Object value) { case DATE: return DateTimeUtil.dateFromDays((Integer) value); case TIMESTAMP: - if (((Types.TimestampType) type).shouldAdjustToUTC()) { - return DateTimeUtil.timestamptzFromMicros((Long) value); - } else { - return DateTimeUtil.timestampFromMicros((Long) value); + Types.TimestampType timestamp = (Types.TimestampType) type; + switch (timestamp.unit()) { + case MICROS: + return timestamp.shouldAdjustToUTC() + ? DateTimeUtil.timestamptzFromMicros((Long) value) + : DateTimeUtil.timestampFromMicros((Long) value); + case NANOS: + return timestamp.shouldAdjustToUTC() + ? 
DateTimeUtil.timestamptzFromNanos((Long) value) + : DateTimeUtil.timestampFromNanos((Long) value); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestamp.unit()); } case FIXED: if (value instanceof GenericData.Fixed) { diff --git a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java b/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java index 1cc901d15bc1..3bc9fb2e4b18 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java @@ -131,10 +131,14 @@ public ValueReader primitive(Type.PrimitiveType ignored, Schema primitive) { return GenericReaders.times(); case "timestamp-micros": - if (AvroSchemaUtil.isTimestamptz(primitive)) { - return GenericReaders.timestamptz(); - } - return GenericReaders.timestamps(); + return AvroSchemaUtil.isTimestamptz(primitive) + ? GenericReaders.timestamptz() + : GenericReaders.timestamps(); + + case "timestamp-nanos": + return AvroSchemaUtil.isTimestamptz(primitive) + ? GenericReaders.timestamptzns() + : GenericReaders.timestampns(); case "decimal": return ValueReaders.decimal( diff --git a/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java b/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java index 4d6973d3cfe3..9880a5a77fc6 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java @@ -113,10 +113,14 @@ public ValueWriter primitive(Schema primitive) { return GenericWriters.times(); case "timestamp-micros": - if (AvroSchemaUtil.isTimestamptz(primitive)) { - return GenericWriters.timestamptz(); - } - return GenericWriters.timestamps(); + return AvroSchemaUtil.isTimestamptz(primitive) + ? GenericWriters.timestamptz() + : GenericWriters.timestamps(); + + case "timestamp-nanos": + return AvroSchemaUtil.isTimestamptz(primitive) + ? 
GenericWriters.timestamptzns() + : GenericWriters.timestampns(); case "decimal": LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; diff --git a/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java b/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java index 91a728d53d38..58116b8598e5 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java @@ -48,10 +48,18 @@ static ValueReader timestamps() { return TimestampReader.INSTANCE; } + static ValueReader timestampns() { + return TimestampnsReader.INSTANCE; + } + static ValueReader timestamptz() { return TimestamptzReader.INSTANCE; } + static ValueReader timestamptzns() { + return TimestamptznsReader.INSTANCE; + } + static ValueReader struct( StructType struct, List> readers, Map idToConstant) { return new GenericRecordReader(readers, struct, idToConstant); @@ -90,6 +98,17 @@ public LocalDateTime read(Decoder decoder, Object reuse) throws IOException { } } + private static class TimestampnsReader implements ValueReader { + private static final TimestampnsReader INSTANCE = new TimestampnsReader(); + + private TimestampnsReader() {} + + @Override + public LocalDateTime read(Decoder decoder, Object reuse) throws IOException { + return DateTimeUtil.timestampFromNanos(decoder.readLong()); + } + } + private static class TimestamptzReader implements ValueReader { private static final TimestamptzReader INSTANCE = new TimestamptzReader(); @@ -101,6 +120,17 @@ public OffsetDateTime read(Decoder decoder, Object reuse) throws IOException { } } + private static class TimestamptznsReader implements ValueReader { + private static final TimestamptznsReader INSTANCE = new TimestamptznsReader(); + + private TimestamptznsReader() {} + + @Override + public OffsetDateTime read(Decoder decoder, Object reuse) throws IOException { + return DateTimeUtil.timestamptzFromNanos(decoder.readLong()); + } + } + private static class GenericRecordReader extends ValueReaders.StructReader { private final StructType structType; diff --git a/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java b/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java index 1cea012e7a37..b4505f488106 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java @@ -47,10 +47,18 @@ static ValueWriter timestamps() { return TimestampWriter.INSTANCE; } + static ValueWriter timestampns() { + return TimestampnsWriter.INSTANCE; + } + static ValueWriter timestamptz() { return TimestamptzWriter.INSTANCE; } + static ValueWriter timestamptzns() { + return TimestamptznsWriter.INSTANCE; + } + static ValueWriter struct(List> writers) { return new GenericRecordWriter(writers); } @@ -91,6 +99,17 @@ public void write(LocalDateTime timestamp, Encoder encoder) throws IOException { } } + private static class TimestampnsWriter implements ValueWriter { + private static final TimestampnsWriter INSTANCE = new TimestampnsWriter(); + + private TimestampnsWriter() {} + + @Override + public void write(LocalDateTime timestampns, Encoder encoder) throws IOException { + encoder.writeLong(ChronoUnit.NANOS.between(EPOCH, timestampns.atOffset(ZoneOffset.UTC))); + } + } + private static class TimestamptzWriter implements ValueWriter { private static final TimestamptzWriter INSTANCE = new TimestamptzWriter(); @@ -102,6 +121,17 @@ public void write(OffsetDateTime timestamptz, 
Encoder encoder) throws IOExceptio } } + private static class TimestamptznsWriter implements ValueWriter { + private static final TimestamptznsWriter INSTANCE = new TimestamptznsWriter(); + + private TimestamptznsWriter() {} + + @Override + public void write(OffsetDateTime timestamptzns, Encoder encoder) throws IOException { + encoder.writeLong(ChronoUnit.NANOS.between(EPOCH, timestamptzns)); + } + } + private static class GenericRecordWriter extends ValueWriters.StructWriter { private GenericRecordWriter(List> writers) { super(writers); diff --git a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java index 08b27d7460da..a5026efd40e3 100644 --- a/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java +++ b/core/src/test/java/org/apache/iceberg/TestManifestWriterVersions.java @@ -45,7 +45,7 @@ public class TestManifestWriterVersions { private static final Schema SCHEMA = new Schema( required(1, "id", Types.LongType.get()), - required(2, "timestamp", Types.TimestampType.withZone()), + required(2, "timestamp", Types.TimestampType.microsWithZone()), required(3, "category", Types.StringType.get()), required(4, "data", Types.StringType.get()), required(5, "double", Types.DoubleType.get())); diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java index 32bc6299ce1b..fe5b2b114c29 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetrics.java +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -99,10 +99,12 @@ protected TestMetrics(int formatVersion) { required(7, "stringCol", StringType.get()), optional(8, "dateCol", DateType.get()), required(9, "timeCol", TimeType.get()), - required(10, "timestampColAboveEpoch", TimestampType.withoutZone()), - required(11, "fixedCol", FixedType.ofLength(4)), - required(12, "binaryCol", BinaryType.get()), - required(13, "timestampColBelowEpoch", TimestampType.withoutZone())); + required(10, "timestampColAboveEpoch", TimestampType.microsWithoutZone()), + required(11, "timestampnsColAboveEpoch", TimestampType.nanosWithoutZone()), + required(12, "fixedCol", FixedType.ofLength(4)), + required(13, "binaryCol", BinaryType.get()), + required(14, "timestampColBelowEpoch", TimestampType.microsWithoutZone()), + required(15, "timestampnsColBelowEpoch", TimestampType.nanosWithoutZone())); private static final Schema FLOAT_DOUBLE_ONLY_SCHEMA = new Schema( @@ -159,9 +161,11 @@ public void testMetricsForRepeatedValues() throws IOException { record.setField("dateCol", DateTimeUtil.dateFromDays(1500)); record.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); record.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + record.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); record.setField("fixedCol", fixed); record.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); record.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + record.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, record, record); Assert.assertEquals(2L, (long) metrics.recordCount()); @@ -193,9 +197,12 @@ public void testMetricsForTopLevelFields() throws IOException { firstRecord.setField("dateCol", DateTimeUtil.dateFromDays(1500)); firstRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); firstRecord.setField("timestampColAboveEpoch", 
DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); firstRecord.setField("fixedCol", fixed); firstRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); firstRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-1_900_300L)); + firstRecord.setField( + "timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(-1_900_300_000L)); Record secondRecord = GenericRecord.create(SIMPLE_SCHEMA); secondRecord.setField("booleanCol", false); secondRecord.setField("intCol", Integer.MIN_VALUE); @@ -207,9 +214,11 @@ public void testMetricsForTopLevelFields() throws IOException { secondRecord.setField("dateCol", null); secondRecord.setField("timeCol", DateTimeUtil.timeFromMicros(3000L)); secondRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(900L)); + secondRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(900_000L)); secondRecord.setField("fixedCol", fixed); secondRecord.setField("binaryCol", ByteBuffer.wrap("W".getBytes())); secondRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-7_000L)); + secondRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(-7_000_000L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, firstRecord, secondRecord); Assert.assertEquals(2L, (long) metrics.recordCount()); @@ -232,13 +241,15 @@ public void testMetricsForTopLevelFields() throws IOException { assertCounts(9, 2L, 0L, metrics); assertBounds(9, TimeType.get(), 2000L, 3000L, metrics); assertCounts(10, 2L, 0L, metrics); - assertBounds(10, TimestampType.withoutZone(), 0L, 900L, metrics); + assertBounds(10, TimestampType.microsWithoutZone(), 0L, 900L, metrics); assertCounts(11, 2L, 0L, metrics); - assertBounds( - 11, FixedType.ofLength(4), ByteBuffer.wrap(fixed), ByteBuffer.wrap(fixed), metrics); + assertBounds(11, TimestampType.nanosWithoutZone(), 0L, 900_000L, metrics); assertCounts(12, 2L, 0L, metrics); assertBounds( - 12, + 12, FixedType.ofLength(4), ByteBuffer.wrap(fixed), ByteBuffer.wrap(fixed), metrics); + assertCounts(13, 2L, 0L, metrics); + assertBounds( + 13, BinaryType.get(), ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), @@ -248,10 +259,12 @@ public void testMetricsForTopLevelFields() throws IOException { // ORC-342: ORC writer creates inaccurate timestamp data and stats 1 sec below epoch // Values in the range `[1969-12-31 23:59:59.000,1969-12-31 23:59:59.999]` will have 1 sec // added to them - // So the upper bound value of -7_000 micros becomes 993_000 micros - assertBounds(13, TimestampType.withoutZone(), -1_900_300L, 993_000L, metrics); + // So the upper bound value of -7 millis becomes +993 millis + assertBounds(14, TimestampType.microsWithoutZone(), -1_900_300L, 993_000L, metrics); + assertBounds(15, TimestampType.nanosWithoutZone(), -1_900_300_000L, 993_000_000L, metrics); } else { - assertBounds(13, TimestampType.withoutZone(), -1_900_300L, -7_000L, metrics); + assertBounds(14, TimestampType.microsWithoutZone(), -1_900_300L, -7_000L, metrics); + assertBounds(15, TimestampType.nanosWithoutZone(), -1_900_300_000L, -7_000_000L, metrics); } } @@ -465,10 +478,13 @@ public void testMetricsForTopLevelWithMultipleRowGroup() throws Exception { newRecord.setField("dateCol", DateTimeUtil.dateFromDays(i + 1)); newRecord.setField("timeCol", DateTimeUtil.timeFromMicros(i + 1L)); newRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(i + 1L)); + 
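As a quick sanity check on the ORC-342 expectation asserted a few hunks above: the ORC writer adds one second to sub-second values just below the epoch, so the shifted upper bounds follow directly from that offset. A minimal, illustrative check (not part of the patch):

```java
// Back-of-the-envelope check for the shifted ORC-342 upper bounds asserted above; illustrative only.
public class Orc342ShiftCheck {
  public static void main(String[] args) {
    long shiftedMicros = -7_000L + 1_000_000L;         // 993_000 micros
    long shiftedNanos = -7_000_000L + 1_000_000_000L;  // 993_000_000 nanos
    System.out.println(shiftedMicros + " " + shiftedNanos);
  }
}
```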
newRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(i + 1L)); newRecord.setField("fixedCol", fixed); newRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); newRecord.setField( "timestampColBelowEpoch", DateTimeUtil.timestampFromMicros((i + 1L) * -1L)); + newRecord.setField( + "timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos((i + 1L) * -1L)); records.add(newRecord); } @@ -696,9 +712,11 @@ public void testSortedColumnMetrics() throws IOException { firstRecord.setField("dateCol", DateTimeUtil.dateFromDays(1500)); firstRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); firstRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); firstRecord.setField("fixedCol", fixed); firstRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); firstRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Record secondRecord = GenericRecord.create(SIMPLE_SCHEMA); @@ -712,9 +730,11 @@ public void testSortedColumnMetrics() throws IOException { secondRecord.setField("dateCol", DateTimeUtil.dateFromDays(3000)); secondRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); secondRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + secondRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); secondRecord.setField("fixedCol", fixed); secondRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); secondRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + secondRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, MetricsConfig.forTable(table), firstRecord, secondRecord); @@ -779,9 +799,9 @@ protected void assertBounds( Map lowerBounds = metrics.lowerBounds(); Map upperBounds = metrics.upperBounds(); - Assert.assertEquals( - lowerBound, - lowerBounds.containsKey(fieldId) ? fromByteBuffer(type, lowerBounds.get(fieldId)) : null); + T metricLowerBound = + lowerBounds.containsKey(fieldId) ? fromByteBuffer(type, lowerBounds.get(fieldId)) : null; + Assert.assertEquals(lowerBound, metricLowerBound); Assert.assertEquals( upperBound, upperBounds.containsKey(fieldId) ? 
fromByteBuffer(type, upperBounds.get(fieldId)) : null); diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java index 5aedde6ce5b0..ddd0ecacd777 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java @@ -51,8 +51,10 @@ private static List primitiveTypes() { return Lists.newArrayList( StringType.get(), TimeType.get(), - TimestampType.withoutZone(), - TimestampType.withZone(), + TimestampType.microsWithoutZone(), + TimestampType.microsWithZone(), + TimestampType.nanosWithoutZone(), + TimestampType.nanosWithZone(), UUIDType.get(), DateType.get(), BooleanType.get(), diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 1d903dfbb1a5..8fcf5624fa28 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -281,8 +281,10 @@ public void testUpdateFailure() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.nanosWithZone(), + Types.TimestampType.nanosWithoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get(), @@ -1086,11 +1088,11 @@ public void testMoveTopLevelColumnAfter() { new Schema( required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get()), - optional(3, "ts", Types.TimestampType.withZone())); + optional(3, "ts", Types.TimestampType.microsWithZone())); Schema expected = new Schema( required(1, "id", Types.LongType.get()), - optional(3, "ts", Types.TimestampType.withZone()), + optional(3, "ts", Types.TimestampType.microsWithZone()), required(2, "data", Types.StringType.get())); Schema actual = new SchemaUpdate(schema, 3).moveAfter("ts", "id").apply(); @@ -1102,13 +1104,13 @@ public void testMoveTopLevelColumnAfter() { public void testMoveTopLevelColumnBefore() { Schema schema = new Schema( - optional(3, "ts", Types.TimestampType.withZone()), + optional(3, "ts", Types.TimestampType.microsWithZone()), required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); Schema expected = new Schema( required(1, "id", Types.LongType.get()), - optional(3, "ts", Types.TimestampType.withZone()), + optional(3, "ts", Types.TimestampType.microsWithZone()), required(2, "data", Types.StringType.get())); Schema actual = new SchemaUpdate(schema, 3).moveBefore("ts", "data").apply(); @@ -1205,7 +1207,7 @@ public void testMoveNestedFieldAfter() { Types.StructType.of( required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get()), - optional(5, "ts", Types.TimestampType.withZone())))); + optional(5, "ts", Types.TimestampType.microsWithZone())))); Schema expected = new Schema( required(1, "id", Types.LongType.get()), @@ -1214,7 +1216,7 @@ public void testMoveNestedFieldAfter() { "struct", Types.StructType.of( required(3, "count", Types.LongType.get()), - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(4, "data", Types.StringType.get())))); Schema actual = new SchemaUpdate(schema, 5).moveAfter("struct.ts", "struct.count").apply(); @@ -1231,7 +1233,7 @@ public void testMoveNestedFieldBefore() { 
2, "struct", Types.StructType.of( - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get())))); Schema expected = @@ -1242,7 +1244,7 @@ public void testMoveNestedFieldBefore() { "struct", Types.StructType.of( required(3, "count", Types.LongType.get()), - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(4, "data", Types.StringType.get())))); Schema actual = new SchemaUpdate(schema, 5).moveBefore("struct.ts", "struct.data").apply(); @@ -1261,7 +1263,7 @@ public void testMoveListElementField() { Types.ListType.ofOptional( 6, Types.StructType.of( - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get()))))); Schema expected = @@ -1274,7 +1276,7 @@ public void testMoveListElementField() { 6, Types.StructType.of( required(3, "count", Types.LongType.get()), - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(4, "data", Types.StringType.get()))))); Schema actual = new SchemaUpdate(schema, 6).moveBefore("list.ts", "list.data").apply(); @@ -1295,7 +1297,7 @@ public void testMoveMapValueStructField() { 7, Types.StringType.get(), Types.StructType.of( - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get()))))); Schema expected = @@ -1310,7 +1312,7 @@ public void testMoveMapValueStructField() { Types.StringType.get(), Types.StructType.of( required(3, "count", Types.LongType.get()), - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(4, "data", Types.StringType.get()))))); Schema actual = new SchemaUpdate(schema, 7).moveBefore("map.ts", "map.data").apply(); @@ -1326,12 +1328,12 @@ public void testMoveAddedTopLevelColumn() { Schema expected = new Schema( required(1, "id", Types.LongType.get()), - optional(3, "ts", Types.TimestampType.withZone()), + optional(3, "ts", Types.TimestampType.microsWithZone()), required(2, "data", Types.StringType.get())); Schema actual = new SchemaUpdate(schema, 2) - .addColumn("ts", Types.TimestampType.withZone()) + .addColumn("ts", Types.TimestampType.microsWithZone()) .moveAfter("ts", "id") .apply(); @@ -1346,13 +1348,13 @@ public void testMoveAddedTopLevelColumnAfterAddedColumn() { Schema expected = new Schema( required(1, "id", Types.LongType.get()), - optional(3, "ts", Types.TimestampType.withZone()), + optional(3, "ts", Types.TimestampType.microsWithZone()), optional(4, "count", Types.LongType.get()), required(2, "data", Types.StringType.get())); Schema actual = new SchemaUpdate(schema, 2) - .addColumn("ts", Types.TimestampType.withZone()) + .addColumn("ts", Types.TimestampType.microsWithZone()) .addColumn("count", Types.LongType.get()) .moveAfter("ts", "id") .moveAfter("count", "ts") @@ -1379,13 +1381,13 @@ public void testMoveAddedNestedStructField() { 2, "struct", Types.StructType.of( - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get())))); Schema actual = new SchemaUpdate(schema, 4) - 
.addColumn("struct", "ts", Types.TimestampType.withZone()) + .addColumn("struct", "ts", Types.TimestampType.microsWithZone()) .moveBefore("struct.ts", "struct.count") .apply(); @@ -1411,13 +1413,13 @@ public void testMoveAddedNestedStructFieldBeforeAddedColumn() { "struct", Types.StructType.of( optional(6, "size", Types.LongType.get()), - optional(5, "ts", Types.TimestampType.withZone()), + optional(5, "ts", Types.TimestampType.microsWithZone()), required(3, "count", Types.LongType.get()), required(4, "data", Types.StringType.get())))); Schema actual = new SchemaUpdate(schema, 4) - .addColumn("struct", "ts", Types.TimestampType.withZone()) + .addColumn("struct", "ts", Types.TimestampType.microsWithZone()) .addColumn("struct", "size", Types.LongType.get()) .moveBefore("struct.ts", "struct.count") .moveBefore("struct.size", "struct.ts") @@ -1472,7 +1474,7 @@ public void testMoveBeforeAddFails() { () -> new SchemaUpdate(schema, 2) .moveFirst("ts") - .addColumn("ts", Types.TimestampType.withZone()) + .addColumn("ts", Types.TimestampType.microsWithZone()) .apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: ts"); @@ -1481,7 +1483,7 @@ public void testMoveBeforeAddFails() { () -> new SchemaUpdate(schema, 2) .moveBefore("ts", "id") - .addColumn("ts", Types.TimestampType.withZone()) + .addColumn("ts", Types.TimestampType.microsWithZone()) .apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: ts"); @@ -1490,7 +1492,7 @@ public void testMoveBeforeAddFails() { () -> new SchemaUpdate(schema, 2) .moveAfter("ts", "data") - .addColumn("ts", Types.TimestampType.withZone()) + .addColumn("ts", Types.TimestampType.microsWithZone()) .apply()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot move missing column: ts"); diff --git a/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java b/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java index e04ba440ae3f..bea87203ad8c 100644 --- a/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java +++ b/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java @@ -43,8 +43,10 @@ public void testValidDefaults() throws IOException { {Types.DoubleType.get(), "123.456"}, {Types.DateType.get(), "\"2007-12-03\""}, {Types.TimeType.get(), "\"10:15:30\""}, - {Types.TimestampType.withoutZone(), "\"2007-12-03T10:15:30\""}, - {Types.TimestampType.withZone(), "\"2007-12-03T10:15:30+00:00\""}, + {Types.TimestampType.microsWithoutZone(), "\"2007-12-03T10:15:30\""}, + {Types.TimestampType.microsWithZone(), "\"2007-12-03T10:15:30+00:00\""}, + {Types.TimestampType.nanosWithoutZone(), "\"2008-12-03T10:15:30\""}, + {Types.TimestampType.nanosWithZone(), "\"2008-12-03T10:15:30+00:00\""}, {Types.StringType.get(), "\"foo\""}, {Types.UUIDType.get(), "\"eb26bdb1-a1d8-4aa6-990e-da940875492c\""}, {Types.FixedType.ofLength(2), "\"111f\""}, @@ -159,14 +161,24 @@ public void testInvalidDecimal() { @Test public void testInvalidTimestamptz() { - Type expectedType = Types.TimestampType.withZone(); String defaultJson = "\"2007-12-03T10:15:30+01:00\""; Exception exception = Assert.assertThrows( IllegalArgumentException.class, - () -> defaultValueParseAndUnParseRoundTrip(expectedType, defaultJson)); + () -> + defaultValueParseAndUnParseRoundTrip( + Types.TimestampType.microsWithZone(), defaultJson)); Assert.assertTrue( exception.getMessage().startsWith("Cannot parse default as a timestamptz value")); + + exception = + Assert.assertThrows( + 
IllegalArgumentException.class, + () -> + defaultValueParseAndUnParseRoundTrip( + Types.TimestampType.nanosWithZone(), defaultJson)); + Assert.assertTrue( + exception.getMessage().startsWith("Cannot parse default as a timestamptz_ns value")); } // serialize to json and deserialize back should return the same result diff --git a/core/src/test/java/org/apache/iceberg/TestSortOrder.java b/core/src/test/java/org/apache/iceberg/TestSortOrder.java index 8fbc4e11fbc3..a22fb5f22619 100644 --- a/core/src/test/java/org/apache/iceberg/TestSortOrder.java +++ b/core/src/test/java/org/apache/iceberg/TestSortOrder.java @@ -52,7 +52,7 @@ public class TestSortOrder { required(10, "id", Types.IntegerType.get()), required(11, "data", Types.StringType.get()), required(40, "d", Types.DateType.get()), - required(41, "ts", Types.TimestampType.withZone()), + required(41, "ts", Types.TimestampType.microsWithZone()), optional( 12, "s", diff --git a/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java b/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java index 7cf993307e3d..7d3f3bc151b5 100644 --- a/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java +++ b/core/src/test/java/org/apache/iceberg/TestTimestampPartitions.java @@ -45,7 +45,7 @@ public void testPartitionAppend() throws IOException { Schema dateSchema = new Schema( required(1, "id", Types.LongType.get()), - optional(2, "timestamp", Types.TimestampType.withoutZone())); + optional(2, "timestamp", Types.TimestampType.microsWithoutZone())); PartitionSpec partitionSpec = PartitionSpec.builderFor(dateSchema).day("timestamp", "date").build(); diff --git a/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java b/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java index 926272e60f98..5d14e9d4e736 100644 --- a/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java +++ b/core/src/test/java/org/apache/iceberg/TestUpdatePartitionSpec.java @@ -40,7 +40,7 @@ public class TestUpdatePartitionSpec extends TableTestBase { private static final Schema SCHEMA = new Schema( Types.NestedField.required(1, "id", Types.LongType.get()), - Types.NestedField.required(2, "ts", Types.TimestampType.withZone()), + Types.NestedField.required(2, "ts", Types.TimestampType.microsWithZone()), Types.NestedField.required(3, "category", Types.StringType.get()), Types.NestedField.optional(4, "data", Types.StringType.get())); diff --git a/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java b/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java index e3870f84decd..2be57668b88e 100644 --- a/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java +++ b/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java @@ -46,15 +46,18 @@ public abstract class AvroDataTest { optional(105, "f", Types.FloatType.get()), required(106, "d", Types.DoubleType.get()), optional(107, "date", Types.DateType.get()), - required(108, "ts", Types.TimestampType.withZone()), - required(110, "s", Types.StringType.get()), - required(111, "uuid", Types.UUIDType.get()), - required(112, "fixed", Types.FixedType.ofLength(7)), - optional(113, "bytes", Types.BinaryType.get()), - required(114, "dec_9_0", Types.DecimalType.of(9, 0)), - required(115, "dec_11_2", Types.DecimalType.of(11, 2)), - required(116, "dec_38_10", Types.DecimalType.of(38, 10)), // maximum precision - required(117, "time", Types.TimeType.get())); + required(108, "tstz", Types.TimestampType.microsWithZone()), + required(109, "ts", 
Types.TimestampType.microsWithoutZone()), + required(110, "tstzns", Types.TimestampType.nanosWithZone()), + required(111, "tsns", Types.TimestampType.nanosWithoutZone()), + required(112, "s", Types.StringType.get()), + required(113, "uuid", Types.UUIDType.get()), + required(114, "fixed", Types.FixedType.ofLength(7)), + optional(115, "bytes", Types.BinaryType.get()), + required(116, "dec_9_0", Types.DecimalType.of(9, 0)), + required(117, "dec_11_2", Types.DecimalType.of(11, 2)), + required(118, "dec_38_10", Types.DecimalType.of(38, 10)), // maximum precision + required(119, "time", Types.TimeType.get())); @Test public void testSimpleStruct() throws IOException { diff --git a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java index e135364bca66..913772464c32 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java @@ -50,8 +50,10 @@ public void testPrimitiveTypes() { Types.DoubleType.get(), Types.DateType.get(), Types.TimeType.get(), - Types.TimestampType.withZone(), - Types.TimestampType.withoutZone(), + Types.TimestampType.microsWithZone(), + Types.TimestampType.microsWithoutZone(), + Types.TimestampType.nanosWithZone(), + Types.TimestampType.nanosWithoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(12), @@ -71,6 +73,12 @@ public void testPrimitiveTypes() { LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), true), addAdjustToUtc( LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), false), + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), + true), + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), + false), Schema.create(Schema.Type.STRING), LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)), Schema.createFixed("fixed_12", null, null, 12), @@ -96,10 +104,15 @@ public void testAvroToIcebergTimestampTypeWithoutAdjustToUTC() { // avro<->iceberg conversion // This is because iceberg types can only can encode adjust-to-utc=true|false but not a missing // adjust-to-utc - Type expectedIcebergType = Types.TimestampType.withoutZone(); + Type expectedIcebergType = Types.TimestampType.microsWithoutZone(); Schema avroType = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); assertThat(AvroSchemaUtil.convert(avroType)).isEqualTo(expectedIcebergType); + + expectedIcebergType = Types.TimestampType.nanosWithoutZone(); + avroType = IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + + assertThat(AvroSchemaUtil.convert(avroType)).isEqualTo(expectedIcebergType); } private Schema addAdjustToUtc(Schema schema, boolean adjustToUTC) { @@ -118,13 +131,15 @@ public void testStructAndPrimitiveTypes() { optional(24, "double", Types.DoubleType.get()), optional(25, "date", Types.DateType.get()), optional(27, "time", Types.TimeType.get()), - optional(28, "timestamptz", Types.TimestampType.withZone()), - optional(29, "timestamp", Types.TimestampType.withoutZone()), - optional(30, "string", Types.StringType.get()), - optional(31, "uuid", Types.UUIDType.get()), - optional(32, "fixed", Types.FixedType.ofLength(16)), - optional(33, "binary", Types.BinaryType.get()), - optional(34, "decimal", Types.DecimalType.of(14, 2))); + optional(28, "timestamptz", 
Types.TimestampType.microsWithZone()), + optional(29, "timestamp", Types.TimestampType.microsWithoutZone()), + optional(30, "timestamptz_ns", Types.TimestampType.nanosWithZone()), + optional(31, "timestamp_ns", Types.TimestampType.nanosWithoutZone()), + optional(32, "string", Types.StringType.get()), + optional(33, "uuid", Types.UUIDType.get()), + optional(34, "fixed", Types.FixedType.ofLength(16)), + optional(35, "binary", Types.BinaryType.get()), + optional(36, "decimal", Types.DecimalType.of(14, 2))); Schema schema = record( @@ -150,15 +165,29 @@ public void testStructAndPrimitiveTypes() { addAdjustToUtc( LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), false)), - optionalField(30, "string", Schema.create(Schema.Type.STRING)), + optionalField( + 30, + "timestamptz_ns", + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos() + .addToSchema(Schema.create(Schema.Type.LONG)), + true)), optionalField( 31, + "timestamp_ns", + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos() + .addToSchema(Schema.create(Schema.Type.LONG)), + false)), + optionalField(32, "string", Schema.create(Schema.Type.STRING)), + optionalField( + 33, "uuid", LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16))), - optionalField(32, "fixed", Schema.createFixed("fixed_16", null, null, 16)), - optionalField(33, "binary", Schema.create(Schema.Type.BYTES)), + optionalField(34, "fixed", Schema.createFixed("fixed_16", null, null, 16)), + optionalField(35, "binary", Schema.create(Schema.Type.BYTES)), optionalField( - 34, + 36, "decimal", LogicalTypes.decimal(14, 2) .addToSchema(Schema.createFixed("decimal_14_2", null, null, 6)))); diff --git a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java index a0ffe0af05b5..09ceb4b656d3 100644 --- a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java +++ b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java @@ -42,7 +42,8 @@ public class TestExpressionParser { optional(105, "f", Types.FloatType.get()), required(106, "d", Types.DoubleType.get()), optional(107, "date", Types.DateType.get()), - required(108, "ts", Types.TimestampType.withoutZone()), + required(108, "ts", Types.TimestampType.microsWithoutZone()), + required(109, "tsns", Types.TimestampType.nanosWithoutZone()), required(110, "s", Types.StringType.get()), required(111, "uuid", Types.UUIDType.get()), required(112, "fixed", Types.FixedType.ofLength(7)), @@ -68,6 +69,7 @@ public void testSimpleExpressions() { Expressions.equal("d", 100.0d), Expressions.equal("date", "2022-08-14"), Expressions.equal("ts", "2022-08-14T10:00:00.123456"), + Expressions.equal("tsns", "2022-08-14T10:00:00.123456789"), Expressions.equal("uuid", UUID.randomUUID()), Expressions.equal("fixed", new byte[] {1, 2, 3, 4, 5, 6, 7}), Expressions.equal("bytes", ByteBuffer.wrap(new byte[] {1, 3, 5})), @@ -93,6 +95,9 @@ public void testSimpleExpressions() { Expressions.or( Expressions.greaterThan(Expressions.day("ts"), "2022-08-14"), Expressions.equal("date", "2022-08-14")), + Expressions.or( + Expressions.greaterThan(Expressions.day("tsns"), "2022-08-14"), + Expressions.equal("date", "2022-08-14")), Expressions.not(Expressions.in("l", 1, 2, 3, 4)) }; diff --git a/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java b/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java index 6b59095225d8..b9947a8e7836 100644 --- 
a/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java +++ b/core/src/test/java/org/apache/iceberg/mapping/TestMappingUpdates.java @@ -52,7 +52,7 @@ public void testAddColumnMappingUpdate() { MappedFields.of(MappedField.of(1, "id"), MappedField.of(2, "data")), mapping.asMappedFields()); - table.updateSchema().addColumn("ts", Types.TimestampType.withZone()).commit(); + table.updateSchema().addColumn("ts", Types.TimestampType.microsWithZone()).commit(); NameMapping updated = NameMappingParser.fromJson(table.properties().get(TableProperties.DEFAULT_NAME_MAPPING)); diff --git a/core/src/test/java/org/apache/iceberg/util/TestSortOrderUtil.java b/core/src/test/java/org/apache/iceberg/util/TestSortOrderUtil.java index 02c81de93222..6d28985b5ce7 100644 --- a/core/src/test/java/org/apache/iceberg/util/TestSortOrderUtil.java +++ b/core/src/test/java/org/apache/iceberg/util/TestSortOrderUtil.java @@ -41,7 +41,7 @@ public class TestSortOrderUtil { new Schema( required(10, "id", Types.IntegerType.get()), required(11, "data", Types.StringType.get()), - required(12, "ts", Types.TimestampType.withZone()), + required(12, "ts", Types.TimestampType.microsWithZone()), required(13, "category", Types.StringType.get())); @TempDir private File tableDir; diff --git a/format/spec.md b/format/spec.md index 855db29f569b..26e8755ea9e3 100644 --- a/format/spec.md +++ b/format/spec.md @@ -948,10 +948,10 @@ Lists must use the [3-level representation](https://github.com/apache/parquet-fo | **`decimal(P,S)`** | `decimal` | | | | **`date`** | `date` | | | | **`time`** | `long` | `iceberg.long-type`=`TIME` | Stores microseconds from midnight. | -| **`timestamp`** | `timestamp` | | Stores microseconds from 2015-01-01 00:00:00.000000. [1], [2] | -| **`timestamptz`** | `timestamp_instant` | | Stores microseconds from 2015-01-01 00:00:00.000000 UTC. [1], [2] | -| **`timestamp_ns`** | `timestamp` | | Stores nanoseconds from 2015-01-01 00:00:00.000000000. [1] | -| **`timestamptz_ns`** | `timestamp_instant` | | Stores nanoseconds from 2015-01-01 00:00:00.000000000 UTC. [1] | +| **`timestamp`** | `timestamp` | `iceberg.timestamp-unit`=`MICROS` | Stores microseconds from 2015-01-01 00:00:00.000000. [1], [2] | +| **`timestamptz`** | `timestamp_instant` | `iceberg.timestamp-unit`=`MICROS` | Stores microseconds from 2015-01-01 00:00:00.000000 UTC. [1], [2] | +| **`timestamp_ns`** | `timestamp` | `iceberg.timestamp-unit`=`NANOS` | Stores nanoseconds from 2015-01-01 00:00:00.000000000. [1] | +| **`timestamptz_ns`** | `timestamp_instant` | `iceberg.timestamp-unit`=`NANOS` | Stores nanoseconds from 2015-01-01 00:00:00.000000000 UTC. [1] | | **`string`** | `string` | | ORC `varchar` and `char` would also map to **`string`**. | | **`uuid`** | `binary` | `iceberg.binary-type`=`UUID` | | | **`fixed(L)`** | `binary` | `iceberg.binary-type`=`FIXED` & `iceberg.length`=`L` | The length would not be checked by the ORC reader and should be checked by the adapter. | @@ -963,7 +963,7 @@ Lists must use the [3-level representation](https://github.com/apache/parquet-fo Notes: 1. ORC's [TimestampColumnVector](https://orc.apache.org/api/hive-storage-api/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.html) consists of a time field (milliseconds since epoch) and a nanos field (nanoseconds within the second). Hence the milliseconds within the second are reported twice; once in the time field and again in the nanos field. The read adapter should only use milliseconds within the second from one of these fields. 
The write adapter should also report milliseconds within the second twice; once in the time field and again in the nanos field. ORC writer is expected to correctly consider millis information from one of the fields. More details at https://issues.apache.org/jira/browse/ORC-546 -2. ORC `timestamp` and `timestamp_instant` values store nanosecond precision. Iceberg ORC writers for Iceberg types `timestamp` and `timestamptz` **must** truncate nanoseconds to microseconds. +2. ORC `timestamp` and `timestamp_instant` values store nanosecond precision. Iceberg ORC writers for Iceberg types `timestamp` and `timestamptz` **must** truncate nanoseconds to microseconds. `iceberg.timestamp-unit` is optional, and is assumed to be `MICROS` if not present. One of the interesting challenges with this is how to map Iceberg’s schema evolution (id based) on to ORC’s (name based). In theory, we could use Iceberg’s column ids as the column and field names, but that would be inconvenient. diff --git a/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java b/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java index 93815ca5604c..328eb09dfdb4 100644 --- a/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java +++ b/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriter.java @@ -94,10 +94,18 @@ public OrcValueWriter primitive(Type.PrimitiveType iPrimitive, TypeDescriptio return GenericOrcWriters.times(); case TIMESTAMP: Types.TimestampType timestampType = (Types.TimestampType) iPrimitive; - if (timestampType.shouldAdjustToUTC()) { - return GenericOrcWriters.timestampTz(); - } else { - return GenericOrcWriters.timestamp(); + switch (timestampType.unit()) { + case MICROS: + return timestampType.shouldAdjustToUTC() + ? GenericOrcWriters.timestampTz() + : GenericOrcWriters.timestamp(); + case NANOS: + return timestampType.shouldAdjustToUTC() + ? 
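To make the spec.md mapping above concrete: an Iceberg `timestamp_ns` column is still written as an ORC `timestamp`, but the Iceberg unit travels as the `iceberg.timestamp-unit` type attribute, and readers fall back to `MICROS` when the attribute is absent. The sketch below only uses the ORC `TypeDescription` attribute API that the patch itself relies on (`setAttribute`/`getAttributeValue`); class and variable names are illustrative, not part of the change.

```java
// Sketch of the iceberg.timestamp-unit attribute described in the spec table and notes above.
import org.apache.orc.TypeDescription;

public class TimestampUnitAttributeSketch {
  public static void main(String[] args) {
    // Iceberg timestamp_ns maps to ORC `timestamp` plus iceberg.timestamp-unit=NANOS
    TypeDescription nanosType = TypeDescription.createTimestamp();
    nanosType.setAttribute("iceberg.timestamp-unit", "NANOS");

    // A pre-existing ORC timestamp carries no attribute, so readers assume MICROS
    TypeDescription legacyType = TypeDescription.createTimestamp();

    System.out.println(unitOf(nanosType));  // NANOS
    System.out.println(unitOf(legacyType)); // MICROS
  }

  private static String unitOf(TypeDescription type) {
    String value = type.getAttributeValue("iceberg.timestamp-unit");
    return value == null ? "MICROS" : value;
  }
}
```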
GenericOrcWriters.timestampnsTz() + : GenericOrcWriters.timestampns(); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestampType.unit()); } case STRING: return GenericOrcWriters.strings(); diff --git a/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriters.java b/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriters.java index 5e12a828b7f0..750c20194c26 100644 --- a/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriters.java +++ b/orc/src/main/java/org/apache/iceberg/data/orc/GenericOrcWriters.java @@ -117,10 +117,18 @@ public static OrcValueWriter timestampTz() { return TimestampTzWriter.INSTANCE; } + public static OrcValueWriter timestampnsTz() { + return TimestampnsTzWriter.INSTANCE; + } + public static OrcValueWriter timestamp() { return TimestampWriter.INSTANCE; } + public static OrcValueWriter timestampns() { + return TimestampnsWriter.INSTANCE; + } + public static OrcValueWriter decimal(int precision, int scale) { if (precision <= 18) { return new Decimal18Writer(precision, scale); @@ -335,6 +343,18 @@ public void nonNullWrite(int rowId, OffsetDateTime data, ColumnVector output) { } } + private static class TimestampnsTzWriter implements OrcValueWriter { + private static final OrcValueWriter INSTANCE = new TimestampnsTzWriter(); + + @Override + @SuppressWarnings("JavaLocalDateTimeGetNano") + public void nonNullWrite(int rowId, OffsetDateTime data, ColumnVector output) { + TimestampColumnVector cv = (TimestampColumnVector) output; + cv.time[rowId] = data.toInstant().toEpochMilli(); + cv.nanos[rowId] = data.getNano(); + } + } + private static class TimestampWriter implements OrcValueWriter { private static final OrcValueWriter INSTANCE = new TimestampWriter(); @@ -349,6 +369,19 @@ public void nonNullWrite(int rowId, LocalDateTime data, ColumnVector output) { } } + private static class TimestampnsWriter implements OrcValueWriter { + private static final OrcValueWriter INSTANCE = new TimestampnsWriter(); + + @Override + @SuppressWarnings("JavaLocalDateTimeGetNano") + public void nonNullWrite(int rowId, LocalDateTime data, ColumnVector output) { + TimestampColumnVector cv = (TimestampColumnVector) output; + cv.setIsUTC(true); + cv.time[rowId] = data.toInstant(ZoneOffset.UTC).toEpochMilli(); + cv.nanos[rowId] = data.getNano(); + } + } + private static class Decimal18Writer implements OrcValueWriter { private final int precision; private final int scale; diff --git a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java index fae1a76c3706..7fffa9138f4c 100644 --- a/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java +++ b/orc/src/main/java/org/apache/iceberg/orc/ORCSchemaUtil.java @@ -47,6 +47,11 @@ public enum LongType { LONG } + public enum TimestampUnit { + MICROS, + NANOS + } + private static class OrcField { private final String name; private final TypeDescription type; @@ -78,6 +83,12 @@ public TypeDescription type() { * to an ORC long type. The values for this attribute are denoted in {@code LongType}. */ public static final String ICEBERG_LONG_TYPE_ATTRIBUTE = "iceberg.long-type"; + /** + * The name of the ORC {@link TypeDescription} attribute indicating the Iceberg timestamp unit + * corresponding to an ORC timestamp type. The values for this attribute are denoted in {@code + * TimestampUnit}. 
+ */ + public static final String ICEBERG_TIMESTAMP_UNIT_ATTRIBUTE = "iceberg.timestamp-unit"; static final String ICEBERG_FIELD_LENGTH = "iceberg.length"; @@ -147,6 +158,16 @@ private static TypeDescription convert(Integer fieldId, Type type, boolean isReq } else { orcType = TypeDescription.createTimestamp(); } + switch (tsType.unit()) { + case MICROS: + orcType.setAttribute(ICEBERG_TIMESTAMP_UNIT_ATTRIBUTE, TimestampUnit.MICROS.toString()); + break; + case NANOS: + orcType.setAttribute(ICEBERG_TIMESTAMP_UNIT_ATTRIBUTE, TimestampUnit.NANOS.toString()); + break; + default: + throw new IllegalArgumentException("Unhandled timestamp unit " + tsType.unit().name()); + } break; case STRING: orcType = TypeDescription.createString(); diff --git a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java index 972591d53d03..13b14d452c8d 100644 --- a/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java +++ b/orc/src/main/java/org/apache/iceberg/orc/OrcMetrics.java @@ -207,6 +207,7 @@ private static Metrics buildOrcMetrics( upperBounds); } + @SuppressWarnings("checkstyle:CyclomaticComplexity") private static Optional fromOrcMin( Type type, ColumnStatistics columnStats, @@ -249,10 +250,16 @@ private static Optional fromOrcMin( } else if (columnStats instanceof TimestampColumnStatistics) { TimestampColumnStatistics tColStats = (TimestampColumnStatistics) columnStats; Timestamp minValue = tColStats.getMinimumUTC(); - min = - Optional.ofNullable(minValue) - .map(v -> DateTimeUtil.microsFromInstant(v.toInstant())) - .orElse(null); + if (minValue != null) { + switch (((Types.TimestampType) type).unit()) { + case MICROS: + min = DateTimeUtil.microsFromInstant(minValue.toInstant()); + break; + case NANOS: + min = DateTimeUtil.nanosFromInstant(minValue.toInstant()); + break; + } + } } else if (columnStats instanceof BooleanColumnStatistics) { BooleanColumnStatistics booleanStats = (BooleanColumnStatistics) columnStats; min = booleanStats.getFalseCount() <= 0; @@ -262,6 +269,7 @@ private static Optional fromOrcMin( Conversions.toByteBuffer(type, truncateIfNeeded(Bound.LOWER, type, min, metricsMode))); } + @SuppressWarnings("checkstyle:CyclomaticComplexity") private static Optional fromOrcMax( Type type, ColumnStatistics columnStats, @@ -304,10 +312,16 @@ private static Optional fromOrcMax( } else if (columnStats instanceof TimestampColumnStatistics) { TimestampColumnStatistics tColStats = (TimestampColumnStatistics) columnStats; Timestamp maxValue = tColStats.getMaximumUTC(); - max = - Optional.ofNullable(maxValue) - .map(v -> DateTimeUtil.microsFromInstant(v.toInstant())) - .orElse(null); + if (maxValue != null) { + switch (((Types.TimestampType) type).unit()) { + case MICROS: + max = DateTimeUtil.microsFromInstant(maxValue.toInstant()); + break; + case NANOS: + max = DateTimeUtil.nanosFromInstant(maxValue.toInstant()); + break; + } + } } else if (columnStats instanceof BooleanColumnStatistics) { BooleanColumnStatistics booleanStats = (BooleanColumnStatistics) columnStats; max = booleanStats.getTrueCount() > 0; diff --git a/orc/src/main/java/org/apache/iceberg/orc/OrcToIcebergVisitor.java b/orc/src/main/java/org/apache/iceberg/orc/OrcToIcebergVisitor.java index 6992f88b870b..75a244f57f5c 100644 --- a/orc/src/main/java/org/apache/iceberg/orc/OrcToIcebergVisitor.java +++ b/orc/src/main/java/org/apache/iceberg/orc/OrcToIcebergVisitor.java @@ -168,12 +168,30 @@ public Optional primitive(TypeDescription primitive) { foundField = 
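The OrcMetrics change above selects the bound conversion by timestamp unit. A hedged sketch of what that means for one and the same ORC statistic, assuming `DateTimeUtil.nanosFromInstant` is available alongside `microsFromInstant` as the hunk implies; the value is illustrative:

```java
// Illustrative: the same ORC timestamp statistic becomes a micros bound for `timestamp`
// columns and a nanos bound for `timestamp_ns` columns.
import java.time.Instant;
import org.apache.iceberg.util.DateTimeUtil;

public class OrcBoundUnitSketch {
  public static void main(String[] args) {
    // e.g. an ORC TimestampColumnStatistics minimum of 1970-01-01T00:00:01.5Z
    Instant min = Instant.ofEpochSecond(1, 500_000_000L);

    long microsBound = DateTimeUtil.microsFromInstant(min); // 1_500_000
    long nanosBound = DateTimeUtil.nanosFromInstant(min);   // 1_500_000_000
    System.out.println(microsBound + " " + nanosBound);
  }
}
```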
Types.NestedField.of(icebergID, isOptional, name, Types.DateType.get()); break; case TIMESTAMP: - foundField = - Types.NestedField.of(icebergID, isOptional, name, Types.TimestampType.withoutZone()); + String unitAttributeValue = + primitive.getAttributeValue(ORCSchemaUtil.ICEBERG_TIMESTAMP_UNIT_ATTRIBUTE); + ORCSchemaUtil.TimestampUnit unit = + unitAttributeValue == null + ? ORCSchemaUtil.TimestampUnit.MICROS + : ORCSchemaUtil.TimestampUnit.valueOf(unitAttributeValue); + switch (unit) { + case MICROS: + foundField = + Types.NestedField.of( + icebergID, isOptional, name, Types.TimestampType.microsWithoutZone()); + break; + case NANOS: + foundField = + Types.NestedField.of( + icebergID, isOptional, name, Types.TimestampType.nanosWithoutZone()); + break; + default: + throw new UnsupportedOperationException("Unsupported timestamp unit: " + unit); + } break; case TIMESTAMP_INSTANT: foundField = - Types.NestedField.of(icebergID, isOptional, name, Types.TimestampType.withZone()); + Types.NestedField.of(icebergID, isOptional, name, Types.TimestampType.microsWithZone()); break; case DECIMAL: foundField = diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestEstimateOrcAvgWidthVisitor.java b/orc/src/test/java/org/apache/iceberg/orc/TestEstimateOrcAvgWidthVisitor.java index 1aa7dda04761..5ae72f8442d7 100644 --- a/orc/src/test/java/org/apache/iceberg/orc/TestEstimateOrcAvgWidthVisitor.java +++ b/orc/src/test/java/org/apache/iceberg/orc/TestEstimateOrcAvgWidthVisitor.java @@ -48,9 +48,9 @@ public class TestEstimateOrcAvgWidthVisitor { protected static final Types.NestedField BOOLEAN_FIELD = optional(12, "boolean", Types.BooleanType.get()); protected static final Types.NestedField TIMESTAMP_ZONE_FIELD = - optional(13, "timestampZone", Types.TimestampType.withZone()); + optional(13, "timestampZone", Types.TimestampType.microsWithZone()); protected static final Types.NestedField TIMESTAMP_FIELD = - optional(14, "timestamp", Types.TimestampType.withoutZone()); + optional(14, "timestamp", Types.TimestampType.microsWithoutZone()); protected static final Types.NestedField DATE_FIELD = optional(15, "date", Types.DateType.get()); protected static final Types.NestedField UUID_FIELD = required(16, "uuid", Types.UUIDType.get()); @@ -71,7 +71,7 @@ public class TestEstimateOrcAvgWidthVisitor { Types.StructType.of( required(24, "booleanField", Types.BooleanType.get()), optional(25, "date", Types.DateType.get()), - optional(27, "timestamp", Types.TimestampType.withZone()))); + optional(27, "timestamp", Types.TimestampType.microsWithZone()))); @Test public void testEstimateIntegerWidth() { diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java b/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java index c7c7a8aeb07e..7e2630351388 100644 --- a/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java +++ b/orc/src/test/java/org/apache/iceberg/orc/TestExpressionToSearchArgument.java @@ -76,8 +76,8 @@ public void testPrimitiveTypes() { required(6, "string", Types.StringType.get()), required(7, "date", Types.DateType.get()), required(8, "time", Types.TimeType.get()), - required(9, "tsTz", Types.TimestampType.withZone()), - required(10, "ts", Types.TimestampType.withoutZone()), + required(9, "tsTz", Types.TimestampType.microsWithZone()), + required(10, "ts", Types.TimestampType.microsWithoutZone()), required(11, "decimal", Types.DecimalType.of(38, 2)), required(12, "float2", Types.FloatType.get()), required(13, "double2", 
Types.DoubleType.get())); @@ -151,8 +151,8 @@ public void testTimezoneSensitiveTypes() { Schema schema = new Schema( required(1, "date", Types.DateType.get()), - required(2, "tsTz", Types.TimestampType.withZone()), - required(3, "ts", Types.TimestampType.withoutZone())); + required(2, "tsTz", Types.TimestampType.microsWithZone()), + required(3, "ts", Types.TimestampType.microsWithoutZone())); Expression expr = and( @@ -480,7 +480,7 @@ public void testModifiedComplexSchemaNameMapping() { @Test public void testExpressionContainsNonReferenceTerm() { - Schema schema = new Schema(required(1, "ts", Types.TimestampType.withoutZone())); + Schema schema = new Schema(required(1, "ts", Types.TimestampType.microsWithoutZone())); // all operations for these types should resolve to YES_NO_NULL Expression expr = equal(year("ts"), 10); diff --git a/orc/src/test/java/org/apache/iceberg/orc/TestORCSchemaUtil.java b/orc/src/test/java/org/apache/iceberg/orc/TestORCSchemaUtil.java index 24a376cd71d3..0a695c520e85 100644 --- a/orc/src/test/java/org/apache/iceberg/orc/TestORCSchemaUtil.java +++ b/orc/src/test/java/org/apache/iceberg/orc/TestORCSchemaUtil.java @@ -53,7 +53,7 @@ public class TestORCSchemaUtil { required(25, "floatCol", Types.FloatType.get()), optional(30, "dateCol", Types.DateType.get()), required(32, "timeCol", Types.TimeType.get()), - required(34, "timestampCol", Types.TimestampType.withZone()), + required(34, "timestampCol", Types.TimestampType.microsWithZone()), required(114, "dec_9_0", Types.DecimalType.of(9, 0)), required(115, "dec_11_2", Types.DecimalType.of(11, 2)), required(116, "dec_38_10", Types.DecimalType.of(38, 10)) // spark's maximum precision @@ -125,7 +125,7 @@ public void testRoundtripConversionNested() { required(25, "floatCol", Types.FloatType.get()), optional(30, "dateCol", Types.DateType.get()), required(32, "timeCol", Types.TimeType.get()), - required(34, "timestampCol", Types.TimestampType.withZone()), + required(34, "timestampCol", Types.TimestampType.microsWithZone()), required(35, "listPrimCol", Types.ListType.ofRequired(135, Types.LongType.get())), required(36, "listPrimNestCol", Types.ListType.ofRequired(136, structPrimTypeForList)), required(37, "listNestedCol", Types.ListType.ofRequired(137, nestedStructTypeForList)), diff --git a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java index 470f95e8bc99..3fe799a4fe17 100644 --- a/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java +++ b/parquet/src/main/java/org/apache/iceberg/data/parquet/BaseParquetWriter.java @@ -204,14 +204,20 @@ public Optional> visit( @Override public Optional> visit( LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampType) { - Preconditions.checkArgument( - LogicalTypeAnnotation.TimeUnit.MICROS.equals(timestampType.getUnit()), - "Cannot write timestamp in %s, only MICROS is supported", - timestampType.getUnit()); - if (timestampType.isAdjustedToUTC()) { - return Optional.of(new TimestamptzWriter(desc)); - } else { - return Optional.of(new TimestampWriter(desc)); + switch (timestampType.getUnit()) { + case MICROS: + return Optional.of( + timestampType.isAdjustedToUTC() + ? new TimestamptzWriter(desc) + : new TimestampWriter(desc)); + case NANOS: + return Optional.of( + timestampType.isAdjustedToUTC() + ? 
new TimestamptznsWriter(desc) + : new TimestampnsWriter(desc)); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit" + timestampType.getUnit()); } } @@ -278,6 +284,19 @@ public void write(int repetitionLevel, LocalDateTime value) { } } + private static class TimestampnsWriter + extends ParquetValueWriters.PrimitiveWriter { + private TimestampnsWriter(ColumnDescriptor desc) { + super(desc); + } + + @Override + public void write(int repetitionLevel, LocalDateTime value) { + column.writeLong( + repetitionLevel, ChronoUnit.NANOS.between(EPOCH, value.atOffset(ZoneOffset.UTC))); + } + } + private static class TimestamptzWriter extends ParquetValueWriters.PrimitiveWriter { private TimestamptzWriter(ColumnDescriptor desc) { @@ -290,6 +309,18 @@ public void write(int repetitionLevel, OffsetDateTime value) { } } + private static class TimestamptznsWriter + extends ParquetValueWriters.PrimitiveWriter { + private TimestamptznsWriter(ColumnDescriptor desc) { + super(desc); + } + + @Override + public void write(int repetitionLevel, OffsetDateTime value) { + column.writeLong(repetitionLevel, ChronoUnit.NANOS.between(EPOCH, value)); + } + } + private static class FixedWriter extends ParquetValueWriters.PrimitiveWriter { private FixedWriter(ColumnDescriptor desc) { super(desc); diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java index 26ef6e468ede..09298a9a544f 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/MessageTypeToType.java @@ -171,7 +171,7 @@ public Type primitive(PrimitiveType primitive) { case FIXED_LEN_BYTE_ARRAY: return Types.FixedType.ofLength(primitive.getTypeLength()); case INT96: - return Types.TimestampType.withZone(); + return Types.TimestampType.microsWithZone(); case BINARY: return Types.BinaryType.get(); } @@ -215,8 +215,21 @@ public Optional visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation time @Override public Optional visit( LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampType) { - return Optional.of( - timestampType.isAdjustedToUTC() ? TimestampType.withZone() : TimestampType.withoutZone()); + switch (timestampType.getUnit()) { + case MICROS: + return Optional.of( + timestampType.isAdjustedToUTC() + ? TimestampType.microsWithZone() + : TimestampType.microsWithoutZone()); + case NANOS: + return Optional.of( + timestampType.isAdjustedToUTC() + ? 
TimestampType.nanosWithZone() + : TimestampType.nanosWithoutZone()); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestampType.getUnit()); + } } @Override diff --git a/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java b/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java index 54f11500489b..5d91bb005634 100644 --- a/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java +++ b/parquet/src/main/java/org/apache/iceberg/parquet/TypeToMessageType.java @@ -56,6 +56,10 @@ public class TypeToMessageType { LogicalTypeAnnotation.timestampType(false /* not adjusted to UTC */, TimeUnit.MICROS); private static final LogicalTypeAnnotation TIMESTAMPTZ_MICROS = LogicalTypeAnnotation.timestampType(true /* adjusted to UTC */, TimeUnit.MICROS); + private static final LogicalTypeAnnotation TIMESTAMP_NANOS = + LogicalTypeAnnotation.timestampType(false /* not adjusted to UTC */, TimeUnit.NANOS); + private static final LogicalTypeAnnotation TIMESTAMPTZ_NANOS = + LogicalTypeAnnotation.timestampType(true /* adjusted to UTC */, TimeUnit.NANOS); public MessageType convert(Schema schema, String name) { Types.MessageTypeBuilder builder = Types.buildMessage(); @@ -136,10 +140,19 @@ public Type primitive( case TIME: return Types.primitive(INT64, repetition).as(TIME_MICROS).id(id).named(name); case TIMESTAMP: - if (((TimestampType) primitive).shouldAdjustToUTC()) { - return Types.primitive(INT64, repetition).as(TIMESTAMPTZ_MICROS).id(id).named(name); - } else { - return Types.primitive(INT64, repetition).as(TIMESTAMP_MICROS).id(id).named(name); + TimestampType timestamp = (TimestampType) primitive; + switch (timestamp.unit()) { + case MICROS: + return timestamp.shouldAdjustToUTC() + ? Types.primitive(INT64, repetition).as(TIMESTAMPTZ_MICROS).id(id).named(name) + : Types.primitive(INT64, repetition).as(TIMESTAMP_MICROS).id(id).named(name); + case NANOS: + return timestamp.shouldAdjustToUTC() + ? 
Types.primitive(INT64, repetition).as(TIMESTAMPTZ_NANOS).id(id).named(name) + : Types.primitive(INT64, repetition).as(TIMESTAMP_NANOS).id(id).named(name); + default: + throw new UnsupportedOperationException( + "Unsupported timestamp unit: " + timestamp.unit().toString()); } case STRING: return Types.primitive(BINARY, repetition).as(STRING).id(id).named(name); diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java index 56736ae42f91..13694e514376 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java @@ -106,8 +106,8 @@ public class TestBloomRowGroupFilter { optional(19, "boolean", Types.BooleanType.get()), optional(20, "time", Types.TimeType.get()), optional(21, "date", Types.DateType.get()), - optional(22, "timestamp", Types.TimestampType.withoutZone()), - optional(23, "timestamptz", Types.TimestampType.withZone()), + optional(22, "timestamp", Types.TimestampType.microsWithoutZone()), + optional(23, "timestamptz", Types.TimestampType.microsWithZone()), optional(24, "binary", Types.BinaryType.get()), optional(25, "int_decimal", Types.DecimalType.of(8, 2)), optional(26, "long_decimal", Types.DecimalType.of(14, 2)), @@ -137,8 +137,8 @@ public class TestBloomRowGroupFilter { optional(19, "_boolean", Types.BooleanType.get()), optional(20, "_time", Types.TimeType.get()), optional(21, "_date", Types.DateType.get()), - optional(22, "_timestamp", Types.TimestampType.withoutZone()), - optional(23, "_timestamptz", Types.TimestampType.withZone()), + optional(22, "_timestamp", Types.TimestampType.microsWithoutZone()), + optional(23, "_timestamptz", Types.TimestampType.microsWithZone()), optional(24, "_binary", Types.BinaryType.get()), optional(25, "_int_decimal", Types.DecimalType.of(8, 2)), optional(26, "_long_decimal", Types.DecimalType.of(14, 2)), diff --git a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java index 84fbf2a7d989..e0a2d58a2823 100644 --- a/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java +++ b/parquet/src/test/java/org/apache/iceberg/parquet/TestParquetSchemaUtil.java @@ -52,7 +52,7 @@ public class TestParquetSchemaUtil { optional(105, "f", Types.FloatType.get()), required(106, "d", Types.DoubleType.get()), optional(107, "date", Types.DateType.get()), - required(108, "ts", Types.TimestampType.withZone()), + required(108, "ts", Types.TimestampType.microsWithZone()), required(110, "s", Types.StringType.get()), required(112, "fixed", Types.FixedType.ofLength(7)), optional(113, "bytes", Types.BinaryType.get()),