diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java index 3708dafc4126..1c5e3321ab8d 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java +++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java @@ -43,6 +43,7 @@ public class ExpressionUtil { Transforms.bucket(Integer.MAX_VALUE).bind(Types.StringType.get()); private static final OffsetDateTime EPOCH = Instant.ofEpochSecond(0).atOffset(ZoneOffset.UTC); private static final long FIVE_MINUTES_IN_MICROS = TimeUnit.MINUTES.toMicros(5); + private static final long FIVE_MINUTES_IN_NANOS = TimeUnit.MINUTES.toNanos(5); private static final long THREE_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(3); private static final long NINETY_DAYS_IN_HOURS = TimeUnit.DAYS.toHours(90); private static final Pattern DATE = Pattern.compile("\\d{4}-\\d{2}-\\d{2}"); @@ -247,13 +248,12 @@ public static UnboundTerm unbind(Term term) { private static class ExpressionSanitizer extends ExpressionVisitors.ExpressionVisitor { - private final long now; + private final long nowMillis; private final int today; private ExpressionSanitizer() { - long nowMillis = System.currentTimeMillis(); - OffsetDateTime nowDateTime = Instant.ofEpochMilli(nowMillis).atOffset(ZoneOffset.UTC); - this.now = nowMillis * 1000; + this.nowMillis = System.currentTimeMillis(); + OffsetDateTime nowDateTime = Instant.ofEpochMilli(this.nowMillis).atOffset(ZoneOffset.UTC); this.today = (int) ChronoUnit.DAYS.between(EPOCH, nowDateTime); } @@ -293,13 +293,13 @@ public Expression predicate(BoundPredicate pred) { return new UnboundPredicate<>( pred.op(), unbind(pred.term()), - (T) sanitize(bound.term().type(), bound.literal(), now, today)); + (T) sanitize(bound.term().type(), bound.literal(), nowMillis, today)); } else if (pred.isSetPredicate()) { BoundSetPredicate bound = (BoundSetPredicate) pred; Iterable iter = () -> bound.literalSet().stream() - .map(lit -> (T) sanitize(bound.term().type(), lit, now, today)) + .map(lit -> (T) sanitize(bound.term().type(), lit, nowMillis, today)) .iterator(); return new UnboundPredicate<>(pred.op(), unbind(pred.term()), iter); } @@ -326,11 +326,11 @@ public Expression predicate(UnboundPredicate pred) { case STARTS_WITH: case NOT_STARTS_WITH: return new UnboundPredicate<>( - pred.op(), pred.term(), (T) sanitize(pred.literal(), now, today)); + pred.op(), pred.term(), (T) sanitize(pred.literal(), nowMillis, today)); case IN: case NOT_IN: Iterable iter = - () -> pred.literals().stream().map(lit -> sanitize(lit, now, today)).iterator(); + () -> pred.literals().stream().map(lit -> sanitize(lit, nowMillis, today)).iterator(); return new UnboundPredicate<>(pred.op(), pred.term(), (Iterable) iter); default: throw new UnsupportedOperationException( @@ -340,13 +340,12 @@ public Expression predicate(UnboundPredicate pred) { } private static class StringSanitizer extends ExpressionVisitors.ExpressionVisitor { - private final long nowMicros; + private final long nowMillis; private final int today; private StringSanitizer() { - long nowMillis = System.currentTimeMillis(); - OffsetDateTime nowDateTime = Instant.ofEpochMilli(nowMillis).atOffset(ZoneOffset.UTC); - this.nowMicros = nowMillis * 1000; + this.nowMillis = System.currentTimeMillis(); + OffsetDateTime nowDateTime = Instant.ofEpochMilli(this.nowMillis).atOffset(ZoneOffset.UTC); this.today = (int) ChronoUnit.DAYS.between(EPOCH, nowDateTime); } @@ -376,7 +375,7 @@ public String or(String leftResult, String rightResult) { } private String value(BoundLiteralPredicate pred) { - return sanitize(pred.term().type(), pred.literal().value(), nowMicros, today); + return sanitize(pred.term().type(), pred.literal().value(), nowMillis, today); } @Override @@ -408,7 +407,7 @@ public String predicate(BoundPredicate pred) { + " IN " + abbreviateValues( pred.asSetPredicate().literalSet().stream() - .map(lit -> sanitize(pred.term().type(), lit, nowMicros, today)) + .map(lit -> sanitize(pred.term().type(), lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -417,7 +416,7 @@ public String predicate(BoundPredicate pred) { + " NOT IN " + abbreviateValues( pred.asSetPredicate().literalSet().stream() - .map(lit -> sanitize(pred.term().type(), lit, nowMicros, today)) + .map(lit -> sanitize(pred.term().type(), lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -444,23 +443,23 @@ public String predicate(UnboundPredicate pred) { case NOT_NAN: return "not_nan(" + term + ")"; case LT: - return term + " < " + sanitize(pred.literal(), nowMicros, today); + return term + " < " + sanitize(pred.literal(), nowMillis, today); case LT_EQ: - return term + " <= " + sanitize(pred.literal(), nowMicros, today); + return term + " <= " + sanitize(pred.literal(), nowMillis, today); case GT: - return term + " > " + sanitize(pred.literal(), nowMicros, today); + return term + " > " + sanitize(pred.literal(), nowMillis, today); case GT_EQ: - return term + " >= " + sanitize(pred.literal(), nowMicros, today); + return term + " >= " + sanitize(pred.literal(), nowMillis, today); case EQ: - return term + " = " + sanitize(pred.literal(), nowMicros, today); + return term + " = " + sanitize(pred.literal(), nowMillis, today); case NOT_EQ: - return term + " != " + sanitize(pred.literal(), nowMicros, today); + return term + " != " + sanitize(pred.literal(), nowMillis, today); case IN: return term + " IN " + abbreviateValues( pred.literals().stream() - .map(lit -> sanitize(lit, nowMicros, today)) + .map(lit -> sanitize(lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); @@ -469,14 +468,14 @@ public String predicate(UnboundPredicate pred) { + " NOT IN " + abbreviateValues( pred.literals().stream() - .map(lit -> sanitize(lit, nowMicros, today)) + .map(lit -> sanitize(lit, nowMillis, today)) .collect(Collectors.toList())) .stream() .collect(Collectors.joining(", ", "(", ")")); case STARTS_WITH: - return term + " STARTS WITH " + sanitize(pred.literal(), nowMicros, today); + return term + " STARTS WITH " + sanitize(pred.literal(), nowMillis, today); case NOT_STARTS_WITH: - return term + " NOT STARTS WITH " + sanitize(pred.literal(), nowMicros, today); + return term + " NOT STARTS WITH " + sanitize(pred.literal(), nowMillis, today); default: throw new UnsupportedOperationException( "Cannot sanitize unsupported predicate type: " + pred.op()); @@ -501,7 +500,7 @@ private static List abbreviateValues(List sanitizedValues) { return sanitizedValues; } - private static String sanitize(Type type, Object value, long now, int today) { + private static String sanitize(Type type, Object value, long nowMillis, int today) { switch (type.typeId()) { case INTEGER: case LONG: @@ -514,9 +513,11 @@ private static String sanitize(Type type, Object value, long now, int today) { case TIME: return "(time)"; case TIMESTAMP: - return sanitizeTimestamp((long) value, now); + return sanitizeTimestamp((long) value, nowMillis); + case TIMESTAMPNS: + return sanitizeTimestampns((long) value, nowMillis); case STRING: - return sanitizeString((CharSequence) value, now, today); + return sanitizeString((CharSequence) value, nowMillis, today); case BOOLEAN: case UUID: case DECIMAL: @@ -529,13 +530,15 @@ private static String sanitize(Type type, Object value, long now, int today) { String.format("Cannot sanitize value for unsupported type %s: %s", type, value)); } - private static String sanitize(Literal literal, long now, int today) { + private static String sanitize(Literal literal, long nowMillis, int today) { if (literal instanceof Literals.StringLiteral) { - return sanitizeString(((Literals.StringLiteral) literal).value(), now, today); + return sanitizeString(((Literals.StringLiteral) literal).value(), nowMillis, today); } else if (literal instanceof Literals.DateLiteral) { return sanitizeDate(((Literals.DateLiteral) literal).value(), today); } else if (literal instanceof Literals.TimestampLiteral) { - return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), now); + return sanitizeTimestamp(((Literals.TimestampLiteral) literal).value(), nowMillis); + } else if (literal instanceof Literals.TimestampnsLiteral) { + return sanitizeTimestampns(((Literals.TimestampnsLiteral) literal).value(), nowMillis); } else if (literal instanceof Literals.TimeLiteral) { return "(time)"; } else if (literal instanceof Literals.IntegerLiteral) { @@ -564,13 +567,13 @@ private static String sanitizeDate(int days, int today) { return "(date)"; } - private static String sanitizeTimestamp(long micros, long now) { - String isPast = now > micros ? "ago" : "from-now"; - long diff = Math.abs(now - micros); + private static String sanitizeTimestamp(long micros, long nowMillis) { + long diff = Math.abs(nowMillis * 1000 - micros); if (diff < FIVE_MINUTES_IN_MICROS) { return "(timestamp-about-now)"; } + String isPast = nowMillis * 1000 > micros ? "ago" : "from-now"; long hours = TimeUnit.MICROSECONDS.toHours(diff); if (hours <= THREE_DAYS_IN_HOURS) { return "(timestamp-" + hours + "-hours-" + isPast + ")"; @@ -582,6 +585,24 @@ private static String sanitizeTimestamp(long micros, long now) { return "(timestamp)"; } + private static String sanitizeTimestampns(long nanos, long nowMillis) { + long diff = Math.abs(nowMillis * 1000_000 - nanos); + if (diff < FIVE_MINUTES_IN_NANOS) { + return "(timestampns-about-now)"; + } + + String isPast = nowMillis * 1000_000 > nanos ? "ago" : "from-now"; + long hours = TimeUnit.NANOSECONDS.toHours(diff); + if (hours <= THREE_DAYS_IN_HOURS) { + return "(timestampns-" + hours + "-hours-" + isPast + ")"; + } else if (hours < NINETY_DAYS_IN_HOURS) { + long days = hours / 24; + return "(timestampns-" + days + "-days-" + isPast + ")"; + } + + return "(timestampns)"; + } + private static String sanitizeNumber(Number value, String type) { // log10 of zero isn't defined and will result in negative infinity int numDigits = @@ -589,17 +610,17 @@ private static String sanitizeNumber(Number value, String type) { return "(" + numDigits + "-digit-" + type + ")"; } - private static String sanitizeString(CharSequence value, long now, int today) { + private static String sanitizeString(CharSequence value, long nowMillis, int today) { try { if (DATE.matcher(value).matches()) { Literal date = Literal.of(value).to(Types.DateType.get()); return sanitizeDate(date.value(), today); } else if (TIMESTAMP.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withoutZone()); - return sanitizeTimestamp(ts.value(), now); + return sanitizeTimestamp(ts.value(), nowMillis); } else if (TIMESTAMPTZ.matcher(value).matches()) { Literal ts = Literal.of(value).to(Types.TimestampType.withZone()); - return sanitizeTimestamp(ts.value(), now); + return sanitizeTimestamp(ts.value(), nowMillis); } else if (TIME.matcher(value).matches()) { return "(time)"; } else { diff --git a/api/src/main/java/org/apache/iceberg/expressions/Literals.java b/api/src/main/java/org/apache/iceberg/expressions/Literals.java index 79d7190c49df..e484db264865 100644 --- a/api/src/main/java/org/apache/iceberg/expressions/Literals.java +++ b/api/src/main/java/org/apache/iceberg/expressions/Literals.java @@ -299,6 +299,8 @@ public Literal to(Type type) { return (Literal) new TimeLiteral(value()); case TIMESTAMP: return (Literal) new TimestampLiteral(value()); + case TIMESTAMPNS: + return (Literal) new TimestampnsLiteral(value()); case DATE: if ((long) Integer.MAX_VALUE < value()) { return aboveMax(); @@ -436,6 +438,8 @@ public Literal to(Type type) { switch (type.typeId()) { case TIMESTAMP: return (Literal) this; + case TIMESTAMPNS: + return (Literal) new TimestampnsLiteral(value() * 1000); case DATE: return (Literal) new DateLiteral( @@ -453,6 +457,36 @@ protected Type.TypeID typeId() { } } + static class TimestampnsLiteral extends ComparableLiteral { + TimestampnsLiteral(Long value) { + super(value); + } + + @Override + @SuppressWarnings("unchecked") + public Literal to(Type type) { + switch (type.typeId()) { + case TIMESTAMPNS: + return (Literal) this; + case TIMESTAMP: + return (Literal) new TimestampLiteral(value() / 1000); + case DATE: + return (Literal) + new DateLiteral( + (int) + ChronoUnit.DAYS.between( + EPOCH_DAY, EPOCH.plus(value(), ChronoUnit.NANOS).toLocalDate())); + default: + } + return null; + } + + @Override + protected Type.TypeID typeId() { + return Type.TypeID.TIMESTAMPNS; + } + } + static class DecimalLiteral extends ComparableLiteral { DecimalLiteral(BigDecimal value) { super(value); @@ -515,6 +549,21 @@ public Literal to(Type type) { return (Literal) new TimestampLiteral(timestampMicros); } + case TIMESTAMPNS: + if (((Types.TimestampnsType) type).shouldAdjustToUTC()) { + long timestampNanos = + ChronoUnit.NANOS.between( + EPOCH, OffsetDateTime.parse(value(), DateTimeFormatter.ISO_DATE_TIME)); + return (Literal) new TimestampnsLiteral(timestampNanos); + } else { + long timestampNanos = + ChronoUnit.NANOS.between( + EPOCH, + LocalDateTime.parse(value(), DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .atOffset(ZoneOffset.UTC)); + return (Literal) new TimestampnsLiteral(timestampNanos); + } + case STRING: return (Literal) this; diff --git a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java index 912bcd271725..93d924dc0653 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Bucket.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Bucket.java @@ -54,6 +54,7 @@ static & SerializableFunction> B get( return (B) new BucketInteger(numBuckets); case TIME: case TIMESTAMP: + case TIMESTAMPNS: case LONG: return (B) new BucketLong(numBuckets); case DECIMAL: @@ -107,6 +108,7 @@ public boolean canTransform(Type type) { case DATE: case TIME: case TIMESTAMP: + case TIMESTAMPNS: case STRING: case BINARY: case FIXED: diff --git a/api/src/main/java/org/apache/iceberg/transforms/Days.java b/api/src/main/java/org/apache/iceberg/transforms/Days.java index f69d5d6110ed..d5b11effd975 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Days.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Days.java @@ -38,6 +38,8 @@ protected Transform toEnum(Type type) { return (Transform) Dates.DAY; case TIMESTAMP: return (Transform) Timestamps.DAY; + case TIMESTAMPNS: + return (Transform) Timestampns.DAY; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -56,6 +58,8 @@ public boolean satisfiesOrderOf(Transform other) { if (other instanceof Timestamps) { return Timestamps.DAY.satisfiesOrderOf(other); + } else if (other instanceof Timestampns) { + return Timestampns.DAY.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.DAY.satisfiesOrderOf(other); } else if (other instanceof Days || other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/Hours.java b/api/src/main/java/org/apache/iceberg/transforms/Hours.java index afc14516f3cd..5a28ad814f7e 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Hours.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Hours.java @@ -35,6 +35,8 @@ static Hours get() { protected Transform toEnum(Type type) { if (type.typeId() == Type.TypeID.TIMESTAMP) { return (Transform) Timestamps.HOUR; + } else if (type.typeId() == Type.TypeID.TIMESTAMPNS) { + return (Transform) Timestampns.HOUR; } throw new IllegalArgumentException("Unsupported type: " + type); @@ -42,7 +44,7 @@ protected Transform toEnum(Type type) { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.TIMESTAMP || type.typeId() == Type.TypeID.TIMESTAMPNS; } @Override @@ -58,6 +60,8 @@ public boolean satisfiesOrderOf(Transform other) { if (other instanceof Timestamps) { return other == Timestamps.HOUR; + } else if (other instanceof Timestampns) { + return other == Timestampns.HOUR; } else if (other instanceof Hours || other instanceof Days || other instanceof Months diff --git a/api/src/main/java/org/apache/iceberg/transforms/Months.java b/api/src/main/java/org/apache/iceberg/transforms/Months.java index 8fa4d42385f7..46d0ac0b1d6a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Months.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Months.java @@ -38,6 +38,8 @@ protected Transform toEnum(Type type) { return (Transform) Dates.MONTH; case TIMESTAMP: return (Transform) Timestamps.MONTH; + case TIMESTAMPNS: + return (Transform) Timestampns.MONTH; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -56,6 +58,8 @@ public boolean satisfiesOrderOf(Transform other) { if (other instanceof Timestamps) { return Timestamps.MONTH.satisfiesOrderOf(other); + } else if (other instanceof Timestampns) { + return Timestampns.MONTH.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.MONTH.satisfiesOrderOf(other); } else if (other instanceof Months || other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java index e4796478bf28..0cca32ad1e1a 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/PartitionSpecVisitor.java @@ -123,15 +123,22 @@ static R visit(Schema schema, PartitionField field, PartitionSpecVisitor return visitor.truncate(field.fieldId(), sourceName, field.sourceId(), width); } else if (transform == Dates.YEAR || transform == Timestamps.YEAR + || transform == Timestampns.YEAR || transform instanceof Years) { return visitor.year(field.fieldId(), sourceName, field.sourceId()); } else if (transform == Dates.MONTH || transform == Timestamps.MONTH + || transform == Timestampns.MONTH || transform instanceof Months) { return visitor.month(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Dates.DAY || transform == Timestamps.DAY || transform instanceof Days) { + } else if (transform == Dates.DAY + || transform == Timestamps.DAY + || transform == Timestampns.DAY + || transform instanceof Days) { return visitor.day(field.fieldId(), sourceName, field.sourceId()); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if (transform == Timestamps.HOUR + || transform == Timestampns.HOUR + || transform instanceof Hours) { return visitor.hour(field.fieldId(), sourceName, field.sourceId()); } else if (transform instanceof VoidTransform) { return visitor.alwaysNull(field.fieldId(), sourceName, field.sourceId()); diff --git a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java index 680e095270fb..cbb396c1dea1 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java +++ b/api/src/main/java/org/apache/iceberg/transforms/SortOrderVisitor.java @@ -86,20 +86,25 @@ static List visit(SortOrder sortOrder, SortOrderVisitor visitor) { sourceName, field.sourceId(), width, field.direction(), field.nullOrder())); } else if (transform == Dates.YEAR || transform == Timestamps.YEAR + || transform == Timestampns.YEAR || transform instanceof Years) { results.add( visitor.year(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.MONTH || transform == Timestamps.MONTH + || transform == Timestampns.MONTH || transform instanceof Months) { results.add( visitor.month(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform == Dates.DAY || transform == Timestamps.DAY + || transform == Timestampns.DAY || transform instanceof Days) { results.add( visitor.day(sourceName, field.sourceId(), field.direction(), field.nullOrder())); - } else if (transform == Timestamps.HOUR || transform instanceof Hours) { + } else if (transform == Timestamps.HOUR + || transform == Timestampns.HOUR + || transform instanceof Hours) { results.add( visitor.hour(sourceName, field.sourceId(), field.direction(), field.nullOrder())); } else if (transform instanceof UnknownTransform) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java index 01ea8130aa60..9d9d8be37af4 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TimeTransform.java @@ -39,7 +39,9 @@ public boolean preservesOrder() { @Override public boolean canTransform(Type type) { - return type.typeId() == Type.TypeID.DATE || type.typeId() == Type.TypeID.TIMESTAMP; + return type.typeId() == Type.TypeID.DATE + || type.typeId() == Type.TypeID.TIMESTAMP + || type.typeId() == Type.TypeID.TIMESTAMPNS; } @Override diff --git a/api/src/main/java/org/apache/iceberg/transforms/Timestampns.java b/api/src/main/java/org/apache/iceberg/transforms/Timestampns.java new file mode 100644 index 000000000000..cf5e74ad4424 --- /dev/null +++ b/api/src/main/java/org/apache/iceberg/transforms/Timestampns.java @@ -0,0 +1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.transforms; + +import com.google.errorprone.annotations.Immutable; +import java.time.temporal.ChronoUnit; +import org.apache.iceberg.expressions.BoundPredicate; +import org.apache.iceberg.expressions.BoundTransform; +import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.expressions.Expressions; +import org.apache.iceberg.expressions.UnboundPredicate; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.util.SerializableFunction; + +enum Timestampns implements Transform { + YEAR(ChronoUnit.YEARS, "year"), + MONTH(ChronoUnit.MONTHS, "month"), + DAY(ChronoUnit.DAYS, "day"), + HOUR(ChronoUnit.HOURS, "hour"); + + @Immutable + static class Apply implements SerializableFunction { + private final ChronoUnit granularity; + + Apply(ChronoUnit granularity) { + this.granularity = granularity; + } + + @Override + public Integer apply(Long timestampNanos) { + if (timestampNanos == null) { + return null; + } + + switch (granularity) { + case YEARS: + return DateTimeUtil.nanosToYears(timestampNanos); + case MONTHS: + return DateTimeUtil.nanosToMonths(timestampNanos); + case DAYS: + return DateTimeUtil.nanosToDays(timestampNanos); + case HOURS: + return DateTimeUtil.nanosToHours(timestampNanos); + default: + throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + } + } + } + + private final ChronoUnit granularity; + private final String name; + private final Apply apply; + + Timestampns(ChronoUnit granularity, String name) { + this.granularity = granularity; + this.name = name; + this.apply = new Apply(granularity); + } + + @Override + public Integer apply(Long timestampNanos) { + return apply.apply(timestampNanos); + } + + @Override + public SerializableFunction bind(Type type) { + Preconditions.checkArgument(canTransform(type), "Cannot bind to unsupported type: %s", type); + return apply; + } + + @Override + public boolean canTransform(Type type) { + return type.typeId() == Type.TypeID.TIMESTAMPNS; + } + + @Override + public Type getResultType(Type sourceType) { + if (granularity == ChronoUnit.DAYS) { + return Types.DateType.get(); + } + return Types.IntegerType.get(); + } + + @Override + public boolean preservesOrder() { + return true; + } + + @Override + public boolean satisfiesOrderOf(Transform other) { + if (this == other) { + return true; + } + + if (other instanceof Timestampns) { + // test the granularity, in hours. hour(ts) => 1 hour, day(ts) => 24 hours, and hour satisfies + // the order of day + Timestampns otherTransform = (Timestampns) other; + return granularity.getDuration().toHours() + <= otherTransform.granularity.getDuration().toHours(); + } + + return false; + } + + @Override + public UnboundPredicate project(String fieldName, BoundPredicate pred) { + if (pred.term() instanceof BoundTransform) { + return ProjectionUtil.projectTransformPredicate(this, fieldName, pred); + } + + if (pred.isUnaryPredicate()) { + return Expressions.predicate(pred.op(), fieldName); + + } else if (pred.isLiteralPredicate()) { + UnboundPredicate projected = + ProjectionUtil.truncateLong(fieldName, pred.asLiteralPredicate(), apply); + return ProjectionUtil.fixInclusiveTimeProjection(projected); + + } else if (pred.isSetPredicate() && pred.op() == Expression.Operation.IN) { + UnboundPredicate projected = + ProjectionUtil.transformSet(fieldName, pred.asSetPredicate(), apply); + return ProjectionUtil.fixInclusiveTimeProjection(projected); + } + + return null; + } + + @Override + public UnboundPredicate projectStrict(String fieldName, BoundPredicate pred) { + if (pred.term() instanceof BoundTransform) { + return ProjectionUtil.projectTransformPredicate(this, fieldName, pred); + } + + if (pred.isUnaryPredicate()) { + return Expressions.predicate(pred.op(), fieldName); + + } else if (pred.isLiteralPredicate()) { + UnboundPredicate projected = + ProjectionUtil.truncateLongStrict(fieldName, pred.asLiteralPredicate(), apply); + return ProjectionUtil.fixStrictTimeProjection(projected); + + } else if (pred.isSetPredicate() && pred.op() == Expression.Operation.NOT_IN) { + UnboundPredicate projected = + ProjectionUtil.transformSet(fieldName, pred.asSetPredicate(), apply); + return ProjectionUtil.fixStrictTimeProjection(projected); + } + + return null; + } + + @Override + public String toHumanString(Type outputType, Integer value) { + if (value == null) { + return "null"; + } + + switch (granularity) { + case YEARS: + return TransformUtil.humanYear(value); + case MONTHS: + return TransformUtil.humanMonth(value); + case DAYS: + return TransformUtil.humanDay(value); + case HOURS: + return TransformUtil.humanHour(value); + default: + throw new UnsupportedOperationException("Unsupported time unit: " + granularity); + } + } + + @Override + public String toString() { + return name; + } + + @Override + public String dedupName() { + return "time"; + } +} diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transform.java b/api/src/main/java/org/apache/iceberg/transforms/Transform.java index 5a56b672b1b1..6bd9f5a53724 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transform.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transform.java @@ -181,6 +181,12 @@ default String toHumanString(Type type, T value) { } else { return TransformUtil.humanTimestampWithoutZone((Long) value); } + case TIMESTAMPNS: + if (((Types.TimestampnsType) type).shouldAdjustToUTC()) { + return TransformUtil.humanTimestampnsWithZone((Long) value); + } else { + return TransformUtil.humanTimestampnsWithoutZone((Long) value); + } case FIXED: case BINARY: if (value instanceof ByteBuffer) { diff --git a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java index 53bc23a49888..399238290060 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java +++ b/api/src/main/java/org/apache/iceberg/transforms/TransformUtil.java @@ -58,10 +58,18 @@ static String humanTimestampWithZone(Long timestampMicros) { return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toString(); } + static String humanTimestampnsWithZone(Long timestampNanos) { + return ChronoUnit.NANOS.addTo(EPOCH, timestampNanos).toString(); + } + static String humanTimestampWithoutZone(Long timestampMicros) { return ChronoUnit.MICROS.addTo(EPOCH, timestampMicros).toLocalDateTime().toString(); } + static String humanTimestampnsWithoutZone(Long timestampNanos) { + return ChronoUnit.NANOS.addTo(EPOCH, timestampNanos).toLocalDateTime().toString(); + } + static String humanHour(int hourOrdinal) { OffsetDateTime time = EPOCH.plusHours(hourOrdinal); return String.format( diff --git a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java index a1ce33ddd6da..c8aa5f14d648 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Transforms.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Transforms.java @@ -23,7 +23,6 @@ import java.util.regex.Pattern; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Schema; -import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.types.Type; /** @@ -87,6 +86,8 @@ private Transforms() {} try { if (type.typeId() == Type.TypeID.TIMESTAMP) { return Timestamps.valueOf(transform.toUpperCase(Locale.ENGLISH)); + } else if (type.typeId() == Type.TypeID.TIMESTAMPNS) { + return Timestampns.valueOf(transform.toUpperCase(Locale.ENGLISH)); } else if (type.typeId() == Type.TypeID.DATE) { return Dates.valueOf(transform.toUpperCase(Locale.ENGLISH)); } @@ -130,6 +131,8 @@ public static Transform year(Type type) { return (Transform) Dates.YEAR; case TIMESTAMP: return (Transform) Timestamps.YEAR; + case TIMESTAMPNS: + return (Transform) Timestampns.YEAR; default: throw new IllegalArgumentException("Cannot partition type " + type + " by year"); } @@ -151,6 +154,8 @@ public static Transform month(Type type) { return (Transform) Dates.MONTH; case TIMESTAMP: return (Transform) Timestamps.MONTH; + case TIMESTAMPNS: + return (Transform) Timestampns.MONTH; default: throw new IllegalArgumentException("Cannot partition type " + type + " by month"); } @@ -172,6 +177,8 @@ public static Transform day(Type type) { return (Transform) Dates.DAY; case TIMESTAMP: return (Transform) Timestamps.DAY; + case TIMESTAMPNS: + return (Transform) Timestampns.DAY; default: throw new IllegalArgumentException("Cannot partition type " + type + " by day"); } @@ -188,9 +195,14 @@ public static Transform day(Type type) { @Deprecated @SuppressWarnings("unchecked") public static Transform hour(Type type) { - Preconditions.checkArgument( - type.typeId() == Type.TypeID.TIMESTAMP, "Cannot partition type %s by hour", type); - return (Transform) Timestamps.HOUR; + switch (type.typeId()) { + case TIMESTAMP: + return (Transform) Timestamps.HOUR; + case TIMESTAMPNS: + return (Transform) Timestampns.HOUR; + default: + throw new IllegalArgumentException("Cannot partition type " + type + " by day"); + } } /** diff --git a/api/src/main/java/org/apache/iceberg/transforms/Years.java b/api/src/main/java/org/apache/iceberg/transforms/Years.java index 6c1eee578506..d9a4435a4aca 100644 --- a/api/src/main/java/org/apache/iceberg/transforms/Years.java +++ b/api/src/main/java/org/apache/iceberg/transforms/Years.java @@ -38,6 +38,8 @@ protected Transform toEnum(Type type) { return (Transform) Dates.YEAR; case TIMESTAMP: return (Transform) Timestamps.YEAR; + case TIMESTAMPNS: + return (Transform) Timestampns.YEAR; default: throw new IllegalArgumentException("Unsupported type: " + type); } @@ -56,6 +58,8 @@ public boolean satisfiesOrderOf(Transform other) { if (other instanceof Timestamps) { return Timestamps.YEAR.satisfiesOrderOf(other); + } else if (other instanceof Timestampns) { + return Timestampns.YEAR.satisfiesOrderOf(other); } else if (other instanceof Dates) { return Dates.YEAR.satisfiesOrderOf(other); } else if (other instanceof Years) { diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java b/api/src/main/java/org/apache/iceberg/types/Comparators.java index d09d9f5395ce..c944a80bf0b5 100644 --- a/api/src/main/java/org/apache/iceberg/types/Comparators.java +++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java @@ -41,6 +41,8 @@ private Comparators() {} .put(Types.TimeType.get(), Comparator.naturalOrder()) .put(Types.TimestampType.withZone(), Comparator.naturalOrder()) .put(Types.TimestampType.withoutZone(), Comparator.naturalOrder()) + .put(Types.TimestampnsType.withZone(), Comparator.naturalOrder()) + .put(Types.TimestampnsType.withoutZone(), Comparator.naturalOrder()) .put(Types.StringType.get(), Comparators.charSequences()) .put(Types.UUIDType.get(), Comparator.naturalOrder()) .put(Types.BinaryType.get(), Comparators.unsignedBytes()) diff --git a/api/src/main/java/org/apache/iceberg/types/Conversions.java b/api/src/main/java/org/apache/iceberg/types/Conversions.java index 1d2539514954..8ba5a7dfdfc7 100644 --- a/api/src/main/java/org/apache/iceberg/types/Conversions.java +++ b/api/src/main/java/org/apache/iceberg/types/Conversions.java @@ -97,6 +97,7 @@ public static ByteBuffer toByteBuffer(Type.TypeID typeId, Object value) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMPNS: return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, (long) value); case FLOAT: return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(0, (float) value); @@ -146,6 +147,7 @@ private static Object internalFromByteBuffer(Type type, ByteBuffer buffer) { case LONG: case TIME: case TIMESTAMP: + case TIMESTAMPNS: if (tmp.remaining() < 8) { // type was later promoted to long return (long) tmp.getInt(); diff --git a/api/src/main/java/org/apache/iceberg/types/Type.java b/api/src/main/java/org/apache/iceberg/types/Type.java index 5062b54d10e1..0d2385b90d5b 100644 --- a/api/src/main/java/org/apache/iceberg/types/Type.java +++ b/api/src/main/java/org/apache/iceberg/types/Type.java @@ -37,6 +37,7 @@ enum TypeID { DATE(Integer.class), TIME(Long.class), TIMESTAMP(Long.class), + TIMESTAMPNS(Long.class), STRING(CharSequence.class), UUID(java.util.UUID.class), FIXED(ByteBuffer.class), diff --git a/api/src/main/java/org/apache/iceberg/types/Types.java b/api/src/main/java/org/apache/iceberg/types/Types.java index da70dd9ac6ab..00d79759b56d 100644 --- a/api/src/main/java/org/apache/iceberg/types/Types.java +++ b/api/src/main/java/org/apache/iceberg/types/Types.java @@ -48,6 +48,8 @@ private Types() {} .put(TimeType.get().toString(), TimeType.get()) .put(TimestampType.withZone().toString(), TimestampType.withZone()) .put(TimestampType.withoutZone().toString(), TimestampType.withoutZone()) + .put(TimestampnsType.withZone().toString(), TimestampnsType.withZone()) + .put(TimestampnsType.withoutZone().toString(), TimestampnsType.withoutZone()) .put(StringType.get().toString(), StringType.get()) .put(UUIDType.get().toString(), UUIDType.get()) .put(BinaryType.get().toString(), BinaryType.get()) @@ -258,6 +260,60 @@ public int hashCode() { } } + public static class TimestampnsType extends PrimitiveType { + private static final TimestampnsType INSTANCE_WITH_ZONE = new TimestampnsType(true); + private static final TimestampnsType INSTANCE_WITHOUT_ZONE = new TimestampnsType(false); + + public static TimestampnsType withZone() { + return INSTANCE_WITH_ZONE; + } + + public static TimestampnsType withoutZone() { + return INSTANCE_WITHOUT_ZONE; + } + + private final boolean adjustToUTC; + + private TimestampnsType(boolean adjustToUTC) { + this.adjustToUTC = adjustToUTC; + } + + public boolean shouldAdjustToUTC() { + return adjustToUTC; + } + + @Override + public TypeID typeId() { + return TypeID.TIMESTAMPNS; + } + + @Override + public String toString() { + if (shouldAdjustToUTC()) { + return "timestamptzns"; + } else { + return "timestampns"; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } else if (!(o instanceof TimestampnsType)) { + return false; + } + + TimestampnsType timestampnsType = (TimestampnsType) o; + return adjustToUTC == timestampnsType.adjustToUTC; + } + + @Override + public int hashCode() { + return Objects.hash(TimestampnsType.class, adjustToUTC); + } + } + public static class StringType extends PrimitiveType { private static final StringType INSTANCE = new StringType(); diff --git a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java index a2f5301f44a9..cd92ee9d610c 100644 --- a/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java +++ b/api/src/main/java/org/apache/iceberg/util/DateTimeUtil.java @@ -35,6 +35,7 @@ private DateTimeUtil() {} public static final LocalDate EPOCH_DAY = EPOCH.toLocalDate(); public static final long MICROS_PER_MILLIS = 1000L; public static final long MICROS_PER_SECOND = 1_000_000L; + public static final long NANOS_PER_SECOND = 1_000_000_000L; public static LocalDate dateFromDays(int daysFromEpoch) { return ChronoUnit.DAYS.addTo(EPOCH_DAY, daysFromEpoch); @@ -60,6 +61,10 @@ public static LocalDateTime timestampFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch).toLocalDateTime(); } + public static LocalDateTime timestampFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch).toLocalDateTime(); + } + public static long microsFromInstant(Instant instant) { return ChronoUnit.MICROS.between(EPOCH, instant.atOffset(ZoneOffset.UTC)); } @@ -68,6 +73,10 @@ public static long microsFromTimestamp(LocalDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); } + public static long nanosFromTimestamp(LocalDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime.atOffset(ZoneOffset.UTC)); + } + public static long microsToMillis(long micros) { // When the timestamp is negative, i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. @@ -79,10 +88,18 @@ public static OffsetDateTime timestamptzFromMicros(long microsFromEpoch) { return ChronoUnit.MICROS.addTo(EPOCH, microsFromEpoch); } + public static OffsetDateTime timestamptzFromNanos(long nanosFromEpoch) { + return ChronoUnit.NANOS.addTo(EPOCH, nanosFromEpoch); + } + public static long microsFromTimestamptz(OffsetDateTime dateTime) { return ChronoUnit.MICROS.between(EPOCH, dateTime); } + public static long nanosFromTimestamptz(OffsetDateTime dateTime) { + return ChronoUnit.NANOS.between(EPOCH, dateTime); + } + public static String formatTimestampMillis(long millis) { return Instant.ofEpochMilli(millis).toString().replace("Z", "+00:00"); } @@ -106,11 +123,27 @@ public static String microsToIsoTimestamptz(long micros) { return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); } + public static String nanosToIsoTimestamptz(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + DateTimeFormatter zeroOffsetFormatter = + new DateTimeFormatterBuilder() + .parseCaseInsensitive() + .append(DateTimeFormatter.ISO_LOCAL_DATE_TIME) + .appendOffset("+HH:MM:ss", "+00:00") + .toFormatter(); + return localDateTime.atOffset(ZoneOffset.UTC).format(zeroOffsetFormatter); + } + public static String microsToIsoTimestamp(long micros) { LocalDateTime localDateTime = timestampFromMicros(micros); return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); } + public static String nanosToIsoTimestamp(long nanos) { + LocalDateTime localDateTime = timestampFromNanos(nanos); + return localDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME); + } + public static int isoDateToDays(String dateString) { return daysFromDate(LocalDate.parse(dateString, DateTimeFormatter.ISO_LOCAL_DATE)); } @@ -124,6 +157,11 @@ public static long isoTimestamptzToMicros(String timestampString) { OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); } + public static long isoTimestamptzToNanos(String timestampString) { + return nanosFromTimestamptz( + OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME)); + } + public static boolean isUTCTimestamptz(String timestampString) { OffsetDateTime offsetDateTime = OffsetDateTime.parse(timestampString, DateTimeFormatter.ISO_DATE_TIME); @@ -135,6 +173,11 @@ public static long isoTimestampToMicros(String timestampString) { LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); } + public static long isoTimestampToNanos(String timestampString) { + return nanosFromTimestamp( + LocalDateTime.parse(timestampString, DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + } + public static int daysToYears(int days) { return convertDays(days, ChronoUnit.YEARS); } @@ -159,18 +202,34 @@ public static int microsToYears(long micros) { return convertMicros(micros, ChronoUnit.YEARS); } + public static int nanosToYears(long nanos) { + return convertNanos(nanos, ChronoUnit.YEARS); + } + public static int microsToMonths(long micros) { return convertMicros(micros, ChronoUnit.MONTHS); } + public static int nanosToMonths(long nanos) { + return convertNanos(nanos, ChronoUnit.MONTHS); + } + public static int microsToDays(long micros) { return convertMicros(micros, ChronoUnit.DAYS); } + public static int nanosToDays(long nanos) { + return convertNanos(nanos, ChronoUnit.DAYS); + } + public static int microsToHours(long micros) { return convertMicros(micros, ChronoUnit.HOURS); } + public static int nanosToHours(long nanos) { + return convertNanos(nanos, ChronoUnit.HOURS); + } + private static int convertMicros(long micros, ChronoUnit granularity) { if (micros >= 0) { long epochSecond = Math.floorDiv(micros, MICROS_PER_SECOND); @@ -185,6 +244,20 @@ private static int convertMicros(long micros, ChronoUnit granularity) { } } + private static int convertNanos(long nanos, ChronoUnit granularity) { + if (nanos >= 0) { + long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos, NANOS_PER_SECOND); + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)); + } else { + // add 1 nano to the value to account for the case where there is exactly 1 unit between + // the timestamp and epoch because the result will always be decremented. + long epochSecond = Math.floorDiv(nanos, NANOS_PER_SECOND); + long nanoAdjustment = Math.floorMod(nanos + 1, NANOS_PER_SECOND); + return (int) granularity.between(EPOCH, toOffsetDateTime(epochSecond, nanoAdjustment)) - 1; + } + } + private static OffsetDateTime toOffsetDateTime(long epochSecond, long nanoAdjustment) { return Instant.ofEpochSecond(epochSecond, nanoAdjustment).atOffset(ZoneOffset.UTC); } diff --git a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java index 5e4ca1fb11be..7d8550d6651f 100644 --- a/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java +++ b/api/src/test/java/org/apache/iceberg/PartitionSpecTestBase.java @@ -34,7 +34,8 @@ public class PartitionSpecTestBase { Types.NestedField.required(7, "s", Types.StringType.get()), Types.NestedField.required(8, "u", Types.UUIDType.get()), Types.NestedField.required(9, "f", Types.FixedType.ofLength(3)), - Types.NestedField.required(10, "b", Types.BinaryType.get())); + Types.NestedField.required(10, "b", Types.BinaryType.get()), + Types.NestedField.required(11, "tsn", Types.TimestampnsType.withoutZone())); // a spec with all of the allowed transform/type pairs public static final PartitionSpec[] SPECS = @@ -49,6 +50,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).identity("u").build(), PartitionSpec.builderFor(SCHEMA).identity("f").build(), PartitionSpec.builderFor(SCHEMA).identity("b").build(), + PartitionSpec.builderFor(SCHEMA).identity("tsn").build(), PartitionSpec.builderFor(SCHEMA).bucket("i", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("l", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("d", 128).build(), @@ -59,6 +61,7 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).bucket("u", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("f", 128).build(), PartitionSpec.builderFor(SCHEMA).bucket("b", 128).build(), + PartitionSpec.builderFor(SCHEMA).bucket("tsn", 128).build(), PartitionSpec.builderFor(SCHEMA).year("d").build(), PartitionSpec.builderFor(SCHEMA).month("d").build(), PartitionSpec.builderFor(SCHEMA).day("d").build(), @@ -66,6 +69,10 @@ public class PartitionSpecTestBase { PartitionSpec.builderFor(SCHEMA).month("ts").build(), PartitionSpec.builderFor(SCHEMA).day("ts").build(), PartitionSpec.builderFor(SCHEMA).hour("ts").build(), + PartitionSpec.builderFor(SCHEMA).year("tsn").build(), + PartitionSpec.builderFor(SCHEMA).month("tsn").build(), + PartitionSpec.builderFor(SCHEMA).day("tsn").build(), + PartitionSpec.builderFor(SCHEMA).hour("tsn").build(), PartitionSpec.builderFor(SCHEMA).truncate("i", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("l", 10).build(), PartitionSpec.builderFor(SCHEMA).truncate("dec", 10).build(), diff --git a/api/src/test/java/org/apache/iceberg/TestAccessors.java b/api/src/test/java/org/apache/iceberg/TestAccessors.java index 332556e474c7..699f6409552f 100644 --- a/api/src/test/java/org/apache/iceberg/TestAccessors.java +++ b/api/src/test/java/org/apache/iceberg/TestAccessors.java @@ -182,6 +182,12 @@ public void testTimestamp() { assertAccessorReturns(Types.TimestampType.withZone(), 123L); } + @Test + public void testTimestampns() { + assertAccessorReturns(Types.TimestampnsType.withoutZone(), 123L); + assertAccessorReturns(Types.TimestampnsType.withZone(), 123L); + } + @Test public void testString() { assertAccessorReturns(Types.StringType.get(), "abc"); diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java index d5aa251ffb50..1fb3cedc5e28 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestLiteralSerialization.java @@ -40,6 +40,8 @@ public void testLiterals() throws Exception { Literal.of("11:30:07").to(Types.TimeType.get()), Literal.of("2017-11-29T11:30:07.123").to(Types.TimestampType.withoutZone()), Literal.of("2017-11-29T11:30:07.123+01:00").to(Types.TimestampType.withZone()), + Literal.of("2017-11-29T11:30:07.123").to(Types.TimestampnsType.withoutZone()), + Literal.of("2017-11-29T11:30:07.123+01:00").to(Types.TimestampnsType.withZone()), Literal.of("abc"), Literal.of(UUID.randomUUID()), Literal.of(new byte[] {1, 2, 3}).to(Types.FixedType.ofLength(3)), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java index f8d2cd49d969..ac410b5a828f 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestMiscLiteralConversions.java @@ -43,6 +43,7 @@ public void testIdentityConversions() { Pair.of(Literal.of("2017-08-18"), Types.DateType.get()), Pair.of(Literal.of("14:21:01.919"), Types.TimeType.get()), Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampType.withoutZone()), + Pair.of(Literal.of("2017-08-18T14:21:01.919"), Types.TimestampnsType.withoutZone()), Pair.of(Literal.of("abc"), Types.StringType.get()), Pair.of(Literal.of(UUID.randomUUID()), Types.UUIDType.get()), Pair.of(Literal.of(new byte[] {0, 1, 2}), Types.FixedType.ofLength(3)), @@ -101,6 +102,8 @@ public void testInvalidBooleanConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -116,6 +119,8 @@ public void testInvalidIntegerConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -144,6 +149,8 @@ public void testInvalidFloatConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -161,6 +168,8 @@ public void testInvalidDoubleConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -179,6 +188,8 @@ public void testInvalidDateConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -198,6 +209,8 @@ public void testInvalidTimeConversions() { Types.DateType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 4), Types.StringType.get(), Types.UUIDType.get(), @@ -222,6 +235,23 @@ public void testInvalidTimestampConversions() { Types.BinaryType.get()); } + @Test + public void testInvalidTimestampnsConversions() { + testInvalidConversions( + Literal.of("2017-08-18T14:21:01.919123456").to(Types.TimestampnsType.withoutZone()), + Types.BooleanType.get(), + Types.IntegerType.get(), + Types.LongType.get(), + Types.FloatType.get(), + Types.DoubleType.get(), + Types.TimeType.get(), + Types.DecimalType.of(9, 4), + Types.StringType.get(), + Types.UUIDType.get(), + Types.FixedType.ofLength(1), + Types.BinaryType.get()); + } + @Test public void testInvalidDecimalConversions() { testInvalidConversions( @@ -235,6 +265,8 @@ public void testInvalidDecimalConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(1), @@ -269,6 +301,8 @@ public void testInvalidUUIDConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.FixedType.ofLength(1), @@ -288,6 +322,8 @@ public void testInvalidFixedConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), @@ -307,6 +343,8 @@ public void testInvalidBinaryConversions() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.DecimalType.of(9, 2), Types.StringType.get(), Types.UUIDType.get(), diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java index f35b274eb3d5..7d499e901926 100644 --- a/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java +++ b/api/src/test/java/org/apache/iceberg/expressions/TestStringLiteralConversions.java @@ -187,6 +187,15 @@ public void testTimestampWithZoneWithoutZoneInLiteral() { .hasMessageContaining("could not be parsed"); } + @Test + public void testTimestampnsWithZoneWithoutZoneInLiteral() { + // Zone must be present in literals when converting to timestamp with zone + Literal timestampStr = Literal.of("2017-08-18T14:21:01.919123456"); + Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampnsType.withZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + } + @Test public void testTimestampWithoutZoneWithZoneInLiteral() { // Zone must not be present in literals when converting to timestamp without zone @@ -196,6 +205,15 @@ public void testTimestampWithoutZoneWithZoneInLiteral() { .hasMessageContaining("could not be parsed"); } + @Test + public void testTimestampnsWithoutZoneWithZoneInLiteral() { + // Zone must not be present in literals when converting to timestamp without zone + Literal timestampStr = Literal.of("2017-08-18T14:21:01.919123456+07:00"); + Assertions.assertThatThrownBy(() -> timestampStr.to(Types.TimestampnsType.withoutZone())) + .isInstanceOf(DateTimeException.class) + .hasMessageContaining("could not be parsed"); + } + @Test public void testStringToUUIDLiteral() { UUID expected = UUID.randomUUID(); diff --git a/api/src/test/java/org/apache/iceberg/transforms/TestTimestampns.java b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampns.java new file mode 100644 index 000000000000..17ff6c0631f9 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/transforms/TestTimestampns.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.transforms; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.apache.iceberg.expressions.Literal; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestTimestampns { + @Test + public void testTimestampnsTransform() { + Types.TimestampnsType type = Types.TimestampnsType.withoutZone(); + Literal ts = Literal.of("2017-12-01T10:12:55.038194654").to(type); + Literal pts = Literal.of("1970-01-01T00:00:01.000000001").to(type); + Literal nts = Literal.of("1969-12-31T23:59:58.999999999").to(type); + + Transform years = Transforms.year(); + assertThat((int) years.bind(type).apply(ts.value())) + .as("Should produce 2017 - 1970 = 47") + .isEqualTo(47); + assertThat((int) years.bind(type).apply(pts.value())) + .as("Should produce 1970 - 1970 = 0") + .isZero(); + assertThat((int) years.bind(type).apply(nts.value())) + .as("Should produce 1969 - 1970 = -1") + .isEqualTo(-1); + + Transform months = Transforms.month(); + assertThat((int) months.bind(type).apply(ts.value())) + .as("Should produce 47 * 12 + 11 = 575") + .isEqualTo(575); + assertThat((int) months.bind(type).apply(pts.value())) + .as("Should produce 0 * 12 + 0 = 0") + .isZero(); + assertThat((int) months.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform days = Transforms.day(); + assertThat((int) days.bind(type).apply(ts.value())).as("Should produce 17501").isEqualTo(17501); + assertThat((int) days.bind(type).apply(pts.value())) + .as("Should produce 0 * 365 + 0 = 0") + .isZero(); + assertThat((int) days.bind(type).apply(nts.value())).isEqualTo(-1); + + Transform hours = Transforms.hour(); + assertThat((int) hours.bind(type).apply(ts.value())) + .as("Should produce 17501 * 24 + 10") + .isEqualTo(420034); + assertThat((int) hours.bind(type).apply(pts.value())) + .as("Should produce 0 * 24 + 0 = 0") + .isZero(); + assertThat((int) hours.bind(type).apply(nts.value())).isEqualTo(-1); + } + + @Test + public void testTimestampnsWithoutZoneToHumanString() { + Types.TimestampnsType type = Types.TimestampnsType.withoutZone(); + Literal date = Literal.of("2017-12-01T10:12:55.038194654").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-10"); + } + + @Test + public void testNegativeTimestampnsWithoutZoneToHumanString() { + Types.TimestampnsType type = Types.TimestampnsType.withoutZone(); + Literal date = Literal.of("1969-12-30T10:12:55.038194654").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-10"); + } + + @Test + public void testNegativeTimestampnsWithoutZoneToHumanStringLowerBound() { + Types.TimestampnsType type = Types.TimestampnsType.withoutZone(); + Literal date = Literal.of("1969-12-30T00:00:00.000000000").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-30"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-30-00"); + } + + @Test + public void testNegativeTimestampnsWithoutZoneToHumanStringUpperBound() { + Types.TimestampnsType type = Types.TimestampnsType.withoutZone(); + Literal date = Literal.of("1969-12-31T23:59:59.999999999").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("1969"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("1969-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("1969-12-31"); + + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("1969-12-31-23"); + } + + @Test + public void testTimestampnsWithZoneToHumanString() { + Types.TimestampnsType type = Types.TimestampnsType.withZone(); + Literal date = Literal.of("2017-12-01T10:12:55.038194654-08:00").to(type); + + Transform year = Transforms.year(); + assertThat(year.toHumanString(type, year.bind(type).apply(date.value()))).isEqualTo("2017"); + + Transform month = Transforms.month(); + assertThat(month.toHumanString(type, month.bind(type).apply(date.value()))) + .isEqualTo("2017-12"); + + Transform day = Transforms.day(); + assertThat(day.toHumanString(type, day.bind(type).apply(date.value()))).isEqualTo("2017-12-01"); + + // the hour is 18 because the value is always UTC + Transform hour = Transforms.hour(); + assertThat(hour.toHumanString(type, hour.bind(type).apply(date.value()))) + .isEqualTo("2017-12-01-18"); + } + + @Test + public void testNullHumanString() { + Types.TimestampnsType type = Types.TimestampnsType.withZone(); + assertThat(Transforms.year().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.month().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.day().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + assertThat(Transforms.hour().toHumanString(type, null)) + .as("Should produce \"null\" for null") + .isEqualTo("null"); + } + + @Test + public void testTimestampnsReturnType() { + Types.TimestampnsType type = Types.TimestampnsType.withZone(); + + Transform year = Transforms.year(); + Type yearResultType = year.getResultType(type); + assertThat(yearResultType).isEqualTo(Types.IntegerType.get()); + + Transform month = Transforms.month(); + Type monthResultType = month.getResultType(type); + assertThat(monthResultType).isEqualTo(Types.IntegerType.get()); + + Transform day = Transforms.day(); + Type dayResultType = day.getResultType(type); + assertThat(dayResultType).isEqualTo(Types.DateType.get()); + + Transform hour = Transforms.hour(); + Type hourResultType = hour.getResultType(type); + assertThat(hourResultType).isEqualTo(Types.IntegerType.get()); + } +} diff --git a/api/src/test/java/org/apache/iceberg/types/TestComparators.java b/api/src/test/java/org/apache/iceberg/types/TestComparators.java index 165d96c029cc..6e20be131442 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestComparators.java +++ b/api/src/test/java/org/apache/iceberg/types/TestComparators.java @@ -79,6 +79,12 @@ public void testTimestamp() { assertComparesCorrectly(Comparators.forType(Types.TimestampType.withZone()), 111, 222); } + @Test + public void testTimestampns() { + assertComparesCorrectly(Comparators.forType(Types.TimestampnsType.withoutZone()), 111, 222); + assertComparesCorrectly(Comparators.forType(Types.TimestampnsType.withZone()), 111, 222); + } + @Test public void testString() { assertComparesCorrectly(Comparators.forType(Types.StringType.get()), "a", "b"); diff --git a/api/src/test/java/org/apache/iceberg/types/TestConversions.java b/api/src/test/java/org/apache/iceberg/types/TestConversions.java index 6c7a884a5839..67e559b964c8 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestConversions.java +++ b/api/src/test/java/org/apache/iceberg/types/TestConversions.java @@ -38,6 +38,7 @@ import org.apache.iceberg.types.Types.StringType; import org.apache.iceberg.types.Types.TimeType; import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types.TimestampnsType; import org.apache.iceberg.types.Types.UUIDType; import org.junit.jupiter.api.Test; @@ -93,7 +94,8 @@ public void testByteBufferConversions() { assertThat(Literal.of(10000L).to(TimeType.get()).toByteBuffer().array()) .isEqualTo(new byte[] {16, 39, 0, 0, 0, 0, 0, 0}); - // timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an 8-byte + // microsecond timestamps are stored as microseconds from 1970-01-01 00:00:00.000000 in an + // 8-byte // little-endian long // 400000L is 0...110|00011010|10000000 in binary // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0 @@ -104,6 +106,18 @@ public void testByteBufferConversions() { assertThat(Literal.of(400000L).to(TimestampType.withZone()).toByteBuffer().array()) .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + // nanosecond timestamps are stored as nanoseconds from 1970-01-01 00:00:00.000000 in an 8-byte + // little-endian long + // 400000L is 0...110|00011010|10000000 in binary + // 10000000 -> -128, 00011010 -> 26, 00000110 -> 6, ... , 00000000 -> 0 + assertConversion( + 400000L, TimestampnsType.withoutZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertConversion(400000L, TimestampnsType.withZone(), new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampnsType.withoutZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + assertThat(Literal.of(400000L).to(TimestampnsType.withZone()).toByteBuffer().array()) + .isEqualTo(new byte[] {-128, 26, 6, 0, 0, 0, 0, 0}); + // strings are stored as UTF-8 bytes (without length) // 'A' -> 65, 'B' -> 66, 'C' -> 67 assertConversion(CharBuffer.wrap("ABC"), StringType.get(), new byte[] {65, 66, 67}); diff --git a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java index 7f5948bd5838..97e685f783ba 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java +++ b/api/src/test/java/org/apache/iceberg/types/TestReadabilityChecks.java @@ -39,6 +39,8 @@ public class TestReadabilityChecks { Types.TimeType.get(), Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampnsType.withoutZone(), + Types.TimestampnsType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(3), diff --git a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java index d981b5a26789..4159cdd9b25a 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestSerializableTypes.java @@ -41,6 +41,8 @@ public void testIdentityTypes() throws Exception { Types.TimeType.get(), Types.TimestampType.withoutZone(), Types.TimestampType.withZone(), + Types.TimestampnsType.withoutZone(), + Types.TimestampnsType.withZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get() diff --git a/api/src/test/java/org/apache/iceberg/types/TestTypes.java b/api/src/test/java/org/apache/iceberg/types/TestTypes.java index ca5c6edce16b..07fac3e5287a 100644 --- a/api/src/test/java/org/apache/iceberg/types/TestTypes.java +++ b/api/src/test/java/org/apache/iceberg/types/TestTypes.java @@ -30,6 +30,8 @@ public void fromPrimitiveString() { Assertions.assertThat(Types.fromPrimitiveString("timestamp")) .isSameAs(Types.TimestampType.withoutZone()); + Assertions.assertThat(Types.fromPrimitiveString("timestampns")) + .isSameAs(Types.TimestampnsType.withoutZone()); Assertions.assertThat(Types.fromPrimitiveString("Fixed[ 3 ]")) .isEqualTo(Types.FixedType.ofLength(3)); diff --git a/api/src/test/java/org/apache/iceberg/util/RandomUtil.java b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java index 9131e6166133..9d276b0e29a6 100644 --- a/api/src/test/java/org/apache/iceberg/util/RandomUtil.java +++ b/api/src/test/java/org/apache/iceberg/util/RandomUtil.java @@ -119,6 +119,9 @@ public static Object generatePrimitive(Type.PrimitiveType primitive, Random rand case TIMESTAMP: return random.nextLong() % FIFTY_YEARS_IN_MICROS; + case TIMESTAMPNS: + return random.nextLong() % FIFTY_YEARS_IN_NANOS; + case STRING: return randomString(random); @@ -166,6 +169,7 @@ public static Object generateDictionaryEncodablePrimitive( case LONG: case TIME: case TIMESTAMP: + case TIMESTAMPNS: return (long) value; case STRING: return String.valueOf(value); @@ -194,6 +198,7 @@ public static Object generateDictionaryEncodablePrimitive( private static final long FIFTY_YEARS_IN_MICROS = (50L * (365 * 3 + 366) * 24 * 60 * 60 * 1_000_000) / 4; + private static final long FIFTY_YEARS_IN_NANOS = FIFTY_YEARS_IN_MICROS * 1000; private static final int ABOUT_380_YEARS_IN_DAYS = 380 * 365; private static final long ONE_DAY_IN_MICROS = 24 * 60 * 60 * 1_000_000L; private static final String CHARS = diff --git a/core/src/main/java/org/apache/iceberg/SingleValueParser.java b/core/src/main/java/org/apache/iceberg/SingleValueParser.java index 3de6a0bcc663..cfa85e0a590b 100644 --- a/core/src/main/java/org/apache/iceberg/SingleValueParser.java +++ b/core/src/main/java/org/apache/iceberg/SingleValueParser.java @@ -140,6 +140,20 @@ public static Object fromJson(Type type, JsonNode defaultValue) { } else { return DateTimeUtil.isoTimestampToMicros(defaultValue.textValue()); } + case TIMESTAMPNS: + Preconditions.checkArgument( + defaultValue.isTextual(), "Cannot parse default as a %s value: %s", type, defaultValue); + if (((Types.TimestampnsType) type).shouldAdjustToUTC()) { + String timestampTz = defaultValue.textValue(); + Preconditions.checkArgument( + DateTimeUtil.isUTCTimestamptz(timestampTz), + "Cannot parse default as a %s value: %s, offset must be +00:00", + type, + defaultValue); + return DateTimeUtil.isoTimestamptzToNanos(timestampTz); + } else { + return DateTimeUtil.isoTimestampToNanos(defaultValue.textValue()); + } case FIXED: Preconditions.checkArgument( defaultValue.isTextual(), "Cannot parse default as a %s value: %s", type, defaultValue); @@ -289,6 +303,15 @@ public static void toJson(Type type, Object defaultValue, JsonGenerator generato generator.writeString(DateTimeUtil.microsToIsoTimestamp((Long) defaultValue)); } break; + case TIMESTAMPNS: + Preconditions.checkArgument( + defaultValue instanceof Long, "Invalid default %s value: %s", type, defaultValue); + if (((Types.TimestampnsType) type).shouldAdjustToUTC()) { + generator.writeString(DateTimeUtil.nanosToIsoTimestamptz((Long) defaultValue)); + } else { + generator.writeString(DateTimeUtil.nanosToIsoTimestamp((Long) defaultValue)); + } + break; case STRING: Preconditions.checkArgument( defaultValue instanceof CharSequence, diff --git a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java index 46c17722f8f7..bc14cd5236a6 100644 --- a/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java +++ b/core/src/main/java/org/apache/iceberg/avro/AvroSchemaUtil.java @@ -129,7 +129,8 @@ public static Schema buildAvroProjection( public static boolean isTimestamptz(Schema schema) { LogicalType logicalType = schema.getLogicalType(); if (logicalType instanceof LogicalTypes.TimestampMillis - || logicalType instanceof LogicalTypes.TimestampMicros) { + || logicalType instanceof LogicalTypes.TimestampMicros + || logicalType instanceof IcebergLogicalTypes.TimestampNanos) { // timestamptz is adjusted to UTC Object value = schema.getObjectProp(ADJUST_TO_UTC_PROP); diff --git a/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java b/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java index 0fa2e795811b..c77a92b03206 100644 --- a/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java +++ b/core/src/main/java/org/apache/iceberg/avro/GenericAvroReader.java @@ -144,6 +144,10 @@ public ValueReader primitive(Schema primitive) { // Spark uses the same representation return ValueReaders.longs(); + case "timestamp-nanos": + // TODO not sure if this is correct; see difference between millis and micros cases above this + return ValueReaders.longs(); + case "decimal": return ValueReaders.decimal( ValueReaders.decimalBytesReader(primitive), diff --git a/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java b/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java index 421bfc9dc462..5b16f9c9276f 100644 --- a/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java +++ b/core/src/main/java/org/apache/iceberg/avro/GenericAvroWriter.java @@ -107,6 +107,9 @@ public ValueWriter primitive(Schema primitive) { case "timestamp-micros": return ValueWriters.longs(); + case "timestamp-nanos": + return ValueWriters.longs(); + case "decimal": LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; return ValueWriters.decimal(decimal.getPrecision(), decimal.getScale()); diff --git a/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java b/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java new file mode 100644 index 000000000000..0740d8d5594a --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/avro/IcebergLogicalTypes.java @@ -0,0 +1,52 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. The ASF licenses this file + * * to you under the Apache License, Version 2.0 (the + * * "License"); you may not use this file except in compliance + * * with the License. You may obtain a copy of the License at + * * + * * http://www.apache.org/licenses/LICENSE-2.0 + * * + * * Unless required by applicable law or agreed to in writing, + * * software distributed under the License is distributed on an + * * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * * KIND, either express or implied. See the License for the + * * specific language governing permissions and limitations + * * under the License. + * + */ + +package org.apache.iceberg.avro; + +import org.apache.avro.LogicalType; +import org.apache.avro.Schema; + +public class IcebergLogicalTypes { + + private static final String TIMESTAMP_NANOS = "timestamp-nanos"; + + private static final TimestampNanos TIMESTAMP_NANOS_TYPE = new TimestampNanos(); + + public static TimestampNanos timestampNanos() { + return TIMESTAMP_NANOS_TYPE; + } + + /** TimestampNanos represents a date and time in nanoseconds */ + public static class TimestampNanos extends LogicalType { + private TimestampNanos() { + super(TIMESTAMP_NANOS); + } + + @Override + public void validate(Schema schema) { + super.validate(schema); + if (schema.getType() != Schema.Type.LONG) { + throw new IllegalArgumentException( + "Timestamp (nanos) can only be used with an underlying long type"); + } + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java index 174d63975195..1b1fca9faf65 100644 --- a/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java +++ b/core/src/main/java/org/apache/iceberg/avro/SchemaToType.java @@ -187,6 +187,13 @@ public Type primitive(Schema primitive) { return Types.TimestampType.withoutZone(); } + } else if (logical instanceof IcebergLogicalTypes.TimestampNanos) { + if (AvroSchemaUtil.isTimestamptz(primitive)) { + return Types.TimestampnsType.withZone(); + } else { + return Types.TimestampnsType.withoutZone(); + } + } else if (LogicalTypes.uuid().getName().equals(name)) { return Types.UUIDType.get(); } diff --git a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java index bc2847e1b4ba..fccc4136ad77 100644 --- a/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java +++ b/core/src/main/java/org/apache/iceberg/avro/TypeToSchema.java @@ -44,6 +44,10 @@ class TypeToSchema extends TypeUtil.SchemaVisitor { LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); private static final Schema TIMESTAMPTZ_SCHEMA = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); + private static final Schema TIMESTAMPNS_SCHEMA = + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + private static final Schema TIMESTAMPTZNS_SCHEMA = + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); private static final Schema STRING_SCHEMA = Schema.create(Schema.Type.STRING); private static final Schema UUID_SCHEMA = LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)); @@ -52,6 +56,8 @@ class TypeToSchema extends TypeUtil.SchemaVisitor { static { TIMESTAMP_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); TIMESTAMPTZ_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, true); + TIMESTAMPNS_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, false); + TIMESTAMPTZNS_SCHEMA.addProp(AvroSchemaUtil.ADJUST_TO_UTC_PROP, true); } private final Deque fieldIds = Lists.newLinkedList(); @@ -210,6 +216,13 @@ public Schema primitive(Type.PrimitiveType primitive) { primitiveSchema = TIMESTAMP_SCHEMA; } break; + case TIMESTAMPNS: + if (((Types.TimestampnsType) primitive).shouldAdjustToUTC()) { + primitiveSchema = TIMESTAMPTZNS_SCHEMA; + } else { + primitiveSchema = TIMESTAMPNS_SCHEMA; + } + break; case STRING: primitiveSchema = STRING_SCHEMA; break; diff --git a/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java b/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java index 4cb41263152d..54b4db7d634e 100644 --- a/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java +++ b/core/src/main/java/org/apache/iceberg/data/IdentityPartitionConverters.java @@ -45,6 +45,12 @@ public static Object convertConstant(Type type, Object value) { } else { return DateTimeUtil.timestampFromMicros((Long) value); } + case TIMESTAMPNS: + if (((Types.TimestampnsType) type).shouldAdjustToUTC()) { + return DateTimeUtil.timestamptzFromNanos((Long) value); + } else { + return DateTimeUtil.timestampFromNanos((Long) value); + } case FIXED: if (value instanceof GenericData.Fixed) { return ((GenericData.Fixed) value).bytes(); diff --git a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java b/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java index 1cc901d15bc1..1f5de85f5c19 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/DataReader.java @@ -136,6 +136,12 @@ public ValueReader primitive(Type.PrimitiveType ignored, Schema primitive) { } return GenericReaders.timestamps(); + case "timestamp-nanos": + if (AvroSchemaUtil.isTimestamptz(primitive)) { + return GenericReaders.timestamptzns(); + } + return GenericReaders.timestampns(); + case "decimal": return ValueReaders.decimal( ValueReaders.decimalBytesReader(primitive), diff --git a/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java b/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java index 4d6973d3cfe3..11ac3483a184 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/DataWriter.java @@ -118,6 +118,12 @@ public ValueWriter primitive(Schema primitive) { } return GenericWriters.timestamps(); + case "timestamp-nanos": + if (AvroSchemaUtil.isTimestamptz(primitive)) { + return GenericWriters.timestamptzns(); + } + return GenericWriters.timestampns(); + case "decimal": LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) logicalType; return ValueWriters.decimal(decimal.getPrecision(), decimal.getScale()); diff --git a/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java b/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java index 91a728d53d38..6ee4bd5fe8e4 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/GenericReaders.java @@ -48,10 +48,18 @@ static ValueReader timestamps() { return TimestampReader.INSTANCE; } + static ValueReader timestampns() { + return TimestampnsReader.INSTANCE; + } + static ValueReader timestamptz() { return TimestamptzReader.INSTANCE; } + static ValueReader timestamptzns() { + return TimestamptznsReader.INSTANCE; + } + static ValueReader struct( StructType struct, List> readers, Map idToConstant) { return new GenericRecordReader(readers, struct, idToConstant); @@ -90,6 +98,17 @@ public LocalDateTime read(Decoder decoder, Object reuse) throws IOException { } } + private static class TimestampnsReader implements ValueReader { + private static TimestampnsReader INSTANCE = new TimestampnsReader(); + + private TimestampnsReader() {} + + @Override + public LocalDateTime read(Decoder decoder, Object reuse) throws IOException { + return DateTimeUtil.timestampFromNanos(decoder.readLong()); + } + } + private static class TimestamptzReader implements ValueReader { private static final TimestamptzReader INSTANCE = new TimestamptzReader(); @@ -101,6 +120,17 @@ public OffsetDateTime read(Decoder decoder, Object reuse) throws IOException { } } + private static class TimestamptznsReader implements ValueReader { + private static final TimestamptznsReader INSTANCE = new TimestamptznsReader(); + + private TimestamptznsReader() {} + + @Override + public OffsetDateTime read(Decoder decoder, Object reuse) throws IOException { + return DateTimeUtil.timestamptzFromNanos(decoder.readLong()); + } + } + private static class GenericRecordReader extends ValueReaders.StructReader { private final StructType structType; diff --git a/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java b/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java index 1cea012e7a37..b4505f488106 100644 --- a/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java +++ b/core/src/main/java/org/apache/iceberg/data/avro/GenericWriters.java @@ -47,10 +47,18 @@ static ValueWriter timestamps() { return TimestampWriter.INSTANCE; } + static ValueWriter timestampns() { + return TimestampnsWriter.INSTANCE; + } + static ValueWriter timestamptz() { return TimestamptzWriter.INSTANCE; } + static ValueWriter timestamptzns() { + return TimestamptznsWriter.INSTANCE; + } + static ValueWriter struct(List> writers) { return new GenericRecordWriter(writers); } @@ -91,6 +99,17 @@ public void write(LocalDateTime timestamp, Encoder encoder) throws IOException { } } + private static class TimestampnsWriter implements ValueWriter { + private static final TimestampnsWriter INSTANCE = new TimestampnsWriter(); + + private TimestampnsWriter() {} + + @Override + public void write(LocalDateTime timestampns, Encoder encoder) throws IOException { + encoder.writeLong(ChronoUnit.NANOS.between(EPOCH, timestampns.atOffset(ZoneOffset.UTC))); + } + } + private static class TimestamptzWriter implements ValueWriter { private static final TimestamptzWriter INSTANCE = new TimestamptzWriter(); @@ -102,6 +121,17 @@ public void write(OffsetDateTime timestamptz, Encoder encoder) throws IOExceptio } } + private static class TimestamptznsWriter implements ValueWriter { + private static final TimestamptznsWriter INSTANCE = new TimestamptznsWriter(); + + private TimestamptznsWriter() {} + + @Override + public void write(OffsetDateTime timestamptzns, Encoder encoder) throws IOException { + encoder.writeLong(ChronoUnit.NANOS.between(EPOCH, timestamptzns)); + } + } + private static class GenericRecordWriter extends ValueWriters.StructWriter { private GenericRecordWriter(List> writers) { super(writers); diff --git a/core/src/test/java/org/apache/iceberg/TestMetrics.java b/core/src/test/java/org/apache/iceberg/TestMetrics.java index 32bc6299ce1b..3cd0393dc4bc 100644 --- a/core/src/test/java/org/apache/iceberg/TestMetrics.java +++ b/core/src/test/java/org/apache/iceberg/TestMetrics.java @@ -55,6 +55,7 @@ import org.apache.iceberg.types.Types.StructType; import org.apache.iceberg.types.Types.TimeType; import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types.TimestampnsType; import org.apache.iceberg.util.DateTimeUtil; import org.junit.After; import org.junit.Assert; @@ -100,9 +101,11 @@ protected TestMetrics(int formatVersion) { optional(8, "dateCol", DateType.get()), required(9, "timeCol", TimeType.get()), required(10, "timestampColAboveEpoch", TimestampType.withoutZone()), - required(11, "fixedCol", FixedType.ofLength(4)), - required(12, "binaryCol", BinaryType.get()), - required(13, "timestampColBelowEpoch", TimestampType.withoutZone())); + required(11, "timestampnsColAboveEpoch", TimestampnsType.withoutZone()), + required(12, "fixedCol", FixedType.ofLength(4)), + required(13, "binaryCol", BinaryType.get()), + required(14, "timestampColBelowEpoch", TimestampType.withoutZone()), + required(15, "timestampnsColBelowEpoch", TimestampnsType.withoutZone())); private static final Schema FLOAT_DOUBLE_ONLY_SCHEMA = new Schema( @@ -159,9 +162,11 @@ public void testMetricsForRepeatedValues() throws IOException { record.setField("dateCol", DateTimeUtil.dateFromDays(1500)); record.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); record.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + record.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); record.setField("fixedCol", fixed); record.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); record.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + record.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, record, record); Assert.assertEquals(2L, (long) metrics.recordCount()); @@ -193,9 +198,11 @@ public void testMetricsForTopLevelFields() throws IOException { firstRecord.setField("dateCol", DateTimeUtil.dateFromDays(1500)); firstRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); firstRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); firstRecord.setField("fixedCol", fixed); firstRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); firstRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-1_900_300L)); + firstRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(-1_900_300_000L)); Record secondRecord = GenericRecord.create(SIMPLE_SCHEMA); secondRecord.setField("booleanCol", false); secondRecord.setField("intCol", Integer.MIN_VALUE); @@ -207,9 +214,11 @@ public void testMetricsForTopLevelFields() throws IOException { secondRecord.setField("dateCol", null); secondRecord.setField("timeCol", DateTimeUtil.timeFromMicros(3000L)); secondRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(900L)); + secondRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(900_000L)); secondRecord.setField("fixedCol", fixed); secondRecord.setField("binaryCol", ByteBuffer.wrap("W".getBytes())); secondRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-7_000L)); + secondRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(-7_000_000L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, firstRecord, secondRecord); Assert.assertEquals(2L, (long) metrics.recordCount()); @@ -234,11 +243,13 @@ public void testMetricsForTopLevelFields() throws IOException { assertCounts(10, 2L, 0L, metrics); assertBounds(10, TimestampType.withoutZone(), 0L, 900L, metrics); assertCounts(11, 2L, 0L, metrics); - assertBounds( - 11, FixedType.ofLength(4), ByteBuffer.wrap(fixed), ByteBuffer.wrap(fixed), metrics); + assertBounds(11, TimestampnsType.withoutZone(), 0L, 900L, metrics); assertCounts(12, 2L, 0L, metrics); assertBounds( - 12, + 12, FixedType.ofLength(4), ByteBuffer.wrap(fixed), ByteBuffer.wrap(fixed), metrics); + assertCounts(13, 2L, 0L, metrics); + assertBounds( + 13, BinaryType.get(), ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), @@ -249,9 +260,11 @@ public void testMetricsForTopLevelFields() throws IOException { // Values in the range `[1969-12-31 23:59:59.000,1969-12-31 23:59:59.999]` will have 1 sec // added to them // So the upper bound value of -7_000 micros becomes 993_000 micros - assertBounds(13, TimestampType.withoutZone(), -1_900_300L, 993_000L, metrics); + assertBounds(14, TimestampType.withoutZone(), -1_900_300L, 993_000L, metrics); + assertBounds(15, TimestampnsType.withoutZone(), -1_900_300_000L, 993_000_000L, metrics); } else { - assertBounds(13, TimestampType.withoutZone(), -1_900_300L, -7_000L, metrics); + assertBounds(14, TimestampType.withoutZone(), -1_900_300L, -7_000L, metrics); + assertBounds(15, TimestampnsType.withoutZone(), -1_900_300_000L, -7_000_000L, metrics); } } @@ -465,10 +478,13 @@ public void testMetricsForTopLevelWithMultipleRowGroup() throws Exception { newRecord.setField("dateCol", DateTimeUtil.dateFromDays(i + 1)); newRecord.setField("timeCol", DateTimeUtil.timeFromMicros(i + 1L)); newRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(i + 1L)); + newRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(i + 1L)); newRecord.setField("fixedCol", fixed); newRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); newRecord.setField( "timestampColBelowEpoch", DateTimeUtil.timestampFromMicros((i + 1L) * -1L)); + newRecord.setField( + "timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos((i + 1L) * -1L)); records.add(newRecord); } @@ -696,9 +712,11 @@ public void testSortedColumnMetrics() throws IOException { firstRecord.setField("dateCol", DateTimeUtil.dateFromDays(1500)); firstRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); firstRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); firstRecord.setField("fixedCol", fixed); firstRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); firstRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + firstRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Record secondRecord = GenericRecord.create(SIMPLE_SCHEMA); @@ -712,9 +730,11 @@ public void testSortedColumnMetrics() throws IOException { secondRecord.setField("dateCol", DateTimeUtil.dateFromDays(3000)); secondRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L)); secondRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L)); + secondRecord.setField("timestampnsColAboveEpoch", DateTimeUtil.timestampFromNanos(0L)); secondRecord.setField("fixedCol", fixed); secondRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes())); secondRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L)); + secondRecord.setField("timestampnsColBelowEpoch", DateTimeUtil.timestampFromNanos(0L)); Metrics metrics = getMetrics(SIMPLE_SCHEMA, MetricsConfig.forTable(table), firstRecord, secondRecord); diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java index 5aedde6ce5b0..e778bd7103bd 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUnionByFieldName.java @@ -41,6 +41,7 @@ import org.apache.iceberg.types.Types.StructType; import org.apache.iceberg.types.Types.TimeType; import org.apache.iceberg.types.Types.TimestampType; +import org.apache.iceberg.types.Types.TimestampnsType; import org.apache.iceberg.types.Types.UUIDType; import org.assertj.core.api.Assertions; import org.junit.jupiter.api.Test; @@ -53,6 +54,8 @@ private static List primitiveTypes() { TimeType.get(), TimestampType.withoutZone(), TimestampType.withZone(), + TimestampnsType.withoutZone(), + TimestampnsType.withZone(), UUIDType.get(), DateType.get(), BooleanType.get(), diff --git a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java index 04cb64403523..f5cdc57f7a7e 100644 --- a/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java +++ b/core/src/test/java/org/apache/iceberg/TestSchemaUpdate.java @@ -283,6 +283,8 @@ public void testUpdateFailure() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.BinaryType.get(), diff --git a/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java b/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java index e04ba440ae3f..8f557ce1f6d8 100644 --- a/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java +++ b/core/src/test/java/org/apache/iceberg/TestSingleValueParser.java @@ -45,6 +45,8 @@ public void testValidDefaults() throws IOException { {Types.TimeType.get(), "\"10:15:30\""}, {Types.TimestampType.withoutZone(), "\"2007-12-03T10:15:30\""}, {Types.TimestampType.withZone(), "\"2007-12-03T10:15:30+00:00\""}, + {Types.TimestampnsType.withoutZone(), "\"2008-12-03T10:15:30\""}, + {Types.TimestampnsType.withZone(), "\"2008-12-03T10:15:30+00:00\""}, {Types.StringType.get(), "\"foo\""}, {Types.UUIDType.get(), "\"eb26bdb1-a1d8-4aa6-990e-da940875492c\""}, {Types.FixedType.ofLength(2), "\"111f\""}, @@ -159,14 +161,20 @@ public void testInvalidDecimal() { @Test public void testInvalidTimestamptz() { - Type expectedType = Types.TimestampType.withZone(); String defaultJson = "\"2007-12-03T10:15:30+01:00\""; Exception exception = Assert.assertThrows( IllegalArgumentException.class, - () -> defaultValueParseAndUnParseRoundTrip(expectedType, defaultJson)); + () -> defaultValueParseAndUnParseRoundTrip(Types.TimestampType.withZone(), defaultJson)); Assert.assertTrue( exception.getMessage().startsWith("Cannot parse default as a timestamptz value")); + + exception = + Assert.assertThrows( + IllegalArgumentException.class, + () -> defaultValueParseAndUnParseRoundTrip(Types.TimestampnsType.withZone(), defaultJson)); + Assert.assertTrue( + exception.getMessage().startsWith("Cannot parse default as a timestamptzns value")); } // serialize to json and deserialize back should return the same result diff --git a/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java b/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java index e3870f84decd..201bfc9281fe 100644 --- a/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java +++ b/core/src/test/java/org/apache/iceberg/avro/AvroDataTest.java @@ -46,15 +46,18 @@ public abstract class AvroDataTest { optional(105, "f", Types.FloatType.get()), required(106, "d", Types.DoubleType.get()), optional(107, "date", Types.DateType.get()), - required(108, "ts", Types.TimestampType.withZone()), - required(110, "s", Types.StringType.get()), - required(111, "uuid", Types.UUIDType.get()), - required(112, "fixed", Types.FixedType.ofLength(7)), - optional(113, "bytes", Types.BinaryType.get()), - required(114, "dec_9_0", Types.DecimalType.of(9, 0)), - required(115, "dec_11_2", Types.DecimalType.of(11, 2)), - required(116, "dec_38_10", Types.DecimalType.of(38, 10)), // maximum precision - required(117, "time", Types.TimeType.get())); + required(108, "tstz", Types.TimestampType.withZone()), + required(109, "ts", Types.TimestampType.withoutZone()), + required(110, "tstzns", Types.TimestampnsType.withZone()), + required(111, "tsns", Types.TimestampnsType.withoutZone()), + required(112, "s", Types.StringType.get()), + required(113, "uuid", Types.UUIDType.get()), + required(114, "fixed", Types.FixedType.ofLength(7)), + optional(115, "bytes", Types.BinaryType.get()), + required(116, "dec_9_0", Types.DecimalType.of(9, 0)), + required(117, "dec_11_2", Types.DecimalType.of(11, 2)), + required(118, "dec_38_10", Types.DecimalType.of(38, 10)), // maximum precision + required(119, "time", Types.TimeType.get())); @Test public void testSimpleStruct() throws IOException { diff --git a/core/src/test/java/org/apache/iceberg/avro/AvroTestHelpers.java b/core/src/test/java/org/apache/iceberg/avro/AvroTestHelpers.java index af35e27f5b22..65ba90b4b161 100644 --- a/core/src/test/java/org/apache/iceberg/avro/AvroTestHelpers.java +++ b/core/src/test/java/org/apache/iceberg/avro/AvroTestHelpers.java @@ -119,6 +119,7 @@ private static void assertEquals(Type type, Object expected, Object actual) { case DATE: case TIME: case TIMESTAMP: + case TIMESTAMPNS: case UUID: case FIXED: case BINARY: diff --git a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java index e135364bca66..be909b6a149a 100644 --- a/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java +++ b/core/src/test/java/org/apache/iceberg/avro/TestSchemaConversions.java @@ -52,6 +52,8 @@ public void testPrimitiveTypes() { Types.TimeType.get(), Types.TimestampType.withZone(), Types.TimestampType.withoutZone(), + Types.TimestampnsType.withZone(), + Types.TimestampnsType.withoutZone(), Types.StringType.get(), Types.UUIDType.get(), Types.FixedType.ofLength(12), @@ -71,6 +73,10 @@ public void testPrimitiveTypes() { LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), true), addAdjustToUtc( LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), false), + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), true), + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), false), Schema.create(Schema.Type.STRING), LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16)), Schema.createFixed("fixed_12", null, null, 12), @@ -100,6 +106,11 @@ public void testAvroToIcebergTimestampTypeWithoutAdjustToUTC() { Schema avroType = LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)); assertThat(AvroSchemaUtil.convert(avroType)).isEqualTo(expectedIcebergType); + + expectedIcebergType = Types.TimestampnsType.withoutZone(); + avroType = IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)); + + assertThat(AvroSchemaUtil.convert(avroType)).isEqualTo(expectedIcebergType); } private Schema addAdjustToUtc(Schema schema, boolean adjustToUTC) { @@ -120,11 +131,13 @@ public void testStructAndPrimitiveTypes() { optional(27, "time", Types.TimeType.get()), optional(28, "timestamptz", Types.TimestampType.withZone()), optional(29, "timestamp", Types.TimestampType.withoutZone()), - optional(30, "string", Types.StringType.get()), - optional(31, "uuid", Types.UUIDType.get()), - optional(32, "fixed", Types.FixedType.ofLength(16)), - optional(33, "binary", Types.BinaryType.get()), - optional(34, "decimal", Types.DecimalType.of(14, 2))); + optional(30, "timestamptz_ns", Types.TimestampnsType.withZone()), + optional(31, "timestamp_ns", Types.TimestampnsType.withoutZone()), + optional(32, "string", Types.StringType.get()), + optional(33, "uuid", Types.UUIDType.get()), + optional(34, "fixed", Types.FixedType.ofLength(16)), + optional(35, "binary", Types.BinaryType.get()), + optional(36, "decimal", Types.DecimalType.of(14, 2))); Schema schema = record( @@ -150,15 +163,27 @@ public void testStructAndPrimitiveTypes() { addAdjustToUtc( LogicalTypes.timestampMicros().addToSchema(Schema.create(Schema.Type.LONG)), false)), - optionalField(30, "string", Schema.create(Schema.Type.STRING)), + optionalField( + 30, + "timestamptz_ns", + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), + true)), optionalField( 31, + "timestamp_ns", + addAdjustToUtc( + IcebergLogicalTypes.timestampNanos().addToSchema(Schema.create(Schema.Type.LONG)), + false)), + optionalField(32, "string", Schema.create(Schema.Type.STRING)), + optionalField( + 33, "uuid", LogicalTypes.uuid().addToSchema(Schema.createFixed("uuid_fixed", null, null, 16))), - optionalField(32, "fixed", Schema.createFixed("fixed_16", null, null, 16)), - optionalField(33, "binary", Schema.create(Schema.Type.BYTES)), + optionalField(34, "fixed", Schema.createFixed("fixed_16", null, null, 16)), + optionalField(35, "binary", Schema.create(Schema.Type.BYTES)), optionalField( - 34, + 36, "decimal", LogicalTypes.decimal(14, 2) .addToSchema(Schema.createFixed("decimal_14_2", null, null, 6)))); diff --git a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java index a0ffe0af05b5..f111dd747388 100644 --- a/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java +++ b/core/src/test/java/org/apache/iceberg/expressions/TestExpressionParser.java @@ -43,6 +43,7 @@ public class TestExpressionParser { required(106, "d", Types.DoubleType.get()), optional(107, "date", Types.DateType.get()), required(108, "ts", Types.TimestampType.withoutZone()), + required(109, "tsns", Types.TimestampnsType.withoutZone()), required(110, "s", Types.StringType.get()), required(111, "uuid", Types.UUIDType.get()), required(112, "fixed", Types.FixedType.ofLength(7)), @@ -68,6 +69,7 @@ public void testSimpleExpressions() { Expressions.equal("d", 100.0d), Expressions.equal("date", "2022-08-14"), Expressions.equal("ts", "2022-08-14T10:00:00.123456"), + Expressions.equal("tsns", "2022-08-14T10:00:00.123456789"), Expressions.equal("uuid", UUID.randomUUID()), Expressions.equal("fixed", new byte[] {1, 2, 3, 4, 5, 6, 7}), Expressions.equal("bytes", ByteBuffer.wrap(new byte[] {1, 3, 5})), @@ -93,6 +95,9 @@ public void testSimpleExpressions() { Expressions.or( Expressions.greaterThan(Expressions.day("ts"), "2022-08-14"), Expressions.equal("date", "2022-08-14")), + Expressions.or( + Expressions.greaterThan(Expressions.day("tsns"), "2022-08-14"), + Expressions.equal("date", "2022-08-14")), Expressions.not(Expressions.in("l", 1, 2, 3, 4)) };