diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index b45bfe3d5dd2..d20690e2ad34 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -81,6 +81,7 @@ import io.trino.spi.type.LongTimestampWithTimeZone; import io.trino.spi.type.TimestampWithTimeZoneType; import io.trino.spi.type.TypeManager; +import io.trino.spi.type.TypeOperators; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; @@ -128,13 +129,13 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.BiPredicate; import java.util.function.Function; import java.util.function.Supplier; import java.util.regex.Matcher; @@ -174,6 +175,7 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.FORMAT_VERSION_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; +import static io.trino.plugin.iceberg.IcebergUtil.canEnforceColumnConstraintInAllSpecs; import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; import static io.trino.plugin.iceberg.IcebergUtil.getColumns; @@ -230,6 +232,7 @@ public class IcebergMetadata public static final String ORC_BLOOM_FILTER_FPP_KEY = "orc.bloom.filter.fpp"; private final TypeManager typeManager; + private final TypeOperators typeOperators; private final JsonCodec commitTaskCodec; private final TrinoCatalog catalog; private final HdfsEnvironment hdfsEnvironment; @@ -240,11 +243,13 @@ public class IcebergMetadata public IcebergMetadata( TypeManager typeManager, + TypeOperators typeOperators, JsonCodec commitTaskCodec, TrinoCatalog catalog, HdfsEnvironment hdfsEnvironment) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.typeOperators = requireNonNull(typeOperators, "typeOperators is null"); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); this.catalog = requireNonNull(catalog, "catalog is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); @@ -1715,32 +1720,34 @@ public Optional> applyFilter(C IcebergTableHandle table = (IcebergTableHandle) handle; Table icebergTable = catalog.loadTable(session, table.getSchemaTableName()); - Set partitionSourceIds = identityPartitionColumnsInAllSpecs(icebergTable); - BiPredicate isIdentityPartition = (column, domain) -> partitionSourceIds.contains(column.getId()); - // Iceberg metadata columns can not be used in table scans - BiPredicate isMetadataColumn = (column, domain) -> isMetadataColumnId(column.getId()); - - TupleDomain newEnforcedConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition) - .intersect(table.getEnforcedPredicate()); - - TupleDomain remainingConstraint = constraint.getSummary() - .transformKeys(IcebergColumnHandle.class::cast) - .filter(isIdentityPartition.negate()); - - TupleDomain newUnenforcedConstraint = remainingConstraint - // TODO: Remove after completing https://github.com/trinodb/trino/issues/8759 - // Only applies to the unenforced constraint because structural types cannot be partition keys - .filter((columnHandle, predicate) -> !isStructuralType(columnHandle.getType())) - .filter(isMetadataColumn.negate()) - .intersect(table.getUnenforcedPredicate()); + Map unsupported = new LinkedHashMap<>(); + Map newEnforced = new LinkedHashMap<>(); + Map newUnenforced = new LinkedHashMap<>(); + Map domains = constraint.getSummary().getDomains().orElseThrow(() -> new IllegalArgumentException("constraint summary is NONE")); + domains.forEach((column, domain) -> { + IcebergColumnHandle columnHandle = (IcebergColumnHandle) column; + // Iceberg metadata columns can not be used to filter a table scan in Iceberg library + // TODO (https://github.com/trinodb/trino/issues/8759) structural types cannot be used to filter a table scan in Iceberg library. + if (isMetadataColumnId(columnHandle.getId()) || isStructuralType(columnHandle.getType())) { + unsupported.put(columnHandle, domain); + } + else if (canEnforceColumnConstraintInAllSpecs(typeOperators, icebergTable, columnHandle, domain)) { + newEnforced.put(columnHandle, domain); + } + else { + newUnenforced.put(columnHandle, domain); + } + }); + + TupleDomain newEnforcedConstraint = TupleDomain.withColumnDomains(newEnforced).intersect(table.getEnforcedPredicate()); + TupleDomain newUnenforcedConstraint = TupleDomain.withColumnDomains(newUnenforced).intersect(table.getUnenforcedPredicate()); if (newEnforcedConstraint.equals(table.getEnforcedPredicate()) && newUnenforcedConstraint.equals(table.getUnenforcedPredicate())) { return Optional.empty(); } + TupleDomain remainingConstraint = TupleDomain.withColumnDomains(newUnenforced).intersect(TupleDomain.withColumnDomains(unsupported)); return Optional.of(new ConstraintApplicationResult<>( new IcebergTableHandle( table.getSchemaName(), diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java index f71a4daa6351..3be12c8c16e1 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadataFactory.java @@ -18,6 +18,7 @@ import io.trino.plugin.iceberg.catalog.TrinoCatalogFactory; import io.trino.spi.security.ConnectorIdentity; import io.trino.spi.type.TypeManager; +import io.trino.spi.type.TypeOperators; import javax.inject.Inject; @@ -26,6 +27,7 @@ public class IcebergMetadataFactory { private final TypeManager typeManager; + private final TypeOperators typeOperators; private final JsonCodec commitTaskCodec; private final TrinoCatalogFactory catalogFactory; private final HdfsEnvironment hdfsEnvironment; @@ -38,6 +40,8 @@ public IcebergMetadataFactory( HdfsEnvironment hdfsEnvironment) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); + // TODO consider providing TypeOperators in ConnectorContext to increase cache reuse + this.typeOperators = new TypeOperators(); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); @@ -45,6 +49,6 @@ public IcebergMetadataFactory( public IcebergMetadata create(ConnectorIdentity identity) { - return new IcebergMetadata(typeManager, commitTaskCodec, catalogFactory.create(identity), hdfsEnvironment); + return new IcebergMetadata(typeManager, typeOperators, commitTaskCodec, catalogFactory.create(identity), hdfsEnvironment); } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index 43c1639de89d..489ad31a0235 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -22,6 +22,7 @@ import io.airlift.slice.Slice; import io.airlift.slice.SliceUtf8; import io.airlift.slice.Slices; +import io.trino.plugin.iceberg.PartitionTransforms.ColumnTransform; import io.trino.plugin.iceberg.catalog.IcebergTableOperations; import io.trino.plugin.iceberg.catalog.IcebergTableOperationsProvider; import io.trino.plugin.iceberg.catalog.TrinoCatalog; @@ -30,10 +31,15 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorTableMetadata; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.function.InvocationConvention; +import io.trino.spi.predicate.Domain; +import io.trino.spi.predicate.Range; +import io.trino.spi.predicate.ValueSet; import io.trino.spi.type.DecimalType; import io.trino.spi.type.Int128; import io.trino.spi.type.Type; import io.trino.spi.type.TypeManager; +import io.trino.spi.type.TypeOperators; import io.trino.spi.type.UuidType; import io.trino.spi.type.VarbinaryType; import io.trino.spi.type.VarcharType; @@ -55,6 +61,7 @@ import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.types.Types.StructType; +import java.lang.invoke.MethodHandle; import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; @@ -72,6 +79,7 @@ import java.util.regex.Pattern; import java.util.stream.Stream; +import static com.google.common.base.Verify.verify; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; @@ -95,6 +103,8 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation; import static io.trino.plugin.iceberg.PartitionFields.parsePartitionFields; import static io.trino.plugin.iceberg.PartitionFields.toPartitionFields; +import static io.trino.plugin.iceberg.TrinoTypes.getNextValue; +import static io.trino.plugin.iceberg.TrinoTypes.getPreviousValue; import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; import static io.trino.plugin.iceberg.util.Timestamps.timestampTzFromMicros; @@ -102,6 +112,9 @@ import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; import static io.trino.spi.StandardErrorCode.INVALID_TABLE_PROPERTY; import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED; +import static io.trino.spi.function.InvocationConvention.InvocationArgumentConvention.NEVER_NULL; +import static io.trino.spi.function.InvocationConvention.InvocationReturnConvention.FAIL_ON_NULL; +import static io.trino.spi.predicate.Utils.nativeValueToBlock; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.BooleanType.BOOLEAN; import static io.trino.spi.type.DateType.DATE; @@ -121,6 +134,7 @@ import static java.lang.Long.parseLong; import static java.lang.String.format; import static java.util.Comparator.comparing; +import static java.util.Objects.requireNonNull; import static org.apache.iceberg.BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE; import static org.apache.iceberg.BaseMetastoreTableOperations.TABLE_TYPE_PROP; import static org.apache.iceberg.LocationProviders.locationsFor; @@ -304,6 +318,109 @@ private static String quotedName(String name) return '"' + name.replace("\"", "\"\"") + '"'; } + public static boolean canEnforceColumnConstraintInAllSpecs(TypeOperators typeOperators, Table table, IcebergColumnHandle columnHandle, Domain domain) + { + return table.specs().values().stream() + .allMatch(spec -> canEnforceConstraintWithinPartitioningSpec(typeOperators, spec, columnHandle, domain)); + } + + private static boolean canEnforceConstraintWithinPartitioningSpec(TypeOperators typeOperators, PartitionSpec spec, IcebergColumnHandle column, Domain domain) + { + for (PartitionField field : spec.getFieldsBySourceId(column.getId())) { + if (canEnforceConstraintWithPartitionField(typeOperators, field, column, domain)) { + return true; + } + } + return false; + } + + private static boolean canEnforceConstraintWithPartitionField(TypeOperators typeOperators, PartitionField field, IcebergColumnHandle column, Domain domain) + { + if (field.transform().toString().equals("void")) { + // Useless for filtering. + return false; + } + if (field.transform().isIdentity()) { + // A predicate on an identity partitioning column can always be enforced. + return true; + } + + ColumnTransform transform = PartitionTransforms.getColumnTransform(field, column.getType()); + if (transform.preservesNonNull()) { + // Partitioning transform must return NULL for NULL input. + // Below we assume it never returns NULL for non-NULL input, + // so NULL values and non-NULL values are always segregated. + // In practice, this condition matches the void transform only, + // which isn't useful for filtering anyway. + return false; + } + ValueSet valueSet = domain.getValues(); + + boolean canEnforce = valueSet.getValuesProcessor().transform( + ranges -> { + MethodHandle targetTypeEqualOperator = typeOperators.getEqualOperator( + transform.getType(), InvocationConvention.simpleConvention(FAIL_ON_NULL, NEVER_NULL, NEVER_NULL)); + for (Range range : ranges.getOrderedRanges()) { + if (!canEnforceRangeWithPartitioningField(field, transform, range, targetTypeEqualOperator)) { + return false; + } + } + return true; + }, + discreteValues -> false, + allOrNone -> true); + return canEnforce; + } + + private static boolean canEnforceRangeWithPartitioningField(PartitionField field, ColumnTransform transform, Range range, MethodHandle targetTypeEqualOperator) + { + if (!transform.isMonotonic()) { + // E.g. bucketing transform + return false; + } + io.trino.spi.type.Type type = range.getType(); + if (!type.isOrderable()) { + return false; + } + if (!range.isLowUnbounded()) { + Object boundedValue = range.getLowBoundedValue(); + Optional adjacentValue = range.isLowInclusive() ? getPreviousValue(type, boundedValue) : getNextValue(type, boundedValue); + if (adjacentValue.isEmpty() || yieldSamePartitioningValue(field, transform, type, boundedValue, adjacentValue.get(), targetTypeEqualOperator)) { + return false; + } + } + if (!range.isHighUnbounded()) { + Object boundedValue = range.getHighBoundedValue(); + Optional adjacentValue = range.isHighInclusive() ? getNextValue(type, boundedValue) : getPreviousValue(type, boundedValue); + if (adjacentValue.isEmpty() || yieldSamePartitioningValue(field, transform, type, boundedValue, adjacentValue.get(), targetTypeEqualOperator)) { + return false; + } + } + return true; + } + + private static boolean yieldSamePartitioningValue( + PartitionField field, + ColumnTransform transform, + io.trino.spi.type.Type sourceType, + Object first, + Object second, + MethodHandle targetTypeEqualOperator) + { + requireNonNull(first, "first is null"); + requireNonNull(second, "second is null"); + Object firstTransformed = transform.getValueTransform().apply(nativeValueToBlock(sourceType, first), 0); + Object secondTransformed = transform.getValueTransform().apply(nativeValueToBlock(sourceType, second), 0); + // The pushdown logic assumes NULLs and non-NULLs are segregated, so that we have to think about non-null values only. + verify(firstTransformed != null && secondTransformed != null, "Transform for %s returned null for non-null input", field); + try { + return (boolean) targetTypeEqualOperator.invoke(firstTransformed, secondTransformed); + } + catch (Throwable throwable) { + throw new RuntimeException(throwable); + } + } + public static Object deserializePartitionValue(Type type, String valueString, String name) { if (valueString == null) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTransforms.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTransforms.java index 5bbf991c8461..b9db138346e0 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTransforms.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/PartitionTransforms.java @@ -167,7 +167,7 @@ public static ColumnTransform getColumnTransform(PartitionField field, Type sour private static ColumnTransform identity(Type type) { - return new ColumnTransform(type, Function.identity(), ValueTransform.identity(type)); + return new ColumnTransform(type, false, true, Function.identity(), ValueTransform.identity(type)); } @VisibleForTesting @@ -176,6 +176,8 @@ static ColumnTransform bucket(Type type, int count) Hasher hasher = getBucketingHash(type); return new ColumnTransform( INTEGER, + false, + false, block -> bucketBlock(block, count, hasher), (block, position) -> { if (block.isNull(position)) { @@ -230,6 +232,8 @@ private static ColumnTransform yearsFromDate() LongUnaryOperator transform = value -> epochYear(DAYS.toMillis(value)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(DATE, INTEGER, block, transform), ValueTransform.from(DATE, transform)); } @@ -239,6 +243,8 @@ private static ColumnTransform monthsFromDate() LongUnaryOperator transform = value -> epochMonth(DAYS.toMillis(value)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(DATE, INTEGER, block, transform), ValueTransform.from(DATE, transform)); } @@ -248,6 +254,8 @@ private static ColumnTransform daysFromDate() LongUnaryOperator transform = LongUnaryOperator.identity(); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(DATE, INTEGER, block, transform), ValueTransform.from(DATE, transform)); } @@ -257,6 +265,8 @@ private static ColumnTransform yearsFromTimestamp() LongUnaryOperator transform = epochMicros -> epochYear(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(TIMESTAMP_MICROS, INTEGER, block, transform), ValueTransform.from(TIMESTAMP_MICROS, transform)); } @@ -266,6 +276,8 @@ private static ColumnTransform monthsFromTimestamp() LongUnaryOperator transform = epochMicros -> epochMonth(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(TIMESTAMP_MICROS, INTEGER, block, transform), ValueTransform.from(TIMESTAMP_MICROS, transform)); } @@ -275,6 +287,8 @@ private static ColumnTransform daysFromTimestamp() LongUnaryOperator transform = epochMicros -> epochDay(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(TIMESTAMP_MICROS, INTEGER, block, transform), ValueTransform.from(TIMESTAMP_MICROS, transform)); } @@ -284,6 +298,8 @@ private static ColumnTransform hoursFromTimestamp() LongUnaryOperator transform = epochMicros -> epochHour(floorDiv(epochMicros, MICROSECONDS_PER_MILLISECOND)); return new ColumnTransform( INTEGER, + false, + true, block -> transformBlock(TIMESTAMP_MICROS, INTEGER, block, transform), ValueTransform.from(TIMESTAMP_MICROS, transform)); } @@ -293,6 +309,8 @@ private static ColumnTransform yearsFromTimestampWithTimeZone() ToLongFunction transform = value -> epochYear(value.getEpochMillis()); return new ColumnTransform( INTEGER, + false, + true, block -> extractTimestampWithTimeZone(block, transform), ValueTransform.fromTimestampTzTransform(transform)); } @@ -302,6 +320,8 @@ private static ColumnTransform monthsFromTimestampWithTimeZone() ToLongFunction transform = value -> epochMonth(value.getEpochMillis()); return new ColumnTransform( INTEGER, + false, + true, block -> extractTimestampWithTimeZone(block, transform), ValueTransform.fromTimestampTzTransform(transform)); } @@ -311,6 +331,8 @@ private static ColumnTransform daysFromTimestampWithTimeZone() ToLongFunction transform = value -> epochDay(value.getEpochMillis()); return new ColumnTransform( INTEGER, + false, + true, block -> extractTimestampWithTimeZone(block, transform), ValueTransform.fromTimestampTzTransform(transform)); } @@ -320,6 +342,8 @@ private static ColumnTransform hoursFromTimestampWithTimeZone() ToLongFunction transform = value -> epochHour(value.getEpochMillis()); return new ColumnTransform( INTEGER, + false, + true, block -> extractTimestampWithTimeZone(block, transform), ValueTransform.fromTimestampTzTransform(transform)); } @@ -431,6 +455,8 @@ private static ColumnTransform truncateInteger(int width) { return new ColumnTransform( INTEGER, + false, + true, block -> truncateInteger(block, width), (block, position) -> { if (block.isNull(position)) { @@ -463,6 +489,8 @@ private static ColumnTransform truncateBigint(int width) { return new ColumnTransform( BIGINT, + false, + true, block -> truncateBigint(block, width), (block, position) -> { if (block.isNull(position)) { @@ -496,6 +524,8 @@ private static ColumnTransform truncateShortDecimal(Type type, int width, Decima BigInteger unscaledWidth = BigInteger.valueOf(width); return new ColumnTransform( type, + false, + true, block -> truncateShortDecimal(decimal, block, unscaledWidth), (block, position) -> { if (block.isNull(position)) { @@ -531,6 +561,8 @@ private static ColumnTransform truncateLongDecimal(Type type, int width, Decimal BigInteger unscaledWidth = BigInteger.valueOf(width); return new ColumnTransform( type, + false, + true, block -> truncateLongDecimal(decimal, block, unscaledWidth), (block, position) -> { if (block.isNull(position)) { @@ -576,6 +608,8 @@ private static ColumnTransform truncateVarchar(int width) { return new ColumnTransform( VARCHAR, + false, + true, block -> truncateVarchar(block, width), (block, position) -> { if (block.isNull(position)) { @@ -615,6 +649,8 @@ private static ColumnTransform truncateVarbinary(int width) { return new ColumnTransform( VARBINARY, + false, + true, block -> truncateVarbinary(block, width), (block, position) -> { if (block.isNull(position)) { @@ -651,6 +687,8 @@ private static ColumnTransform voidTransform(Type type) Block nullBlock = nativeValueToBlock(type, null); return new ColumnTransform( type, + true, + true, block -> new RunLengthEncodedBlock(nullBlock, block.getPositionCount()), (block, position) -> null); } @@ -703,21 +741,38 @@ private interface Hasher public static class ColumnTransform { private final Type type; + private final boolean preservesNonNull; + private final boolean monotonic; private final Function blockTransform; private final ValueTransform valueTransform; - public ColumnTransform(Type type, Function blockTransform, ValueTransform valueTransform) + public ColumnTransform(Type type, boolean preservesNonNull, boolean monotonic, Function blockTransform, ValueTransform valueTransform) { this.type = requireNonNull(type, "type is null"); + this.preservesNonNull = preservesNonNull; + this.monotonic = monotonic; this.blockTransform = requireNonNull(blockTransform, "transform is null"); this.valueTransform = requireNonNull(valueTransform, "valueTransform is null"); } + /** + * Result type. + */ public Type getType() { return type; } + public boolean preservesNonNull() + { + return preservesNonNull; + } + + public boolean isMonotonic() + { + return monotonic; + } + public Function getBlockTransform() { return blockTransform; diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoTypes.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoTypes.java new file mode 100644 index 000000000000..41e28dd5a965 --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/TrinoTypes.java @@ -0,0 +1,175 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg; + +import io.trino.spi.type.LongTimestampWithTimeZone; +import io.trino.spi.type.TimestampType; +import io.trino.spi.type.TimestampWithTimeZoneType; +import io.trino.spi.type.Type; +import io.trino.spi.type.Type.Range; + +import java.util.Optional; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MICROSECOND; +import static io.trino.spi.type.Timestamps.PICOSECONDS_PER_MILLISECOND; +import static io.trino.spi.type.TinyintType.TINYINT; +import static java.util.Objects.requireNonNull; + +public final class TrinoTypes +{ + private TrinoTypes() {} + + /** + * Returns the maximum value that compares less than {@code value}. + *

+ * The type of the value must match {@link Type#getJavaType}. + * + * @throws IllegalStateException if this type is not {@link Type#isOrderable() orderable} + */ + public static Optional getPreviousValue(Type type, Object value) + { + if (!type.isOrderable()) { + throw new IllegalArgumentException("Type is not orderable: " + type); + } + requireNonNull(value, "value is null"); + + if (type == TINYINT || type == SMALLINT || type == INTEGER || type == BIGINT) { + Range typeRange = type.getRange().orElseThrow(); + return getAdjacentValue((long) typeRange.getMin(), (long) typeRange.getMax(), (long) value, Direction.PREV); + } + + if (type == DATE) { + // TODO update the code here when type implements getRange + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + return getAdjacentValue(Integer.MIN_VALUE, Integer.MAX_VALUE, (long) value, Direction.PREV); + } + + if (type instanceof TimestampType) { + // Iceberg supports only timestamp(6) + checkArgument(((TimestampType) type).getPrecision() == 6, "Unexpected type: %s", type); + // TODO update the code here when type implements getRange + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + return getAdjacentValue(Long.MIN_VALUE, Long.MAX_VALUE, (long) value, Direction.PREV); + } + + if (type instanceof TimestampWithTimeZoneType) { + // Iceberg supports only timestamp(6) + checkArgument(((TimestampWithTimeZoneType) type).getPrecision() == 6, "Unexpected type: %s", type); + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + LongTimestampWithTimeZone timestampTzValue = (LongTimestampWithTimeZone) value; + long epochMillis = timestampTzValue.getEpochMillis(); + int picosOfMilli = timestampTzValue.getPicosOfMilli(); + // Calculate value 1 microsecond earlier + picosOfMilli -= PICOSECONDS_PER_MICROSECOND; + if (picosOfMilli < 0) { + if (epochMillis == Long.MIN_VALUE) { + return Optional.empty(); + } + epochMillis--; + picosOfMilli += PICOSECONDS_PER_MILLISECOND; + } + // The zone doesn't matter for timestamp with time zone comparisons. Use UTC to avoid confusion. + return Optional.of(LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMilli, UTC_KEY)); + } + + return Optional.empty(); + } + + /** + * Returns the minimum value that compares greater than {@code value}. + *

+ * The type of the value must match {@link Type#getJavaType}. + * + * @throws IllegalStateException if this type is not {@link Type#isOrderable() orderable} + */ + public static Optional getNextValue(Type type, Object value) + { + if (!type.isOrderable()) { + throw new IllegalArgumentException("Type is not orderable: " + type); + } + requireNonNull(value, "value is null"); + + if (type == TINYINT || type == SMALLINT || type == INTEGER || type == BIGINT) { + Range typeRange = type.getRange().orElseThrow(); + return getAdjacentValue((long) typeRange.getMin(), (long) typeRange.getMax(), (long) value, Direction.NEXT); + } + + if (type == DATE) { + // TODO update the code here when type implements getRange + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + return getAdjacentValue(Integer.MIN_VALUE, Integer.MAX_VALUE, (long) value, Direction.NEXT); + } + + if (type instanceof TimestampType) { + // Iceberg supports only timestamp(6) + checkArgument(((TimestampType) type).getPrecision() == 6, "Unexpected type: %s", type); + // TODO update the code here when type implements getRange + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + return getAdjacentValue(Long.MIN_VALUE, Long.MAX_VALUE, (long) value, Direction.NEXT); + } + + if (type instanceof TimestampWithTimeZoneType) { + // Iceberg supports only timestamp(6) + checkArgument(((TimestampWithTimeZoneType) type).getPrecision() == 6, "Unexpected type: %s", type); + verify(type.getRange().isEmpty(), "Type %s unexpectedly returned a range", type); + LongTimestampWithTimeZone timestampTzValue = (LongTimestampWithTimeZone) value; + long epochMillis = timestampTzValue.getEpochMillis(); + int picosOfMilli = timestampTzValue.getPicosOfMilli(); + // Calculate value 1 microsecond later + picosOfMilli += PICOSECONDS_PER_MICROSECOND; + if (picosOfMilli >= PICOSECONDS_PER_MILLISECOND) { + if (epochMillis == Long.MAX_VALUE) { + return Optional.empty(); + } + epochMillis++; + picosOfMilli -= PICOSECONDS_PER_MILLISECOND; + } + // The zone doesn't matter for timestamp with time zone comparisons. Use UTC to avoid confusion. + return Optional.of(LongTimestampWithTimeZone.fromEpochMillisAndFraction(epochMillis, picosOfMilli, UTC_KEY)); + } + + return Optional.empty(); + } + + private static Optional getAdjacentValue(long min, long max, long value, Direction direction) + { + switch (direction) { + case PREV: + if (value == min) { + return Optional.empty(); + } + return Optional.of(value - 1); + + case NEXT: + if (value == max) { + return Optional.empty(); + } + return Optional.of(value + 1); + } + throw new UnsupportedOperationException("Unsupported direction: " + direction); + } + + private enum Direction + { + PREV, + NEXT + } +} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 8e99cc1504a5..ba2435151343 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -30,6 +30,7 @@ import io.trino.spi.connector.TableNotFoundException; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.TupleDomain; +import io.trino.sql.planner.plan.FilterNode; import io.trino.testing.BaseConnectorTest; import io.trino.testing.DataProviders; import io.trino.testing.MaterializedResult; @@ -93,6 +94,7 @@ import static io.trino.spi.predicate.Domain.singleValue; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.sql.planner.OptimizerConfig.JoinDistributionType.BROADCAST; import static io.trino.testing.MaterializedResult.resultBuilder; @@ -1221,6 +1223,19 @@ public void testHourTransform() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_hour_transform WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_hour_transform WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_hour_transform WHERE d >= DATE '2015-05-15'")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_hour_transform WHERE d >= TIMESTAMP '2015-05-15 12:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_hour_transform WHERE d >= TIMESTAMP '2015-05-15 12:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_hour_transform"); } @@ -1270,6 +1285,20 @@ public void testDayTransformDate() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 12e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_day_transform_date WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_date WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_day_transform_date WHERE d >= DATE '2015-01-13'")) + .isFullyPushedDown(); + + // d comparison with TIMESTAMP can be unwrapped + assertThat(query("SELECT * FROM test_day_transform_date WHERE d >= TIMESTAMP '2015-01-13 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_date WHERE d >= TIMESTAMP '2015-01-13 00:00:00.000001'")) + .isFullyPushedDown(); + dropTable("test_day_transform_date"); } @@ -1332,6 +1361,19 @@ public void testDayTransformTimestamp() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_day_transform_timestamp WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_timestamp WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_day_transform_timestamp WHERE d >= DATE '2015-05-15'")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_day_transform_timestamp WHERE d >= TIMESTAMP '2015-05-15 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_timestamp WHERE d >= TIMESTAMP '2015-05-15 00:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_day_transform_timestamp"); } @@ -1394,6 +1436,20 @@ public void testDayTransformTimestampWithTimeZone() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_day_transform_timestamptz WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_timestamptz WHERE d IS NULL")) + .isFullyPushedDown(); + + // Tests run with non-UTC session, so timestamp_tz > a_date will not align with partition boundaries. Use with_timezone to align it. + assertThat(query("SELECT * FROM test_day_transform_timestamptz WHERE d >= with_timezone(DATE '2015-05-15', 'UTC')")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_day_transform_timestamptz WHERE d >= TIMESTAMP '2015-05-15 00:00:00 UTC'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_day_transform_timestamptz WHERE d >= TIMESTAMP '2015-05-15 00:00:00.000001 UTC'")) + .isNotFullyPushedDown(FilterNode.class); + assertUpdate("DROP TABLE test_day_transform_timestamptz"); } @@ -1447,6 +1503,22 @@ public void testMonthTransformDate() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 15e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_month_transform_date WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_date WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_month_transform_date WHERE d >= DATE '2020-06-01'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_date WHERE d >= DATE '2020-06-02'")) + .isNotFullyPushedDown(FilterNode.class); + + // d comparison with TIMESTAMP can be unwrapped + assertThat(query("SELECT * FROM test_month_transform_date WHERE d >= TIMESTAMP '2015-06-01 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_date WHERE d >= TIMESTAMP '2015-05-01 00:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_month_transform_date"); } @@ -1507,6 +1579,21 @@ public void testMonthTransformTimestamp() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d >= DATE '2015-05-01'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d >= DATE '2015-05-02'")) + .isNotFullyPushedDown(FilterNode.class); + + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d >= TIMESTAMP '2015-05-01 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamp WHERE d >= TIMESTAMP '2015-05-01 00:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_month_transform_timestamp"); } @@ -1567,6 +1654,22 @@ public void testMonthTransformTimestampWithTimeZone() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d IS NULL")) + .isFullyPushedDown(); + + // Tests run with non-UTC session, so timestamp_tz > a_date will not align with partition boundaries. Use with_timezone to align it. + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d >= with_timezone(DATE '2015-05-01', 'UTC')")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d >= with_timezone(DATE '2015-05-02', 'UTC')")) + .isNotFullyPushedDown(FilterNode.class); + + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d >= TIMESTAMP '2015-05-01 00:00:00 UTC'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_month_transform_timestamptz WHERE d >= TIMESTAMP '2015-05-01 00:00:00.000001 UTC'")) + .isNotFullyPushedDown(FilterNode.class); + assertUpdate("DROP TABLE test_month_transform_timestamptz"); } @@ -1615,6 +1718,22 @@ public void testYearTransformDate() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_year_transform_date WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_date WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_year_transform_date WHERE d >= DATE '2015-01-01'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_date WHERE d >= DATE '2015-01-02'")) + .isNotFullyPushedDown(FilterNode.class); + + // d comparison with TIMESTAMP can be unwrapped + assertThat(query("SELECT * FROM test_year_transform_date WHERE d >= TIMESTAMP '2015-01-01 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_date WHERE d >= TIMESTAMP '2015-01-01 00:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_year_transform_date"); } @@ -1673,6 +1792,21 @@ public void testYearTransformTimestamp() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d >= DATE '2015-01-01'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d >= DATE '2015-01-02'")) + .isNotFullyPushedDown(FilterNode.class); + + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d >= TIMESTAMP '2015-01-01 00:00:00'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamp WHERE d >= TIMESTAMP '2015-01-01 00:00:00.000001'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_year_transform_timestamp"); } @@ -1731,6 +1865,22 @@ public void testYearTransformTimestampWithTimeZone() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 13e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d IS NULL")) + .isFullyPushedDown(); + + // Tests run with non-UTC session, so timestamp_tz > a_date will not align with partition boundaries. Use with_timezone to align it. + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d >= with_timezone(DATE '2015-01-01', 'UTC')")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d >= with_timezone(DATE '2015-01-02', 'UTC')")) + .isNotFullyPushedDown(FilterNode.class); + + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d >= TIMESTAMP '2015-01-01 00:00:00 UTC'")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_year_transform_timestamptz WHERE d >= TIMESTAMP '2015-01-01 00:00:00.000001 UTC'")) + .isNotFullyPushedDown(FilterNode.class); + assertUpdate("DROP TABLE test_year_transform_timestamptz"); } @@ -1773,6 +1923,23 @@ public void testTruncateTextTransform() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 8e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d >= 'ab'")) + .isNotFullyPushedDown(FilterNode.class); // TODO subsume partition boundary filters on varchar + // Currently, prefix-checking LIKE -> range conversion is part of DomainTranslator and doesn't allow for filter elimination. TODO subsume prefix-checking LIKE with truncate(). + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d LIKE 'ab%'")) + .isNotFullyPushedDown(FilterNode.class); + // condition to long to subsume, we use truncate(2) + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d >= 'abc'")) + .isNotFullyPushedDown(FilterNode.class); + // condition to long to subsume, we use truncate(2) + assertThat(query("SELECT * FROM test_truncate_text_transform WHERE d LIKE 'abc%'")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_truncate_text_transform"); } @@ -1833,6 +2000,18 @@ public void testTruncateIntegerTransform(String dataType) " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 16e0, NULL, NULL)"); + assertThat(query("SELECT * FROM " + table + " WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM " + table + " WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM " + table + " WHERE d >= 10")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM " + table + " WHERE d > 10")) + .isNotFullyPushedDown(FilterNode.class); + assertThat(query("SELECT * FROM " + table + " WHERE d >= 11")) + .isNotFullyPushedDown(FilterNode.class); + dropTable(table); } @@ -1885,6 +2064,18 @@ public void testTruncateDecimalTransform() " ('b', NULL, NULL, 0e0, NULL, '1', '101'), " + " (NULL, NULL, NULL, NULL, 6e0, NULL, NULL)"); + assertThat(query("SELECT * FROM test_truncate_decimal_transform WHERE d IS NOT NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM test_truncate_decimal_transform WHERE d IS NULL")) + .isFullyPushedDown(); + + assertThat(query("SELECT * FROM test_truncate_decimal_transform WHERE d >= 12.20")) + .isNotFullyPushedDown(FilterNode.class); // TODO subsume partition boundary filters on decimals + assertThat(query("SELECT * FROM test_truncate_decimal_transform WHERE d > 12.20")) + .isNotFullyPushedDown(FilterNode.class); + assertThat(query("SELECT * FROM test_truncate_decimal_transform WHERE d >= 12.21")) + .isNotFullyPushedDown(FilterNode.class); + dropTable("test_truncate_decimal_transform"); } @@ -1893,6 +2084,7 @@ public void testBucketTransform() { testBucketTransformForType("DATE", "DATE '2020-05-19'", "DATE '2020-08-19'", "DATE '2020-11-19'"); testBucketTransformForType("VARCHAR", "CAST('abcd' AS VARCHAR)", "CAST('mommy' AS VARCHAR)", "CAST('abxy' AS VARCHAR)"); + testBucketTransformForType("INTEGER", "10", "12", "20"); testBucketTransformForType("BIGINT", "CAST(100000000 AS BIGINT)", "CAST(200000002 AS BIGINT)", "CAST(400000001 AS BIGINT)"); testBucketTransformForType( "UUID", @@ -1912,6 +2104,10 @@ protected void testBucketTransformForType( assertUpdate(format("CREATE TABLE %s (d %s) WITH (partitioning = ARRAY['bucket(d, 2)'])", tableName, type)); assertUpdate(format("INSERT INTO %s VALUES (NULL), (%s), (%s), (%s)", tableName, value, greaterValueInSameBucket, valueInOtherBucket), 4); assertThat(query(format("SELECT * FROM %s", tableName))).matches(format("VALUES (NULL), (%s), (%s), (%s)", value, greaterValueInSameBucket, valueInOtherBucket)); + assertThat(query(format("SELECT * FROM %s WHERE d <= %s AND (rand() = 42 OR d != %s)", tableName, value, valueInOtherBucket))) + .matches("VALUES " + value); + assertThat(query(format("SELECT * FROM %s WHERE d >= %s AND (rand() = 42 OR d != %s)", tableName, greaterValueInSameBucket, valueInOtherBucket))) + .matches("VALUES " + greaterValueInSameBucket); String selectFromPartitions = format("SELECT partition.d_bucket, record_count, data.d.min AS d_min, data.d.max AS d_max FROM \"%s$partitions\"", tableName); @@ -1931,6 +2127,35 @@ protected void testBucketTransformForType( " ('d', NULL, 0.25e0, NULL), " + " (NULL, NULL, NULL, 4e0)"); + assertThat(query("SELECT * FROM " + tableName + " WHERE d IS NULL")) + .isFullyPushedDown(); + assertThat(query("SELECT * FROM " + tableName + " WHERE d IS NOT NULL")) + .isNotFullyPushedDown(FilterNode.class); // this could be subsumed + + // Bucketing transform doesn't allow comparison filter elimination + if (format == PARQUET && type.equals("UUID")) { + // TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results + assertThatThrownBy(() -> assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + value)).isNotFullyPushedDown(FilterNode.class)) + .isInstanceOf(AssertionError.class) + .hasMessageMatching("(?s)\\[Rows for query .*to contain exactly in any order:.*but could not find the following elements:.*"); + } + else { + assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + value)) + .isNotFullyPushedDown(FilterNode.class); + } + assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + greaterValueInSameBucket)) + .isNotFullyPushedDown(FilterNode.class); + if (format == PARQUET && type.equals("UUID")) { + // TODO (https://github.com/trinodb/trino/issues/12834): reading Parquet with UUID filter yields incorrect results + assertThatThrownBy(() -> assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + valueInOtherBucket)).isNotFullyPushedDown(FilterNode.class)) + .isInstanceOf(AssertionError.class) + .hasMessageMatching("(?s)\\[Rows for query .*to contain exactly in any order:.*but could not find the following elements:.*"); + } + else { + assertThat(query("SELECT * FROM " + tableName + " WHERE d >= " + valueInOtherBucket)) + .isNotFullyPushedDown(FilterNode.class); + } + dropTable(tableName); } @@ -1994,6 +2219,14 @@ public void testVoidTransform() " ('b', NULL, NULL, 0e0, NULL, '1', '7'), " + " (NULL, NULL, NULL, NULL, 7e0, NULL, NULL)"); + // Void transform doesn't allow filter elimination + assertThat(query("SELECT * FROM test_void_transform WHERE d IS NULL")) + .isNotFullyPushedDown(FilterNode.class); + assertThat(query("SELECT * FROM test_void_transform WHERE d IS NOT NULL")) + .isNotFullyPushedDown(FilterNode.class); + assertThat(query("SELECT * FROM test_void_transform WHERE d >= 'abc'")) + .isNotFullyPushedDown(FilterNode.class); + assertUpdate("DROP TABLE " + "test_void_transform"); } @@ -3450,7 +3683,12 @@ public void testOptimizeTimePartitionedTable(String dataType, String partitionin .as("file count after optimize date") .isGreaterThanOrEqualTo(5); - assertUpdate("ALTER TABLE " + tableName + " EXECUTE optimize WHERE p >= " + optimizeDate); + assertUpdate( + // Use UTC zone so that DATE and TIMESTAMP WITH TIME ZONE comparisons align with partition boundaries. + Session.builder(getSession()) + .setTimeZoneKey(UTC_KEY) + .build(), + "ALTER TABLE " + tableName + " EXECUTE optimize WHERE p >= " + optimizeDate); assertThat((long) computeScalar("SELECT count(DISTINCT \"$path\") FROM " + tableName + " WHERE p < " + optimizeDate)) .as("file count before optimize date, after the optimize") @@ -3467,6 +3705,12 @@ public static Object[][] testOptimizeTimePartitionedTableDataProvider() { return new Object[][] { {"date", "%s", 15}, + {"date", "day(%s)", 15}, + {"date", "month(%s)", 3}, + {"timestamp(6)", "day(%s)", 15}, + {"timestamp(6)", "month(%s)", 3}, + {"timestamp(6) with time zone", "day(%s)", 15}, + {"timestamp(6) with time zone", "month(%s)", 3}, }; }