diff --git a/docs/src/main/sphinx/connector/delta-lake.md b/docs/src/main/sphinx/connector/delta-lake.md index 1bbd1bb95e9c..1432e4571076 100644 --- a/docs/src/main/sphinx/connector/delta-lake.md +++ b/docs/src/main/sphinx/connector/delta-lake.md @@ -124,6 +124,10 @@ values. Typical usage does not require you to configure them. * - `delta.checkpoint-row-statistics-writing.enabled` - Enable writing row statistics to checkpoint files. - `true` +* - ``delta.checkpoint-filtering.enabled`` + - Enable partition pruning when reading checkpoint files. + The equivalent catalog session property is ``checkpoint_filtering_enabled``. + - ``false`` * - `delta.dynamic-filtering.wait-timeout` - Duration to wait for completion of [dynamic filtering](/admin/dynamic-filtering) during split generation. The equivalent diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java index 9dd03f686b18..f92a5b893b11 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeConfig.java @@ -62,6 +62,7 @@ public class DeltaLakeConfig private boolean unsafeWritesEnabled; private boolean checkpointRowStatisticsWritingEnabled = true; private long defaultCheckpointWritingInterval = 10; + private boolean checkpointFilteringEnabled; private Duration vacuumMinRetention = new Duration(7, DAYS); private Optional hiveCatalogName = Optional.empty(); private Duration dynamicFilteringWaitTimeout = new Duration(0, SECONDS); @@ -269,6 +270,18 @@ public long getDefaultCheckpointWritingInterval() return defaultCheckpointWritingInterval; } + public boolean isCheckpointPartitionFilterEnabled() + { + return checkpointFilteringEnabled; + } + + @Config("delta.checkpoint-filtering.enabled") + public DeltaLakeConfig setCheckpointPartitionFilterEnabled(boolean checkpointFilteringEnabled) + { + this.checkpointFilteringEnabled = checkpointFilteringEnabled; + return this; + } + @NotNull public Duration getVacuumMinRetention() { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index fc337cdbeb1c..90dd538310f6 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -3518,7 +3518,7 @@ private OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandl private List getAddFileEntriesMatchingEnforcedPartitionConstraint(ConnectorSession session, DeltaLakeTableHandle tableHandle) { TableSnapshot tableSnapshot = getSnapshot(session, tableHandle); - List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), session); + List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), tableHandle.getEnforcedPartitionConstraint(), session); TupleDomain enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint(); if (enforcedPartitionConstraint.isAll()) { return validDataFiles; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java index 0ca4012c16e6..1179ccec7cb7 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSessionProperties.java @@ -70,6 +70,7 @@ public final class DeltaLakeSessionProperties public static final String LEGACY_CREATE_TABLE_WITH_EXISTING_LOCATION_ENABLED = "legacy_create_table_with_existing_location_enabled"; private static final String PROJECTION_PUSHDOWN_ENABLED = "projection_pushdown_enabled"; private static final String QUERY_PARTITION_FILTER_REQUIRED = "query_partition_filter_required"; + private static final String CHECKPOINT_FILTERING_ENABLED = "checkpoint_filtering_enabled"; private final List> sessionProperties; @@ -202,6 +203,11 @@ public DeltaLakeSessionProperties( QUERY_PARTITION_FILTER_REQUIRED, "Require filter on partition column", deltaLakeConfig.isQueryPartitionFilterRequired(), + false), + booleanProperty( + CHECKPOINT_FILTERING_ENABLED, + "Use filter in checkpoint reader", + deltaLakeConfig.isCheckpointPartitionFilterEnabled(), false)); } @@ -306,4 +312,9 @@ public static boolean isQueryPartitionFilterRequired(ConnectorSession session) { return session.getProperty(QUERY_PARTITION_FILTER_REQUIRED, Boolean.class); } + + public static boolean isCheckpointFilteringEnabled(ConnectorSession session) + { + return session.getProperty(CHECKPOINT_FILTERING_ENABLED, Boolean.class); + } } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java index 84b5e71caf1f..606ab5d55ce1 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java @@ -154,7 +154,7 @@ private Stream getSplits( { TableSnapshot tableSnapshot = deltaLakeTransactionManager.get(transaction, session.getIdentity()) .getSnapshot(session, tableHandle.getSchemaTableName(), tableHandle.getLocation(), tableHandle.getReadVersion()); - List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), session); + List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), tableHandle.getEnforcedPartitionConstraint(), session); TupleDomain enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint(); TupleDomain nonPartitionConstraint = tableHandle.getNonPartitionConstraint(); Domain pathDomain = getPathDomain(nonPartitionConstraint); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java index 5069517239a8..ba4fc8c37a40 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java @@ -18,6 +18,7 @@ import io.trino.filesystem.TrinoFileSystem; import io.trino.filesystem.TrinoInputFile; import io.trino.parquet.ParquetReaderOptions; +import io.trino.plugin.deltalake.DeltaLakeColumnHandle; import io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator; import io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager; import io.trino.plugin.deltalake.transactionlog.checkpoint.LastCheckpoint; @@ -26,6 +27,7 @@ import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.TypeManager; import java.io.FileNotFoundException; @@ -178,7 +180,8 @@ public Stream getCheckpointTransactionLogEntries( TypeManager typeManager, TrinoFileSystem fileSystem, FileFormatDataSourceStats stats, - Optional metadataAndProtocol) + Optional metadataAndProtocol, + TupleDomain partitionConstraint) throws IOException { if (lastCheckpoint.isEmpty()) { @@ -206,7 +209,8 @@ public Stream getCheckpointTransactionLogEntries( typeManager, stats, checkpoint, - checkpointFile))); + checkpointFile, + partitionConstraint))); } return resultStream; } @@ -225,7 +229,8 @@ private Iterator getCheckpointTransactionLogEntrie TypeManager typeManager, FileFormatDataSourceStats stats, LastCheckpoint checkpoint, - TrinoInputFile checkpointFile) + TrinoInputFile checkpointFile, + TupleDomain partitionConstraint) throws IOException { long fileSize; @@ -247,7 +252,8 @@ private Iterator getCheckpointTransactionLogEntrie stats, parquetReaderOptions, checkpointRowStatisticsWritingEnabled, - domainCompactionThreshold); + domainCompactionThreshold, + partitionConstraint); } public record MetadataAndProtocolEntry(MetadataEntry metadataEntry, ProtocolEntry protocolEntry) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java index 52ff743b8968..b503a27c28cb 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java @@ -27,6 +27,7 @@ import io.trino.filesystem.TrinoFileSystemFactory; import io.trino.filesystem.TrinoInputFile; import io.trino.parquet.ParquetReaderOptions; +import io.trino.plugin.deltalake.DeltaLakeColumnHandle; import io.trino.plugin.deltalake.DeltaLakeColumnMetadata; import io.trino.plugin.deltalake.DeltaLakeConfig; import io.trino.plugin.deltalake.transactionlog.TableSnapshot.MetadataAndProtocolEntry; @@ -39,6 +40,7 @@ import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.ArrayType; import io.trino.spi.type.BooleanType; import io.trino.spi.type.MapType; @@ -75,6 +77,8 @@ import static io.airlift.slice.SizeOf.instanceSize; import static io.trino.cache.CacheUtils.invalidateAllIf; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; +import static io.trino.plugin.deltalake.DeltaLakeSessionProperties.isCheckpointFilteringEnabled; +import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.readLastCheckpoint; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.getTransactionLogJsonEntryPath; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; @@ -219,9 +223,20 @@ public MetadataEntry getMetadataEntry(TableSnapshot tableSnapshot, ConnectorSess .orElseThrow(() -> new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Metadata not found in transaction log for " + tableSnapshot.getTable())); } + @Deprecated public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, ConnectorSession session) + { + return getActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), session); + } + + public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, ConnectorSession session) { try { + if (isCheckpointFilteringEnabled(session)) { + return loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, partitionConstraint, session).stream() + .collect(toImmutableList()); + } + TableVersion tableVersion = new TableVersion(new TableLocation(tableSnapshot.getTable(), tableSnapshot.getTableLocation()), tableSnapshot.getVersion()); DeltaLakeDataFileCacheEntry cacheEntry = activeDataFileCache.get(tableVersion, () -> { @@ -249,7 +264,7 @@ public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEn } } - List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, session); + List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), session); return new DeltaLakeDataFileCacheEntry(tableSnapshot.getVersion(), activeFiles); }); return cacheEntry.getActiveFiles(); @@ -259,7 +274,12 @@ public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEn } } - private List loadActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, ConnectorSession session) + private List loadActiveFiles( + TableSnapshot tableSnapshot, + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + TupleDomain partitionConstraint, + ConnectorSession session) { List transactions = tableSnapshot.getTransactions(); try (Stream checkpointEntries = tableSnapshot.getCheckpointTransactionLogEntries( @@ -269,8 +289,12 @@ private List loadActiveFiles(TableSnapshot tableSnapshot, Metadata typeManager, fileSystemFactory.create(session), fileFormatDataSourceStats, - Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)))) { + Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), + partitionConstraint)) { return activeAddEntries(checkpointEntries, transactions) + .filter(partitionConstraint.isAll() + ? addAction -> true + : addAction -> partitionMatchesPredicate(addAction.getCanonicalPartitionValues(), partitionConstraint.getDomains().orElseThrow())) .collect(toImmutableList()); } catch (IOException e) { @@ -407,8 +431,9 @@ private Stream getEntries( { try { List transactions = tableSnapshot.getTransactions(); + // Passing TupleDomain.all() because this method is used for getting all entries Stream checkpointEntries = tableSnapshot.getCheckpointTransactionLogEntries( - session, entryTypes, checkpointSchemaManager, typeManager, fileSystem, stats, Optional.empty()); + session, entryTypes, checkpointSchemaManager, typeManager, fileSystem, stats, Optional.empty(), TupleDomain.all()); return entryMapper.apply( checkpointEntries, diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java index 1892b44643dd..c2dca1f4e295 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java @@ -22,6 +22,7 @@ import io.airlift.log.Logger; import io.trino.filesystem.TrinoInputFile; import io.trino.parquet.ParquetReaderOptions; +import io.trino.plugin.deltalake.DeltaHiveTypeTranslator; import io.trino.plugin.deltalake.DeltaLakeColumnHandle; import io.trino.plugin.deltalake.DeltaLakeColumnMetadata; import io.trino.plugin.deltalake.transactionlog.AddFileEntry; @@ -82,6 +83,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; +import static io.trino.plugin.deltalake.DeltaLakeSplitManager.partitionMatchesPredicate; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.isDeletionVectorEnabled; import static io.trino.plugin.deltalake.transactionlog.TransactionLogAccess.columnsWithStats; @@ -140,6 +142,7 @@ public String getColumnName() private final Queue nextEntries; private final List extractors; private final boolean checkpointRowStatisticsWritingEnabled; + private final TupleDomain partitionConstraint; private MetadataEntry metadataEntry; private ProtocolEntry protocolEntry; private List schema; @@ -160,13 +163,15 @@ public CheckpointEntryIterator( FileFormatDataSourceStats stats, ParquetReaderOptions parquetReaderOptions, boolean checkpointRowStatisticsWritingEnabled, - int domainCompactionThreshold) + int domainCompactionThreshold, + TupleDomain partitionConstraint) { this.checkpointPath = checkpoint.location().toString(); this.session = requireNonNull(session, "session is null"); this.stringList = (ArrayType) typeManager.getType(TypeSignature.arrayType(VARCHAR.getTypeSignature())); this.stringMap = (MapType) typeManager.getType(TypeSignature.mapType(VARCHAR.getTypeSignature(), VARCHAR.getTypeSignature())); this.checkpointRowStatisticsWritingEnabled = checkpointRowStatisticsWritingEnabled; + this.partitionConstraint = requireNonNull(partitionConstraint, "partitionConstraint is null"); checkArgument(!fields.isEmpty(), "fields is empty"); Map extractors = ImmutableMap.builder() .put(TRANSACTION, this::buildTxnEntry) @@ -221,7 +226,7 @@ private DeltaLakeColumnHandle buildColumnHandle(EntryType entryType, CheckpointS { Type type = switch (entryType) { case TRANSACTION -> schemaManager.getTxnEntryType(); - case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, true, true); + case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, true, true, true); case REMOVE -> schemaManager.getRemoveEntryType(); case METADATA -> schemaManager.getMetadataEntryType(); case PROTOCOL -> schemaManager.getProtocolEntryType(true, true); @@ -272,7 +277,30 @@ private TupleDomain buildTupleDomainColumnHandle(EntryType ent type)), ColumnType.REGULAR, column.getComment()); - return TupleDomain.withColumnDomains(ImmutableMap.of(handle, Domain.notNull(handle.getType()))); + + ImmutableMap.Builder domains = ImmutableMap.builder() + .put(handle, Domain.notNull(handle.getType())); + if (entryType == ADD) { + partitionConstraint.getDomains().orElseThrow().forEach((key, value) -> domains.put(toPartitionValuesParsedField(column, key), value)); + } + + return TupleDomain.withColumnDomains(domains.buildOrThrow()); + } + + private static HiveColumnHandle toPartitionValuesParsedField(HiveColumnHandle addColumn, DeltaLakeColumnHandle partitionColumn) + { + return new HiveColumnHandle( + addColumn.getBaseColumnName(), + addColumn.getBaseHiveColumnIndex(), + addColumn.getBaseHiveType(), + addColumn.getBaseType(), + Optional.of(new HiveColumnProjectionInfo( + ImmutableList.of(0, 0), // hiveColumnIndex; we provide fake value because we always find columns by name + ImmutableList.of("partitionvalues_parsed", partitionColumn.getColumnName()), + DeltaHiveTypeTranslator.toHiveType(partitionColumn.getType()), + partitionColumn.getType())), + HiveColumnHandle.ColumnType.REGULAR, + addColumn.getComment()); } private DeltaLakeTransactionLogEntry buildCommitInfoEntry(ConnectorSession session, Block block, int pagePosition) @@ -431,13 +459,16 @@ private DeltaLakeTransactionLogEntry buildAddEntry(ConnectorSession session, Blo statsFieldIndex = 5; } - Optional parsedStats = Optional.ofNullable(getRowField(addEntryRow, statsFieldIndex + 1)).map(this::parseStatisticsFromParquet); + boolean partitionValuesParsedExists = addEntryRow.getUnderlyingFieldBlock(statsFieldIndex + 1) instanceof RowBlock && // partitionValues_parsed + addEntryRow.getUnderlyingFieldBlock(statsFieldIndex + 2) instanceof RowBlock; // stats_parsed + int parsedStatsIndex = partitionValuesParsedExists ? statsFieldIndex + 1 : statsFieldIndex; + Optional parsedStats = Optional.ofNullable(getRowField(addEntryRow, parsedStatsIndex + 1)).map(this::parseStatisticsFromParquet); Optional stats = Optional.empty(); if (parsedStats.isEmpty()) { stats = Optional.ofNullable(getStringField(addEntryRow, statsFieldIndex)); } - Map tags = getMapField(addEntryRow, statsFieldIndex + 2); + Map tags = getMapField(addEntryRow, parsedStatsIndex + 2); AddFileEntry result = new AddFileEntry( path, partitionValues, @@ -709,7 +740,15 @@ private void fillNextEntries() for (int i = 0; i < extractors.size(); ++i) { DeltaLakeTransactionLogEntry entry = extractors.get(i).getEntry(session, page.getBlock(i).getLoadedBlock(), pagePosition); if (entry != null) { - nextEntries.add(entry); + if (entry.getAdd() != null) { + if (partitionConstraint.isAll() || + partitionMatchesPredicate(entry.getAdd().getCanonicalPartitionValues(), partitionConstraint.getDomains().orElseThrow())) { + nextEntries.add(entry); + } + } + else { + nextEntries.add(entry); + } } } pagePosition++; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java index 290d66081fc0..3fb8df2d5b69 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java @@ -15,6 +15,7 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Inject; +import io.trino.plugin.deltalake.DeltaLakeColumnHandle; import io.trino.plugin.deltalake.DeltaLakeColumnMetadata; import io.trino.plugin.deltalake.transactionlog.MetadataEntry; import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; @@ -30,6 +31,7 @@ import java.util.Optional; import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.isDeletionVectorEnabled; import static io.trino.plugin.deltalake.transactionlog.TransactionLogAccess.columnsWithStats; @@ -112,7 +114,7 @@ public RowType getMetadataEntryType() return metadataEntryType; } - public RowType getAddEntryType(MetadataEntry metadataEntry, ProtocolEntry protocolEntry, boolean requireWriteStatsAsJson, boolean requireWriteStatsAsStruct) + public RowType getAddEntryType(MetadataEntry metadataEntry, ProtocolEntry protocolEntry, boolean requireWriteStatsAsJson, boolean requireWriteStatsAsStruct, boolean usePartitionValuesParsed) { List allColumns = extractSchema(metadataEntry, protocolEntry, typeManager); List minMaxColumns = columnsWithStats(metadataEntry, protocolEntry, typeManager); @@ -156,6 +158,15 @@ public RowType getAddEntryType(MetadataEntry metadataEntry, ProtocolEntry protoc if (requireWriteStatsAsJson) { addFields.add(RowType.field("stats", VARCHAR)); } + if (usePartitionValuesParsed) { + List partitionColumns = extractPartitionColumns(metadataEntry, protocolEntry, typeManager); + if (!partitionColumns.isEmpty()) { + List partitionValuesParsed = partitionColumns.stream() + .map(column -> RowType.field(column.getColumnName(), column.getType())) + .collect(toImmutableList()); + addFields.add(RowType.field("partitionValues_parsed", RowType.from(partitionValuesParsed))); + } + } if (requireWriteStatsAsStruct) { addFields.add(RowType.field("stats_parsed", RowType.from(statsColumns.build()))); } diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java index 4f2c94a46abd..b8d0c1e38c74 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java @@ -111,7 +111,8 @@ public void write(CheckpointEntries entries, TrinoOutputFile outputFile) RowType metadataEntryType = checkpointSchemaManager.getMetadataEntryType(); RowType protocolEntryType = checkpointSchemaManager.getProtocolEntryType(protocolEntry.getReaderFeatures().isPresent(), protocolEntry.getWriterFeatures().isPresent()); RowType txnEntryType = checkpointSchemaManager.getTxnEntryType(); - RowType addEntryType = checkpointSchemaManager.getAddEntryType(entries.getMetadataEntry(), entries.getProtocolEntry(), writeStatsAsJson, writeStatsAsStruct); + // TODO https://github.com/trinodb/trino/issues/19586 Add support for writing 'partitionValues_parsed' field + RowType addEntryType = checkpointSchemaManager.getAddEntryType(entries.getMetadataEntry(), entries.getProtocolEntry(), writeStatsAsJson, writeStatsAsStruct, false); RowType removeEntryType = checkpointSchemaManager.getRemoveEntryType(); List columnNames = ImmutableList.of( diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java index 44ebc5d96aed..0c797dc87c22 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java @@ -29,6 +29,7 @@ import io.trino.spi.TrinoException; import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.TypeManager; import java.io.IOException; @@ -103,7 +104,8 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) typeManager, fileSystem, fileFormatDataSourceStats, - Optional.empty()) + Optional.empty(), + TupleDomain.all()) .filter(entry -> entry.getMetaData() != null || entry.getProtocol() != null) .collect(toImmutableList()); @@ -135,7 +137,8 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) typeManager, fileSystem, fileFormatDataSourceStats, - Optional.of(new MetadataAndProtocolEntry(metadataLogEntry.getMetaData(), protocolLogEntry.getProtocol()))) + Optional.of(new MetadataAndProtocolEntry(metadataLogEntry.getMetaData(), protocolLogEntry.getProtocol())), + TupleDomain.all()) .forEach(checkpointBuilder::addLogEntry); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 6ce5295fd046..f9ea4fc8d336 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -43,6 +43,7 @@ import org.apache.parquet.hadoop.metadata.FileMetaData; import org.apache.parquet.hadoop.metadata.ParquetMetadata; import org.apache.parquet.schema.PrimitiveType; +import org.intellij.lang.annotations.Language; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInstance; @@ -1032,6 +1033,110 @@ public void testReadMultipartCheckpoint() assertThat(query("SELECT * FROM " + tableName)).matches("VALUES 1, 2, 3, 4, 5, 6, 7"); } + /** + * @see deltalake.partition_values_parsed + */ + @Test + public void testDeltaLakeWithPartitionValuesParsed() + throws Exception + { + testPartitionValuesParsed("deltalake/partition_values_parsed"); + } + + /** + * @see trino432.partition_values_parsed + */ + @Test + public void testTrinoWithoutPartitionValuesParsed() + throws Exception + { + testPartitionValuesParsed("trino432/partition_values_parsed"); + } + + private void testPartitionValuesParsed(String resourceName) + throws Exception + { + String tableName = "test_partition_values_parsed_checkpoint_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource(resourceName).toURI()).toPath(), tableLocation); + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + + Session session = Session.builder(getQueryRunner().getDefaultSession()) + .setCatalogSessionProperty("delta", "checkpoint_filtering_enabled", "true") + .build(); + + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE int_part = 10 AND string_part = 'part1'")) + .matches("VALUES 1"); + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE int_part != 10")) + .matches("VALUES 2"); + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE int_part > 10")) + .matches("VALUES 2"); + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE int_part >= 10")) + .matches("VALUES 1, 2"); + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE int_part IN (10, 20)")) + .matches("VALUES 1, 2"); + assertThat(query("SELECT id FROM " + tableName + " WHERE int_part IS NULL AND string_part IS NULL")) + .matches("VALUES 3"); + assertThat(query("SELECT id FROM " + tableName + " WHERE int_part IS NOT NULL AND string_part IS NOT NULL")) + .matches("VALUES 1, 2"); + + assertThat(query("SELECT id FROM " + tableName + " WHERE int_part = 10 AND string_part = 'unmatched partition condition'")) + .returnsEmptyResult(); + assertThat(query("SELECT id FROM " + tableName + " WHERE int_part IS NULL AND string_part IS NOT NULL")) + .returnsEmptyResult(); + } + + /** + * @see deltalake.partition_values_parsed_all_types + */ + @Test + public void testDeltaLakeWithPartitionValuesParsedAllTypes() + throws Exception + { + String tableName = "test_partition_values_parsed_checkpoint_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("deltalake/partition_values_parsed_all_types").toURI()).toPath(), tableLocation); + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + + assertPartitionValuesParsedCondition(tableName, 1, "part_boolean = true"); + assertPartitionValuesParsedCondition(tableName, 1, "part_tinyint = 1"); + assertPartitionValuesParsedCondition(tableName, 1, "part_smallint = 10"); + assertPartitionValuesParsedCondition(tableName, 1, "part_int = 100"); + assertPartitionValuesParsedCondition(tableName, 1, "part_bigint = 1000"); + assertPartitionValuesParsedCondition(tableName, 1, "part_short_decimal = CAST('123.12' AS DECIMAL(5,2))"); + assertPartitionValuesParsedCondition(tableName, 1, "part_long_decimal = CAST('123456789012345678.123' AS DECIMAL(21,3))"); + assertPartitionValuesParsedCondition(tableName, 1, "part_double = 1.2"); + assertPartitionValuesParsedCondition(tableName, 1, "part_float = 3.4"); + assertPartitionValuesParsedCondition(tableName, 1, "part_varchar = 'a'"); + assertPartitionValuesParsedCondition(tableName, 1, "part_date = DATE '2020-08-21'"); + assertPartitionValuesParsedCondition(tableName, 1, "part_timestamp = TIMESTAMP '2020-10-21 01:00:00.123 UTC'"); + assertPartitionValuesParsedCondition(tableName, 1, "part_timestamp_ntz = TIMESTAMP '2023-01-02 01:02:03.456'"); + + assertPartitionValuesParsedCondition(tableName, 3, "part_boolean IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_tinyint IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_smallint IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_int IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_bigint IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_short_decimal IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_long_decimal IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_double IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_float IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_varchar IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_date IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_timestamp IS NULL"); + assertPartitionValuesParsedCondition(tableName, 3, "part_timestamp_ntz IS NULL"); + } + + private void assertPartitionValuesParsedCondition(String tableName, int id, @Language("SQL") String condition) + { + Session session = Session.builder(getQueryRunner().getDefaultSession()) + .setCatalogSessionProperty("delta", "checkpoint_filtering_enabled", "true") + .build(); + + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE " + condition)) + .matches("VALUES " + id); + } + private static MetadataEntry loadMetadataEntry(long entryNumber, Path tableLocation) throws IOException { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java index f374ebb6c605..c2ddffcc048f 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeConfig.java @@ -53,6 +53,7 @@ public void testDefaults() .setMaxPartitionsPerWriter(100) .setUnsafeWritesEnabled(false) .setDefaultCheckpointWritingInterval(10) + .setCheckpointPartitionFilterEnabled(false) .setCheckpointRowStatisticsWritingEnabled(true) .setVacuumMinRetention(new Duration(7, DAYS)) .setHiveCatalogName(null) @@ -90,6 +91,7 @@ public void testExplicitPropertyMappings() .put("delta.max-partitions-per-writer", "200") .put("delta.enable-non-concurrent-writes", "true") .put("delta.default-checkpoint-writing-interval", "15") + .put("delta.checkpoint-filtering.enabled", "true") .put("delta.checkpoint-row-statistics-writing.enabled", "false") .put("delta.vacuum.min-retention", "13h") .put("delta.hive-catalog-name", "hive") @@ -125,6 +127,7 @@ public void testExplicitPropertyMappings() .setUnsafeWritesEnabled(true) .setDefaultCheckpointWritingInterval(15) .setCheckpointRowStatisticsWritingEnabled(false) + .setCheckpointPartitionFilterEnabled(true) .setVacuumMinRetention(new Duration(13, HOURS)) .setHiveCatalogName("hive") .setDynamicFilteringWaitTimeout(new Duration(30, MINUTES)) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java index 0e4f043f94f0..6470b9978c2f 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeFileOperations.java @@ -204,6 +204,37 @@ public void testReadTableCheckpointInterval() assertUpdate("DROP TABLE test_read_checkpoint"); } + @Test + public void testReadPartitionTableWithCheckpointFiltering() + { + String catalog = getSession().getCatalog().orElseThrow(); + + assertUpdate("DROP TABLE IF EXISTS test_checkpoint_filtering"); + + assertUpdate("CREATE TABLE test_checkpoint_filtering(key varchar, data varchar) WITH (partitioned_by = ARRAY['key'], checkpoint_interval = 2)"); + assertUpdate("INSERT INTO test_checkpoint_filtering(key, data) VALUES ('p1', '1-abc'), ('p1', '1-def'), ('p2', '2-abc'), ('p2', '2-def')", 4); + assertUpdate("INSERT INTO test_checkpoint_filtering(key, data) VALUES ('p1', '1-baz'), ('p2', '2-baz')", 2); + + Session session = Session.builder(getSession()) + .setCatalogSessionProperty(catalog, "checkpoint_filtering_enabled", "true") + .build(); + + assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'test_checkpoint_filtering')"); + assertFileSystemAccesses( + session, + "TABLE test_checkpoint_filtering", + ImmutableMultiset.builder() + .add(new FileOperation(LAST_CHECKPOINT, "_last_checkpoint", INPUT_FILE_NEW_STREAM)) + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_GET_LENGTH), 4) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .addCopies(new FileOperation(CHECKPOINT, "00000000000000000002.checkpoint.parquet", INPUT_FILE_NEW_STREAM), 2) // TODO (https://github.com/trinodb/trino/issues/18916) should be checked once per query + .add(new FileOperation(TRANSACTION_LOG_JSON, "00000000000000000003.json", INPUT_FILE_NEW_STREAM)) + .addCopies(new FileOperation(DATA, "key=p1/", INPUT_FILE_NEW_STREAM), 2) + .addCopies(new FileOperation(DATA, "key=p2/", INPUT_FILE_NEW_STREAM), 2) + .build()); + + assertUpdate("DROP TABLE test_checkpoint_filtering"); + } + @Test public void testReadWholePartition() { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java index ff5ea4be8ac6..9e0367c8b9f0 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java @@ -185,7 +185,7 @@ private DeltaLakeSplitManager setupSplitManager(List addFileEntrie new ParquetReaderConfig()) { @Override - public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, ConnectorSession session) + public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, ConnectorSession session) { return addFileEntries; } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java index 281804f7bec3..fc1d17dc475d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestTransactionLogAccess.java @@ -66,6 +66,7 @@ import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_GET_LENGTH; import static io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; +import static io.trino.plugin.deltalake.DeltaTestingConnectorSession.SESSION; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractColumnMetadata; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.LAST_CHECKPOINT_FILENAME; import static io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY; @@ -73,7 +74,6 @@ import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; import static io.trino.spi.type.TimeZoneKey.UTC_KEY; import static io.trino.testing.MultisetAssertions.assertMultisetsEqual; -import static io.trino.testing.TestingConnectorSession.SESSION; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; import static java.lang.String.format; import static java.time.ZoneOffset.UTC; diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java index 618c953171eb..251985eb1afe 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestingDeltaLakeUtils.java @@ -28,7 +28,7 @@ import java.util.stream.Stream; import static io.trino.plugin.deltalake.DeltaLakeQueryRunner.DELTA_CATALOG; -import static io.trino.testing.TestingConnectorSession.SESSION; +import static io.trino.plugin.deltalake.DeltaTestingConnectorSession.SESSION; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; public final class TestingDeltaLakeUtils diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java index 3b6186fe7654..fd953a86f30d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java @@ -28,6 +28,7 @@ import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.TypeManager; import io.trino.testing.TestingConnectorContext; import org.junit.jupiter.api.BeforeEach; @@ -142,7 +143,7 @@ public void readsCheckpointFile() ProtocolEntry protocolEntry = transactionLogAccess.getProtocolEntry(SESSION, tableSnapshot); tableSnapshot.setCachedMetadata(Optional.of(metadataEntry)); try (Stream stream = tableSnapshot.getCheckpointTransactionLogEntries( - SESSION, ImmutableSet.of(ADD), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)))) { + SESSION, ImmutableSet.of(ADD), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), TupleDomain.all())) { List entries = stream.collect(toImmutableList()); assertThat(entries).hasSize(9); @@ -184,7 +185,7 @@ public void readsCheckpointFile() // lets read two entry types in one call; add and protocol try (Stream stream = tableSnapshot.getCheckpointTransactionLogEntries( - SESSION, ImmutableSet.of(ADD, PROTOCOL), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)))) { + SESSION, ImmutableSet.of(ADD, PROTOCOL), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), TupleDomain.all())) { List entries = stream.collect(toImmutableList()); assertThat(entries).hasSize(10); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java index d31ca20fe438..245b602ad40b 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java @@ -24,6 +24,7 @@ import io.trino.filesystem.TrinoOutputFile; import io.trino.filesystem.hdfs.HdfsFileSystemFactory; import io.trino.parquet.writer.ParquetWriterOptions; +import io.trino.plugin.deltalake.DeltaLakeColumnHandle; import io.trino.plugin.deltalake.DeltaLakeConfig; import io.trino.plugin.deltalake.transactionlog.AddFileEntry; import io.trino.plugin.deltalake.transactionlog.DeltaLakeTransactionLogEntry; @@ -32,6 +33,9 @@ import io.trino.plugin.deltalake.transactionlog.RemoveFileEntry; import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.parquet.ParquetReaderConfig; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.type.Int128; +import io.trino.spi.type.Type; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -41,16 +45,23 @@ import java.io.File; import java.io.IOException; import java.net.URI; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.ZonedDateTime; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalInt; import java.util.Set; import java.util.UUID; import java.util.stream.IntStream; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.io.Resources.getResource; +import static com.google.common.math.LongMath.divide; +import static io.airlift.slice.Slices.utf8Slice; +import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaTestingConnectorSession.SESSION; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.COMMIT; @@ -60,7 +71,29 @@ import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.TRANSACTION; import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT; import static io.trino.plugin.hive.HiveTestUtils.HDFS_FILE_SYSTEM_STATS; +import static io.trino.spi.predicate.Domain.notNull; +import static io.trino.spi.predicate.Domain.onlyNull; +import static io.trino.spi.predicate.Domain.singleValue; +import static io.trino.spi.type.BigintType.BIGINT; +import static io.trino.spi.type.BooleanType.BOOLEAN; +import static io.trino.spi.type.DateTimeEncoding.packDateTimeWithZone; +import static io.trino.spi.type.DateType.DATE; +import static io.trino.spi.type.DecimalType.createDecimalType; +import static io.trino.spi.type.DoubleType.DOUBLE; +import static io.trino.spi.type.IntegerType.INTEGER; +import static io.trino.spi.type.RealType.REAL; +import static io.trino.spi.type.SmallintType.SMALLINT; +import static io.trino.spi.type.TimeZoneKey.UTC_KEY; +import static io.trino.spi.type.TimestampType.TIMESTAMP_MICROS; +import static io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_TZ_MILLIS; +import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND; +import static io.trino.spi.type.Timestamps.NANOSECONDS_PER_MICROSECOND; +import static io.trino.spi.type.TinyintType.TINYINT; +import static io.trino.spi.type.VarcharType.VARCHAR; import static io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER; +import static java.lang.Float.floatToIntBits; +import static java.math.RoundingMode.UNNECESSARY; +import static java.time.ZoneOffset.UTC; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; @@ -91,7 +124,7 @@ public void testReadNoEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - assertThatThrownBy(() -> createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(), Optional.empty(), Optional.empty())) + assertThatThrownBy(() -> createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(), Optional.empty(), Optional.empty(), TupleDomain.all())) .isInstanceOf(IllegalArgumentException.class) .hasMessage("fields is empty"); } @@ -135,7 +168,7 @@ public void testReadProtocolEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(1); @@ -153,7 +186,7 @@ public void testReadMetadataAndProtocolEntry() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(2); @@ -196,7 +229,7 @@ public void testReadAddEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri))); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), TupleDomain.all()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(9); @@ -236,6 +269,146 @@ public void testReadAddEntries() Optional.empty())); } + @Test + public void testReadAddEntriesPartitionPruning() + throws Exception + { + String checkpoint = "deltalake/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet"; + URI checkpointUri = getResource(checkpoint).toURI(); + + DeltaLakeColumnHandle stringPartField = new DeltaLakeColumnHandle( + "string_part", + VARCHAR, + OptionalInt.empty(), + "string_part", + VARCHAR, + REGULAR, + Optional.empty()); + + DeltaLakeColumnHandle intPartField = new DeltaLakeColumnHandle( + "int_part", + BIGINT, + OptionalInt.empty(), + "int_part", + BIGINT, + REGULAR, + Optional.empty()); + + // The domain specifies all partition columns + CheckpointEntryIterator partitionsEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L), stringPartField, singleValue(VARCHAR, utf8Slice("part1"))))); + List partitionsEntries = ImmutableList.copyOf(partitionsEntryIterator); + + assertThat(partitionsEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); + assertThat(partitionsEntries) + .hasSize(1) + .extracting(entry -> entry.getAdd().getPath()) + .containsExactly("int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet"); + + // The domain specifies a part of partition columns + CheckpointEntryIterator partitionEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L)))); + List partitionEntries = ImmutableList.copyOf(partitionEntryIterator); + + assertThat(partitionEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); + assertThat(partitionEntries) + .hasSize(1) + .extracting(entry -> entry.getAdd().getPath()) + .containsExactly("int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet"); + + // Verify empty iterator when the condition doesn't match + CheckpointEntryIterator emptyIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of( + intPartField, singleValue(BIGINT, 10L), + stringPartField, singleValue(VARCHAR, utf8Slice("unmatched partition condition"))))); + assertThat(ImmutableList.copyOf(emptyIterator)).isEmpty(); + + // Verify IS NULL condition + CheckpointEntryIterator isNullIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of( + intPartField, onlyNull(BIGINT), + stringPartField, onlyNull(VARCHAR)))); + assertThat(ImmutableList.copyOf(isNullIterator)) + .hasSize(1) + .extracting(entry -> entry.getAdd().getPath()) + .containsExactly("int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/part-00000-dcb29d13-eeca-4fa6-a8bf-860da0131a5c.c000.snappy.parquet"); + + // Verify IS NOT NULL condition + CheckpointEntryIterator isNotNullIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of( + intPartField, notNull(BIGINT), + stringPartField, notNull(VARCHAR)))); + assertThat(ImmutableList.copyOf(isNotNullIterator)) + .hasSize(2) + .extracting(entry -> entry.getAdd().getPath()) + .containsExactly( + "int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet", + "int_part=20/string_part=part2/part-00000-e0b4887e-95f6-4ce1-b96c-32c5cf472476.c000.snappy.parquet"); + } + + @Test + public void testReadAddEntriesPartitionPruningAllTypes() + throws Exception + { + String checkpoint = "deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.checkpoint.parquet"; + URI checkpointUri = getResource(checkpoint).toURI(); + + assertPartitionValuesParsedCondition(checkpointUri, "part_boolean", BOOLEAN, true); + assertPartitionValuesParsedCondition(checkpointUri, "part_tinyint", TINYINT, 1L); + assertPartitionValuesParsedCondition(checkpointUri, "part_smallint", SMALLINT, 10L); + assertPartitionValuesParsedCondition(checkpointUri, "part_int", INTEGER, 100L); + assertPartitionValuesParsedCondition(checkpointUri, "part_bigint", BIGINT, 1000L); + assertPartitionValuesParsedCondition(checkpointUri, "part_short_decimal", createDecimalType(5, 2), 12312L); + assertPartitionValuesParsedCondition(checkpointUri, "part_long_decimal", createDecimalType(21, 3), Int128.valueOf("123456789012345678123")); + assertPartitionValuesParsedCondition(checkpointUri, "part_double", DOUBLE, 1.2); + assertPartitionValuesParsedCondition(checkpointUri, "part_float", REAL, (long) floatToIntBits(3.4f)); + assertPartitionValuesParsedCondition(checkpointUri, "part_varchar", VARCHAR, utf8Slice("a")); + assertPartitionValuesParsedCondition(checkpointUri, "part_date", DATE, LocalDate.parse("2020-08-21").toEpochDay()); + ZonedDateTime zonedDateTime = LocalDateTime.parse("2020-10-21T01:00:00.123").atZone(UTC); + long timestampValue = packDateTimeWithZone(zonedDateTime.toInstant().toEpochMilli(), UTC_KEY); + assertPartitionValuesParsedCondition(checkpointUri, "part_timestamp", TIMESTAMP_TZ_MILLIS, timestampValue); + LocalDateTime timestampNtz = LocalDateTime.parse("2023-01-02T01:02:03.456"); + long timestampNtzValue = timestampNtz.toEpochSecond(UTC) * MICROSECONDS_PER_SECOND + divide(timestampNtz.getNano(), NANOSECONDS_PER_MICROSECOND, UNNECESSARY); + assertPartitionValuesParsedCondition(checkpointUri, "part_timestamp_ntz", TIMESTAMP_MICROS, timestampNtzValue); + } + + private void assertPartitionValuesParsedCondition(URI checkpointUri, String columnName, Type type, Object value) + throws IOException + { + DeltaLakeColumnHandle intPartField = new DeltaLakeColumnHandle(columnName, type, OptionalInt.empty(), columnName, type, REGULAR, Optional.empty()); + + CheckpointEntryIterator partitionEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(type, value)))); + List partitionEntries = ImmutableList.copyOf(partitionEntryIterator); + + assertThat(partitionEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); + assertThat(partitionEntries).hasSize(1); + } + @Test public void testReadAllEntries() throws Exception @@ -246,7 +419,8 @@ public void testReadAllEntries() checkpointUri, ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE, COMMIT), Optional.of(readMetadataEntry(checkpointUri)), - Optional.of(readProtocolEntry(checkpointUri))); + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.all()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(17); @@ -309,7 +483,8 @@ public void testSkipRemoveEntries() "metadataFormatProvider", ImmutableMap.of()), "{\"type\":\"struct\",\"fields\":" + - "[{\"name\":\"ts\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}]}", + "[{\"name\":\"ts\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}," + + "{\"name\":\"part_key\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}]}", ImmutableList.of("part_key"), ImmutableMap.of(), 1000); @@ -367,16 +542,17 @@ public void testSkipRemoveEntries() writer.write(entries, createOutputFile(targetPath)); CheckpointEntryIterator metadataAndProtocolEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty()); + createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); CheckpointEntryIterator addEntryIterator = createCheckpointEntryIterator( URI.create(targetPath), ImmutableSet.of(ADD), Optional.of(metadataEntry), - Optional.of(protocolEntry)); + Optional.of(protocolEntry), + TupleDomain.all()); CheckpointEntryIterator removeEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(REMOVE), Optional.empty(), Optional.empty()); + createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(REMOVE), Optional.empty(), Optional.empty(), TupleDomain.all()); CheckpointEntryIterator txnEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(TRANSACTION), Optional.empty(), Optional.empty()); + createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(TRANSACTION), Optional.empty(), Optional.empty(), TupleDomain.all()); assertThat(Iterators.size(metadataAndProtocolEntryIterator)).isEqualTo(2); assertThat(Iterators.size(addEntryIterator)).isEqualTo(1); @@ -392,18 +568,23 @@ public void testSkipRemoveEntries() private MetadataEntry readMetadataEntry(URI checkpointUri) throws IOException { - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA), Optional.empty(), Optional.empty()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA), Optional.empty(), Optional.empty(), TupleDomain.all()); return Iterators.getOnlyElement(checkpointEntryIterator).getMetaData(); } private ProtocolEntry readProtocolEntry(URI checkpointUri) throws IOException { - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); return Iterators.getOnlyElement(checkpointEntryIterator).getProtocol(); } - private CheckpointEntryIterator createCheckpointEntryIterator(URI checkpointUri, Set entryTypes, Optional metadataEntry, Optional protocolEntry) + private CheckpointEntryIterator createCheckpointEntryIterator( + URI checkpointUri, + Set entryTypes, + Optional metadataEntry, + Optional protocolEntry, + TupleDomain partitionConstraint) throws IOException { TrinoFileSystem fileSystem = new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS).create(SESSION); @@ -421,7 +602,8 @@ private CheckpointEntryIterator createCheckpointEntryIterator(URI checkpointUri, new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true, - new DeltaLakeConfig().getDomainCompactionThreshold()); + new DeltaLakeConfig().getDomainCompactionThreshold(), + partitionConstraint); } private static TrinoOutputFile createOutputFile(String path) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java index c17f72519d4e..0b1260a50aa8 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java @@ -36,6 +36,7 @@ import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.block.Block; import io.trino.spi.block.SqlRow; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.BigintType; import io.trino.spi.type.Int128; import io.trino.spi.type.IntegerType; @@ -484,7 +485,8 @@ private CheckpointEntries readCheckpoint(String checkpointPath, MetadataEntry me new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), rowStatisticsEnabled, - new DeltaLakeConfig().getDomainCompactionThreshold()); + new DeltaLakeConfig().getDomainCompactionThreshold(), + TupleDomain.all()); CheckpointBuilder checkpointBuilder = new CheckpointBuilder(); while (checkpointEntryIterator.hasNext()) { diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java index 9145e2d112c9..ac6e842b104a 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java @@ -27,6 +27,7 @@ import io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager; import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.parquet.ParquetReaderConfig; +import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.ArrayType; import io.trino.spi.type.DecimalType; import io.trino.spi.type.DoubleType; @@ -107,7 +108,8 @@ public void testParseParquetStatistics() new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true, - new DeltaLakeConfig().getDomainCompactionThreshold()); + new DeltaLakeConfig().getDomainCompactionThreshold(), + TupleDomain.all()); MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData(); CheckpointEntryIterator protocolEntryIterator = new CheckpointEntryIterator( checkpointFile, @@ -121,7 +123,8 @@ public void testParseParquetStatistics() new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true, - new DeltaLakeConfig().getDomainCompactionThreshold()); + new DeltaLakeConfig().getDomainCompactionThreshold(), + TupleDomain.all()); ProtocolEntry protocolEntry = getOnlyElement(protocolEntryIterator).getProtocol(); CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( @@ -136,7 +139,8 @@ public void testParseParquetStatistics() new FileFormatDataSourceStats(), new ParquetReaderConfig().toParquetReaderOptions(), true, - new DeltaLakeConfig().getDomainCompactionThreshold()); + new DeltaLakeConfig().getDomainCompactionThreshold(), + TupleDomain.all()); DeltaLakeTransactionLogEntry matchingAddFileEntry = null; while (checkpointEntryIterator.hasNext()) { DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next(); diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/README.md new file mode 100644 index 000000000000..a3ae6655153f --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/README.md @@ -0,0 +1,21 @@ +Data generated using OSS Delta Lake 3.0.0: + +```sql +CREATE TABLE test_partition_values_parsed ( + id INT, + int_part INT, + string_part STRING +) +USING delta +PARTITIONED BY (int_part, string_part) +LOCATION 's3://test-bucket/test_partition_values_parsed' +TBLPROPERTIES ( + delta.checkpoint.writeStatsAsStruct = true, + delta.checkpoint.writeStatsAsJson = false, + delta.checkpointInterval = 1 +); + +INSERT INTO test_partition_values_parsed VALUES (1, 10, 'part1'); +INSERT INTO test_partition_values_parsed VALUES (2, 20, 'part2'); +INSERT INTO test_partition_values_parsed VALUES (3, NULL, NULL); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..9c3414fefca2 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1699426607713,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[\"int_part\",\"string_part\"]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"true\",\"delta.checkpoint.writeStatsAsJson\":\"false\",\"delta.checkpointInterval\":\"1\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"8e5c07fa-7101-4a83-899b-6d124f3bb8d6"}} +{"metaData":{"id":"2f4075c9-1e3f-4fcc-b5b3-1f15d800a400","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_part\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"string_part\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["int_part","string_part"],"configuration":{"delta.checkpoint.writeStatsAsStruct":"true","delta.checkpoint.writeStatsAsJson":"false","delta.checkpointInterval":"1"},"createdTime":1699426607271}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet new file mode 100644 index 000000000000..fa1bbfd0199f Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..48e3f5d2085c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699426621220,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"9bca6598-055f-4d05-8e1f-910b0b018fda"}} +{"add":{"path":"int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet","partitionValues":{"int_part":"10","string_part":"part1"},"size":452,"modificationTime":1699426621000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 000000000000..005b9c6bc313 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..e6f9284d3789 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699426626909,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"64a2fdf1-ecd5-44b2-ad60-42d487414f5f"}} +{"add":{"path":"int_part=20/string_part=part2/part-00000-e0b4887e-95f6-4ce1-b96c-32c5cf472476.c000.snappy.parquet","partitionValues":{"int_part":"20","string_part":"part2"},"size":452,"modificationTime":1699426625000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 000000000000..7215b844f02d Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..45c93355c04c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699426631677,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"6e40e266-c530-4ecd-ab6e-ecadcdc9d4a7"}} +{"add":{"path":"int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/part-00000-dcb29d13-eeca-4fa6-a8bf-860da0131a5c.c000.snappy.parquet","partitionValues":{"int_part":null,"string_part":null},"size":452,"modificationTime":1699426630000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..d4f22091df1b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":3,"size":5,"sizeInBytes":16925,"numOfAddFiles":3,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"partitionValues_parsed","type":{"type":"struct","fields":[{"name":"int_part","type":"integer","nullable":true,"metadata":{}},{"name":"string_part","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"10dbfdd2d82b31c969cfee6227ff3a6e"} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet new file mode 100644 index 000000000000..7829d199045c Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=10/string_part=part1/part-00000-383afb1a-87de-4e70-86ab-c21ae44c7f3f.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=20/string_part=part2/part-00000-e0b4887e-95f6-4ce1-b96c-32c5cf472476.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=20/string_part=part2/part-00000-e0b4887e-95f6-4ce1-b96c-32c5cf472476.c000.snappy.parquet new file mode 100644 index 000000000000..3caaab661c9f Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=20/string_part=part2/part-00000-e0b4887e-95f6-4ce1-b96c-32c5cf472476.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/part-00000-dcb29d13-eeca-4fa6-a8bf-860da0131a5c.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/part-00000-dcb29d13-eeca-4fa6-a8bf-860da0131a5c.c000.snappy.parquet new file mode 100644 index 000000000000..e3a2b4b9dc28 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/part-00000-dcb29d13-eeca-4fa6-a8bf-860da0131a5c.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/README.md b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/README.md new file mode 100644 index 000000000000..cb777a9bf3a4 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/README.md @@ -0,0 +1,77 @@ +Data generated using OSS Delta Lake 3.0.0: + +```sql +CREATE TABLE test_partition_values_parsed_all_types ( + id INT, + part_boolean BOOLEAN, + part_tinyint TINYINT, + part_smallint SMALLINT, + part_int INT, + part_bigint BIGINT, + part_short_decimal DECIMAL(5,2), + part_long_decimal DECIMAL(21,3), + part_double DOUBLE, + part_float REAL, + part_varchar STRING, + part_date DATE, + part_timestamp TIMESTAMP, + part_timestamp_ntz TIMESTAMP_NTZ +) +USING delta +PARTITIONED BY (part_boolean, part_tinyint, part_smallint, part_int, part_bigint, part_short_decimal, part_long_decimal, part_double, part_float, part_varchar, part_date, part_timestamp, part_timestamp_ntz) +LOCATION 's3://test-bucket/test_partition_values_parsed_all_types' +TBLPROPERTIES ( + delta.checkpoint.writeStatsAsStruct = true, + delta.checkpoint.writeStatsAsJson = false, + delta.checkpointInterval = 3 +); + +INSERT INTO test_partition_values_parsed_all_types VALUES ( + 1, + true, + 1, + 10, + 100, + 1000, + CAST('123.12' AS DECIMAL(5,2)), + CAST('123456789012345678.123' AS DECIMAL(21,3)), + 1.2, + 3.4, + 'a', + DATE '2020-08-21', + TIMESTAMP '2020-10-21 01:00:00.123 UTC', + TIMESTAMP '2023-01-02 01:02:03.456' +); +INSERT INTO test_partition_values_parsed_all_types VALUES ( + 2, + false, + 2, + 20, + 200, + 2000, + CAST('223.12' AS DECIMAL(5,2)), + CAST('223456789012345678.123' AS DECIMAL(21,3)), + 10.2, + 30.4, + 'b', + DATE '2020-08-22', + TIMESTAMP '2020-10-22 01:00:00.123 UTC', + TIMESTAMP '2023-01-03 01:02:03.456' +); +INSERT INTO test_partition_values_parsed_all_types VALUES ( + 3, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL +); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..e1daa727df02 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1699840247802,"operation":"CREATE TABLE","operationParameters":{"isManaged":"false","description":null,"partitionBy":"[\"part_boolean\",\"part_tinyint\",\"part_smallint\",\"part_int\",\"part_bigint\",\"part_short_decimal\",\"part_long_decimal\",\"part_double\",\"part_float\",\"part_varchar\",\"part_date\",\"part_timestamp\",\"part_timestamp_ntz\"]","properties":"{\"delta.checkpoint.writeStatsAsStruct\":\"true\",\"delta.checkpoint.writeStatsAsJson\":\"false\",\"delta.checkpointInterval\":\"3\"}"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"744f4d91-5de9-42f0-b072-25748b1132af"}} +{"metaData":{"id":"ca3590e9-3a1e-4fd0-abab-2f835bf73d39","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_tinyint\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_smallint\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_bigint\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_short_decimal\",\"type\":\"decimal(5,2)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_long_decimal\",\"type\":\"decimal(21,3)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_varchar\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"part_timestamp_ntz\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part_boolean","part_tinyint","part_smallint","part_int","part_bigint","part_short_decimal","part_long_decimal","part_double","part_float","part_varchar","part_date","part_timestamp","part_timestamp_ntz"],"configuration":{"delta.checkpoint.writeStatsAsStruct":"true","delta.checkpoint.writeStatsAsJson":"false","delta.checkpointInterval":"3"},"createdTime":1699840247778}} +{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["timestampNtz"],"writerFeatures":["timestampNtz"]}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..73603ed7a74a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699840249910,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":0,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"50588af8-f3c2-41d6-bdd4-9506345e01f5"}} +{"add":{"path":"part_boolean=true/part_tinyint=1/part_smallint=10/part_int=100/part_bigint=1000/part_short_decimal=123.12/part_long_decimal=123456789012345678.123/part_double=1.2/part_float=3.4/part_varchar=a/part_date=2020-08-21/part_timestamp=2020-10-21%2001%253A00%253A00.123/part_timestamp_ntz=2023-01-02%2001%253A02%253A03.456/part-00000-0c74f71e-bb8b-4156-8560-bd819942f7fd.c000.snappy.parquet","partitionValues":{"part_varchar":"a","part_double":"1.2","part_int":"100","part_timestamp":"2020-10-21 01:00:00.123","part_smallint":"10","part_short_decimal":"123.12","part_boolean":"true","part_date":"2020-08-21","part_tinyint":"1","part_long_decimal":"123456789012345678.123","part_float":"3.4","part_bigint":"1000","part_timestamp_ntz":"2023-01-02 01:02:03.456"},"size":452,"modificationTime":1699840249000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..b37edc3725ec --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699840251434,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":1,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"d845f63d-213d-4d46-b3b4-2702fefb2f7a"}} +{"add":{"path":"part_boolean=false/part_tinyint=2/part_smallint=20/part_int=200/part_bigint=2000/part_short_decimal=223.12/part_long_decimal=223456789012345678.123/part_double=10.2/part_float=30.4/part_varchar=b/part_date=2020-08-22/part_timestamp=2020-10-22%2001%253A00%253A00.123/part_timestamp_ntz=2023-01-03%2001%253A02%253A03.456/part-00000-b0b98900-4fe4-4c22-ad51-909b9600e221.c000.snappy.parquet","partitionValues":{"part_varchar":"b","part_double":"10.2","part_int":"200","part_timestamp":"2020-10-22 01:00:00.123","part_smallint":"20","part_short_decimal":"223.12","part_boolean":"false","part_date":"2020-08-22","part_tinyint":"2","part_long_decimal":"223456789012345678.123","part_float":"30.4","part_bigint":"2000","part_timestamp_ntz":"2023-01-03 01:02:03.456"},"size":452,"modificationTime":1699840251000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 000000000000..67e317e8240b Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..437bf034ba23 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1699840252999,"operation":"WRITE","operationParameters":{"mode":"Append","partitionBy":"[]"},"readVersion":2,"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"452"},"engineInfo":"Apache-Spark/3.5.0 Delta-Lake/3.0.0","txnId":"2ed8dd2f-5949-4f91-bc4a-7b1b4e5fa67d"}} +{"add":{"path":"part_boolean=__HIVE_DEFAULT_PARTITION__/part_tinyint=__HIVE_DEFAULT_PARTITION__/part_smallint=__HIVE_DEFAULT_PARTITION__/part_int=__HIVE_DEFAULT_PARTITION__/part_bigint=__HIVE_DEFAULT_PARTITION__/part_short_decimal=__HIVE_DEFAULT_PARTITION__/part_long_decimal=__HIVE_DEFAULT_PARTITION__/part_double=__HIVE_DEFAULT_PARTITION__/part_float=__HIVE_DEFAULT_PARTITION__/part_varchar=__HIVE_DEFAULT_PARTITION__/part_date=__HIVE_DEFAULT_PARTITION__/part_timestamp=__HIVE_DEFAULT_PARTITION__/part_timestamp_ntz=__HIVE_DEFAULT_PARTITION__/part-00000-194b12f4-b133-4363-81b6-aeaea00fff6d.c000.snappy.parquet","partitionValues":{"part_varchar":null,"part_double":null,"part_int":null,"part_timestamp":null,"part_smallint":null,"part_short_decimal":null,"part_boolean":null,"part_date":null,"part_tinyint":null,"part_long_decimal":null,"part_float":null,"part_bigint":null,"part_timestamp_ntz":null},"size":452,"modificationTime":1699840252000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}"}} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..3cdc5d88f48e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":3,"size":5,"sizeInBytes":25294,"numOfAddFiles":3,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"partitionValues_parsed","type":{"type":"struct","fields":[{"name":"part_boolean","type":"boolean","nullable":true,"metadata":{}},{"name":"part_tinyint","type":"byte","nullable":true,"metadata":{}},{"name":"part_smallint","type":"short","nullable":true,"metadata":{}},{"name":"part_int","type":"integer","nullable":true,"metadata":{}},{"name":"part_bigint","type":"long","nullable":true,"metadata":{}},{"name":"part_short_decimal","type":"decimal(5,2)","nullable":true,"metadata":{}},{"name":"part_long_decimal","type":"decimal(21,3)","nullable":true,"metadata":{}},{"name":"part_double","type":"double","nullable":true,"metadata":{}},{"name":"part_float","type":"float","nullable":true,"metadata":{}},{"name":"part_varchar","type":"string","nullable":true,"metadata":{}},{"name":"part_date","type":"date","nullable":true,"metadata":{}},{"name":"part_timestamp","type":"timestamp","nullable":true,"metadata":{}},{"name":"part_timestamp_ntz","type":"timestamp_ntz","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"6148802d218014989a51bf9a23a6a796"} diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=__HIVE_DEFAULT_PARTITION__/part_tinyint=__HIVE_DEFAULT_PARTITION__/part_smallint=__HIVE_DEFAULT_PARTITION__/part_int=__HIVE_DEFAULT_PARTITION__/part_bigint=__HIVE_DEFAULT_PARTITION__/part_short_decimal=__HIVE_DEFAULT_PARTITION__/part_long_decimal=__HIVE_DEFAULT_PARTITION__/part_double=__HIVE_DEFAULT_PARTITION__/part_float=__HIVE_DEFAULT_PARTITION__/part_varchar=__HIVE_DEFAULT_PARTITION__/part_date=__HIVE_DEFAULT_PARTITION__/part_timestamp=__HIVE_DEFAULT_PARTITION__/part_timestamp_ntz=__HIVE_DEFAULT_PARTITION__/part-00000-194b12f4-b133-4363-81b6-aeaea00fff6d.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=__HIVE_DEFAULT_PARTITION__/part_tinyint=__HIVE_DEFAULT_PARTITION__/part_smallint=__HIVE_DEFAULT_PARTITION__/part_int=__HIVE_DEFAULT_PARTITION__/part_bigint=__HIVE_DEFAULT_PARTITION__/part_short_decimal=__HIVE_DEFAULT_PARTITION__/part_long_decimal=__HIVE_DEFAULT_PARTITION__/part_double=__HIVE_DEFAULT_PARTITION__/part_float=__HIVE_DEFAULT_PARTITION__/part_varchar=__HIVE_DEFAULT_PARTITION__/part_date=__HIVE_DEFAULT_PARTITION__/part_timestamp=__HIVE_DEFAULT_PARTITION__/part_timestamp_ntz=__HIVE_DEFAULT_PARTITION__/part-00000-194b12f4-b133-4363-81b6-aeaea00fff6d.c000.snappy.parquet new file mode 100644 index 000000000000..b2fcf53cecf0 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=__HIVE_DEFAULT_PARTITION__/part_tinyint=__HIVE_DEFAULT_PARTITION__/part_smallint=__HIVE_DEFAULT_PARTITION__/part_int=__HIVE_DEFAULT_PARTITION__/part_bigint=__HIVE_DEFAULT_PARTITION__/part_short_decimal=__HIVE_DEFAULT_PARTITION__/part_long_decimal=__HIVE_DEFAULT_PARTITION__/part_double=__HIVE_DEFAULT_PARTITION__/part_float=__HIVE_DEFAULT_PARTITION__/part_varchar=__HIVE_DEFAULT_PARTITION__/part_date=__HIVE_DEFAULT_PARTITION__/part_timestamp=__HIVE_DEFAULT_PARTITION__/part_timestamp_ntz=__HIVE_DEFAULT_PARTITION__/part-00000-194b12f4-b133-4363-81b6-aeaea00fff6d.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=false/part_tinyint=2/part_smallint=20/part_int=200/part_bigint=2000/part_short_decimal=223.12/part_long_decimal=223456789012345678.123/part_double=10.2/part_float=30.4/part_varchar=b/part_date=2020-08-22/part_timestamp=2020-10-22 01%3A00%3A00.123/part_timestamp_ntz=2023-01-03 01%3A02%3A03.456/part-00000-b0b98900-4fe4-4c22-ad51-909b9600e221.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=false/part_tinyint=2/part_smallint=20/part_int=200/part_bigint=2000/part_short_decimal=223.12/part_long_decimal=223456789012345678.123/part_double=10.2/part_float=30.4/part_varchar=b/part_date=2020-08-22/part_timestamp=2020-10-22 01%3A00%3A00.123/part_timestamp_ntz=2023-01-03 01%3A02%3A03.456/part-00000-b0b98900-4fe4-4c22-ad51-909b9600e221.c000.snappy.parquet new file mode 100644 index 000000000000..cad9053d1e42 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=false/part_tinyint=2/part_smallint=20/part_int=200/part_bigint=2000/part_short_decimal=223.12/part_long_decimal=223456789012345678.123/part_double=10.2/part_float=30.4/part_varchar=b/part_date=2020-08-22/part_timestamp=2020-10-22 01%3A00%3A00.123/part_timestamp_ntz=2023-01-03 01%3A02%3A03.456/part-00000-b0b98900-4fe4-4c22-ad51-909b9600e221.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=true/part_tinyint=1/part_smallint=10/part_int=100/part_bigint=1000/part_short_decimal=123.12/part_long_decimal=123456789012345678.123/part_double=1.2/part_float=3.4/part_varchar=a/part_date=2020-08-21/part_timestamp=2020-10-21 01%3A00%3A00.123/part_timestamp_ntz=2023-01-02 01%3A02%3A03.456/part-00000-0c74f71e-bb8b-4156-8560-bd819942f7fd.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=true/part_tinyint=1/part_smallint=10/part_int=100/part_bigint=1000/part_short_decimal=123.12/part_long_decimal=123456789012345678.123/part_double=1.2/part_float=3.4/part_varchar=a/part_date=2020-08-21/part_timestamp=2020-10-21 01%3A00%3A00.123/part_timestamp_ntz=2023-01-02 01%3A02%3A03.456/part-00000-0c74f71e-bb8b-4156-8560-bd819942f7fd.c000.snappy.parquet new file mode 100644 index 000000000000..a4b8c3bbc65d Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/deltalake/partition_values_parsed_all_types/part_boolean=true/part_tinyint=1/part_smallint=10/part_int=100/part_bigint=1000/part_short_decimal=123.12/part_long_decimal=123456789012345678.123/part_double=1.2/part_float=3.4/part_varchar=a/part_date=2020-08-21/part_timestamp=2020-10-21 01%3A00%3A00.123/part_timestamp_ntz=2023-01-02 01%3A02%3A03.456/part-00000-0c74f71e-bb8b-4156-8560-bd819942f7fd.c000.snappy.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/README.md b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/README.md new file mode 100644 index 000000000000..7f168afd1241 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/README.md @@ -0,0 +1,17 @@ +Data generated using Trino 432: + +```sql +CREATE TABLE test_partition_values_parsed ( + id INT, + int_part INT, + string_part VARCHAR +) +WITH ( + partitioned_by = ARRAY['int_part', 'string_part'], + checkpoint_interval = 1 +); + +INSERT INTO test_partition_values_parsed VALUES (1, 10, 'part1'); +INSERT INTO test_partition_values_parsed VALUES (2, 20, 'part2'); +INSERT INTO test_partition_values_parsed VALUES (3, NULL, NULL); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..aaf86ca45c0a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"version":0,"timestamp":1699495423555,"userId":"yuya.ebihara","userName":"yuya.ebihara","operation":"CREATE TABLE","operationParameters":{"queryId":"20231109_020343_00031_9eakg"},"clusterId":"trino-testversion-312e43c0-2e3d-4039-8fd3-9fe5e100d186","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"7e6c2886-ecbb-4732-a73e-2c8db848c121","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"int_part\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"string_part\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["int_part","string_part"],"configuration":{"delta.checkpointInterval":"1"},"createdTime":1699495423555}} diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet new file mode 100644 index 000000000000..ae2b960481d9 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..aa6542a281ce --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000001.json @@ -0,0 +1,2 @@ +{"commitInfo":{"version":1,"timestamp":1699495423897,"userId":"yuya.ebihara","userName":"yuya.ebihara","operation":"WRITE","operationParameters":{"queryId":"20231109_020343_00032_9eakg"},"clusterId":"trino-testversion-312e43c0-2e3d-4039-8fd3-9fe5e100d186","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true}} +{"add":{"path":"int_part=10/string_part=part1/20231109_020343_00032_9eakg_302b745a-59c0-4fce-8ca7-fed724196b93","partitionValues":{"int_part":"10","string_part":"part1"},"size":199,"modificationTime":1699495423885,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1},\"maxValues\":{\"id\":1},\"nullCount\":{\"id\":0}}","tags":{}}} diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 000000000000..99165454a1b5 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..d3461be234eb --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"version":2,"timestamp":1699495426681,"userId":"yuya.ebihara","userName":"yuya.ebihara","operation":"WRITE","operationParameters":{"queryId":"20231109_020344_00033_9eakg"},"clusterId":"trino-testversion-312e43c0-2e3d-4039-8fd3-9fe5e100d186","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true}} +{"add":{"path":"int_part=20/string_part=part2/20231109_020344_00033_9eakg_e6448c08-9b43-4fa1-8288-823fd3d692b9","partitionValues":{"int_part":"20","string_part":"part2"},"size":199,"modificationTime":1699495426664,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2},\"maxValues\":{\"id\":2},\"nullCount\":{\"id\":0}}","tags":{}}} diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet new file mode 100644 index 000000000000..fb248e83da3f Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.checkpoint.parquet differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..4a48ec934c12 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"version":3,"timestamp":1699495430701,"userId":"yuya.ebihara","userName":"yuya.ebihara","operation":"WRITE","operationParameters":{"queryId":"20231109_020350_00034_9eakg"},"clusterId":"trino-testversion-312e43c0-2e3d-4039-8fd3-9fe5e100d186","readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true}} +{"add":{"path":"int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/20231109_020350_00034_9eakg_2a008dd8-da7f-496a-b404-ca455732578e","partitionValues":{"int_part":null,"string_part":null},"size":199,"modificationTime":1699495430685,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3},\"maxValues\":{\"id\":3},\"nullCount\":{\"id\":0}}","tags":{}}} diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..d97239b80953 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":3,"size":5} \ No newline at end of file diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_trino_meta/extended_stats.json b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_trino_meta/extended_stats.json new file mode 100644 index 000000000000..13e7378a48fa --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/_delta_log/_trino_meta/extended_stats.json @@ -0,0 +1 @@ +{"modelVersion":4,"alreadyAnalyzedModifiedTimeMax":"2023-11-09T02:03:50.685Z","columnStatistics":{"string_part":{"totalSizeInBytes":10,"ndvSummary":"AgwCAEFaAwGC32hM"},"int_part":{"ndvSummary":"AgwCAENwWxjApyj8"},"id":{"ndvSummary":"AgwDAEAWuIcByymfRz7H6g=="}}} \ No newline at end of file diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=10/string_part=part1/20231109_020343_00032_9eakg_302b745a-59c0-4fce-8ca7-fed724196b93 b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=10/string_part=part1/20231109_020343_00032_9eakg_302b745a-59c0-4fce-8ca7-fed724196b93 new file mode 100644 index 000000000000..f650bf9998f9 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=10/string_part=part1/20231109_020343_00032_9eakg_302b745a-59c0-4fce-8ca7-fed724196b93 differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=20/string_part=part2/20231109_020344_00033_9eakg_e6448c08-9b43-4fa1-8288-823fd3d692b9 b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=20/string_part=part2/20231109_020344_00033_9eakg_e6448c08-9b43-4fa1-8288-823fd3d692b9 new file mode 100644 index 000000000000..c26b952c5e64 Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=20/string_part=part2/20231109_020344_00033_9eakg_e6448c08-9b43-4fa1-8288-823fd3d692b9 differ diff --git a/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/20231109_020350_00034_9eakg_2a008dd8-da7f-496a-b404-ca455732578e b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/20231109_020350_00034_9eakg_2a008dd8-da7f-496a-b404-ca455732578e new file mode 100644 index 000000000000..979c12f8bf0a Binary files /dev/null and b/plugin/trino-delta-lake/src/test/resources/trino432/partition_values_parsed/int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/20231109_020350_00034_9eakg_2a008dd8-da7f-496a-b404-ca455732578e differ