diff --git a/presto-analyzer/src/test/java/com/facebook/presto/sql/analyzer/TestBuiltInQueryPreparer.java b/presto-analyzer/src/test/java/com/facebook/presto/sql/analyzer/TestBuiltInQueryPreparer.java index f852cc4b221d3..a0e0fc3f53ef0 100644 --- a/presto-analyzer/src/test/java/com/facebook/presto/sql/analyzer/TestBuiltInQueryPreparer.java +++ b/presto-analyzer/src/test/java/com/facebook/presto/sql/analyzer/TestBuiltInQueryPreparer.java @@ -76,8 +76,8 @@ public void setup() procedures.add(new TableDataRewriteDistributedProcedure("system", "distributed_fun", distributedArguments, (session, transactionContext, procedureHandle, fragments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)); + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())); procedureRegistry.addProcedures(new ConnectorId("test"), procedures); queryPreparer = new BuiltInQueryPreparer(SQL_PARSER, procedureRegistry); } diff --git a/presto-docs/src/main/sphinx/connector/iceberg.rst b/presto-docs/src/main/sphinx/connector/iceberg.rst index 8897bfb777e1f..4619d783071ad 100644 --- a/presto-docs/src/main/sphinx/connector/iceberg.rst +++ b/presto-docs/src/main/sphinx/connector/iceberg.rst @@ -1237,6 +1237,47 @@ Examples: CALL iceberg.system.set_table_property('schema_name', 'table_name', 'commit.retry.num-retries', '10'); +Rewrite Data Files +^^^^^^^^^^^^^^^^^^ + +Iceberg tracks all data files under different partition specs in a table. More data files require +more metadata to be stored in manifest files, and small data files can cause an unnecessary amount of metadata and +less efficient queries due to file open costs. Also, data files under different partition specs can +prevent metadata level deletion or thorough predicate push down for Presto. 
+ +Use ``rewrite_data_files`` to rewrite the data files of a specified table so that they are +merged into fewer but larger files under the newest partition spec. If the table is partitioned, data +file compaction can act separately on the selected partitions to improve read performance by reducing +metadata overhead and runtime file open cost. + +The following arguments are available: + +===================== ========== =============== ======================================================================= +Argument Name required type Description +===================== ========== =============== ======================================================================= +``schema`` ✔️ string Schema of the table to update. + +``table_name`` ✔️ string Name of the table to update. + +``filter`` string Predicate as a string used for filtering the files. Currently + only rewrite of whole partitions is supported. Filter on partition + columns. The default value is ``true``. + +``options`` map Options to be used for data files rewrite. 
(to be expanded) +===================== ========== =============== ======================================================================= + +Examples: + +* Rewrite all the data files in table ``db.sample`` to the newest partition spec and combine small files into larger ones:: + + CALL iceberg.system.rewrite_data_files('db', 'sample'); + CALL iceberg.system.rewrite_data_files(schema => 'db', table_name => 'sample'); + +* Rewrite the data files in partitions specified by a filter in table ``db.sample`` to the newest partition spec:: + + CALL iceberg.system.rewrite_data_files('db', 'sample', 'partition_key = 1'); + CALL iceberg.system.rewrite_data_files(schema => 'db', table_name => 'sample', filter => 'partition_key = 1'); + Presto C++ Support ^^^^^^^^^^^^^^^^^^ diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java index 6ec2498ca40a8..bd10b9dea4e7e 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergAbstractMetadata.java @@ -15,6 +15,7 @@ import com.facebook.airlift.json.JsonCodec; import com.facebook.airlift.log.Logger; +import com.facebook.presto.common.QualifiedObjectName; import com.facebook.presto.common.RuntimeStats; import com.facebook.presto.common.Subfield; import com.facebook.presto.common.predicate.TupleDomain; @@ -35,6 +36,8 @@ import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ColumnMetadata; import com.facebook.presto.spi.ConnectorDeleteTableHandle; +import com.facebook.presto.spi.ConnectorDistributedProcedureHandle; +import com.facebook.presto.spi.ConnectorId; import com.facebook.presto.spi.ConnectorInsertTableHandle; import com.facebook.presto.spi.ConnectorNewTableLayout; import com.facebook.presto.spi.ConnectorOutputTableHandle; @@ -62,6 +65,9 @@ import 
com.facebook.presto.spi.connector.ConnectorTableVersion.VersionType; import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; +import com.facebook.presto.spi.procedure.BaseProcedure; +import com.facebook.presto.spi.procedure.DistributedProcedure; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpression; import com.facebook.presto.spi.relation.RowExpressionService; import com.facebook.presto.spi.security.ViewSecurity; @@ -249,12 +255,14 @@ public abstract class IcebergAbstractMetadata protected static final int CURRENT_MATERIALIZED_VIEW_FORMAT_VERSION = 1; protected final TypeManager typeManager; + protected final ProcedureRegistry procedureRegistry; protected final JsonCodec commitTaskCodec; protected final JsonCodec> columnMappingsCodec; protected final JsonCodec> schemaTableNamesCodec; protected final NodeVersion nodeVersion; protected final RowExpressionService rowExpressionService; protected final FilterStatsCalculatorService filterStatsCalculatorService; + protected Optional procedureContext = Optional.empty(); protected Transaction transaction; protected final StatisticsFileCache statisticsFileCache; protected final IcebergTableProperties tableProperties; @@ -264,6 +272,7 @@ public abstract class IcebergAbstractMetadata public IcebergAbstractMetadata( TypeManager typeManager, + ProcedureRegistry procedureRegistry, StandardFunctionResolution functionResolution, RowExpressionService rowExpressionService, JsonCodec commitTaskCodec, @@ -275,6 +284,7 @@ public IcebergAbstractMetadata( IcebergTableProperties tableProperties) { this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.procedureRegistry = requireNonNull(procedureRegistry, "procedureRegistry is null"); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); this.columnMappingsCodec = requireNonNull(columnMappingsCodec, 
"columnMappingsCodec is null"); this.schemaTableNamesCodec = requireNonNull(schemaTableNamesCodec, "schemaTableNamesCodec is null"); @@ -317,6 +327,11 @@ protected abstract void updateIcebergViewProperties( public abstract void unregisterTable(ConnectorSession clientSession, SchemaTableName schemaTableName); + public Optional getProcedureContext() + { + return this.procedureContext; + } + /** * This class implements the default implementation for getTableLayoutForConstraint which will be used in the case of a Java Worker */ @@ -327,15 +342,17 @@ public ConnectorTableLayoutResult getTableLayoutForConstraint( Constraint constraint, Optional> desiredColumns) { - Map predicateColumns = constraint.getSummary().getDomains().get().keySet().stream() - .map(IcebergColumnHandle.class::cast) - .collect(toImmutableMap(IcebergColumnHandle::getName, Functions.identity())); + Map predicateColumns = constraint.getSummary().getDomains() + .map(domains -> domains.keySet().stream() + .map(IcebergColumnHandle.class::cast) + .collect(toImmutableMap(IcebergColumnHandle::getName, Functions.identity()))) + .orElse(ImmutableMap.of()); IcebergTableHandle handle = (IcebergTableHandle) table; Table icebergTable = getIcebergTable(session, handle.getSchemaTableName()); List partitionColumns = getPartitionKeyColumnHandles(handle, icebergTable, typeManager); - TupleDomain partitionColumnPredicate = TupleDomain.withColumnDomains(Maps.filterKeys(constraint.getSummary().getDomains().get(), Predicates.in(partitionColumns))); + TupleDomain partitionColumnPredicate = TupleDomain.withColumnDomains(Maps.filterKeys(constraint.getSummary().getDomains().orElse(ImmutableMap.of()), Predicates.in(partitionColumns))); Optional> requestedColumns = desiredColumns.map(columns -> columns.stream().map(column -> (IcebergColumnHandle) column).collect(toImmutableSet())); List partitions; @@ -1117,6 +1134,46 @@ public void truncateTable(ConnectorSession session, ConnectorTableHandle tableHa removeScanFiles(icebergTable, 
TupleDomain.all()); } + @Override + public ConnectorDistributedProcedureHandle beginCallDistributedProcedure( + ConnectorSession session, + QualifiedObjectName procedureName, + ConnectorTableLayoutHandle tableLayoutHandle, + Object[] arguments) + { + IcebergTableHandle handle = ((IcebergTableLayoutHandle) tableLayoutHandle).getTable(); + Table icebergTable = getIcebergTable(session, handle.getSchemaTableName()); + + if (handle.isSnapshotSpecified()) { + throw new PrestoException(NOT_SUPPORTED, "This connector do not allow table execute at specified snapshot"); + } + + transaction = icebergTable.newTransaction(); + BaseProcedure procedure = procedureRegistry.resolve( + new ConnectorId(procedureName.getCatalogName()), + new SchemaTableName( + procedureName.getSchemaName(), + procedureName.getObjectName())); + verify(procedure instanceof DistributedProcedure, "procedure must be DistributedProcedure"); + procedureContext = Optional.of((IcebergProcedureContext) ((DistributedProcedure) procedure).createContext(icebergTable, transaction)); + return ((DistributedProcedure) procedure).begin(session, procedureContext.get(), tableLayoutHandle, arguments); + } + + @Override + public void finishCallDistributedProcedure(ConnectorSession session, ConnectorDistributedProcedureHandle procedureHandle, QualifiedObjectName procedureName, Collection fragments) + { + BaseProcedure procedure = procedureRegistry.resolve( + new ConnectorId(procedureName.getCatalogName()), + new SchemaTableName( + procedureName.getSchemaName(), + procedureName.getObjectName())); + verify(procedure instanceof DistributedProcedure, "procedure must be DistributedProcedure"); + verify(procedureContext.isPresent(), "procedure context must be present"); + ((DistributedProcedure) procedure).finish(session, procedureContext.get(), procedureHandle, fragments); + transaction.commitTransaction(); + procedureContext = Optional.empty(); + } + @Override public ConnectorDeleteTableHandle beginDelete(ConnectorSession 
session, ConnectorTableHandle tableHandle) { diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java index 36c326b27844e..4b9007f96017d 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergCommonModule.java @@ -48,6 +48,7 @@ import com.facebook.presto.iceberg.procedure.ManifestFileCacheInvalidationProcedure; import com.facebook.presto.iceberg.procedure.RegisterTableProcedure; import com.facebook.presto.iceberg.procedure.RemoveOrphanFiles; +import com.facebook.presto.iceberg.procedure.RewriteDataFilesProcedure; import com.facebook.presto.iceberg.procedure.RollbackToSnapshotProcedure; import com.facebook.presto.iceberg.procedure.RollbackToTimestampProcedure; import com.facebook.presto.iceberg.procedure.SetCurrentSnapshotProcedure; @@ -190,6 +191,7 @@ protected void setup(Binder binder) procedures.addBinding().toProvider(SetTablePropertyProcedure.class).in(Scopes.SINGLETON); procedures.addBinding().toProvider(StatisticsFileCacheInvalidationProcedure.class).in(Scopes.SINGLETON); procedures.addBinding().toProvider(ManifestFileCacheInvalidationProcedure.class).in(Scopes.SINGLETON); + procedures.addBinding().toProvider(RewriteDataFilesProcedure.class).in(Scopes.SINGLETON); // for orc binder.bind(EncryptionLibrary.class).annotatedWith(HiveDwrfEncryptionProvider.ForCryptoService.class).to(UnsupportedEncryptionLibrary.class).in(Scopes.SINGLETON); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergDistributedProcedureHandle.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergDistributedProcedureHandle.java new file mode 100644 index 0000000000000..c6f78f97992cb --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergDistributedProcedureHandle.java @@ -0,0 +1,74 @@ +/* + * 
Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.hive.HiveCompressionCodec; +import com.facebook.presto.spi.ConnectorDistributedProcedureHandle; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Map; + +import static java.util.Objects.requireNonNull; + +public class IcebergDistributedProcedureHandle + extends IcebergWritableTableHandle + implements ConnectorDistributedProcedureHandle +{ + private final IcebergTableLayoutHandle tableLayoutHandle; + private final Map relevantData; + + @JsonCreator + public IcebergDistributedProcedureHandle( + @JsonProperty("schemaName") String schemaName, + @JsonProperty("tableName") IcebergTableName tableName, + @JsonProperty("schema") PrestoIcebergSchema schema, + @JsonProperty("partitionSpec") PrestoIcebergPartitionSpec partitionSpec, + @JsonProperty("inputColumns") List inputColumns, + @JsonProperty("outputPath") String outputPath, + @JsonProperty("fileFormat") FileFormat fileFormat, + @JsonProperty("compressionCodec") HiveCompressionCodec compressionCodec, + @JsonProperty("storageProperties") Map storageProperties, + @JsonProperty("tableLayoutHandle") IcebergTableLayoutHandle tableLayoutHandle, + @JsonProperty("relevantData") Map relevantData) + { + super( + schemaName, + tableName, + schema, + partitionSpec, 
+ inputColumns, + outputPath, + fileFormat, + compressionCodec, + storageProperties, + ImmutableList.of()); + this.tableLayoutHandle = requireNonNull(tableLayoutHandle, "tableLayoutHandle is null"); + this.relevantData = requireNonNull(relevantData, "relevantData is null"); + } + + @JsonProperty + public IcebergTableLayoutHandle getTableLayoutHandle() + { + return tableLayoutHandle; + } + + @JsonProperty + public Map getRelevantData() + { + return relevantData; + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHandleResolver.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHandleResolver.java index 199939c6b7985..92d3d0e9fdeec 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHandleResolver.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHandleResolver.java @@ -16,6 +16,7 @@ import com.facebook.presto.hive.HiveTransactionHandle; import com.facebook.presto.spi.ColumnHandle; import com.facebook.presto.spi.ConnectorDeleteTableHandle; +import com.facebook.presto.spi.ConnectorDistributedProcedureHandle; import com.facebook.presto.spi.ConnectorHandleResolver; import com.facebook.presto.spi.ConnectorInsertTableHandle; import com.facebook.presto.spi.ConnectorOutputTableHandle; @@ -69,6 +70,12 @@ public Class getDeleteTableHandleClass() return IcebergTableHandle.class; } + @Override + public Class getDistributedProcedureHandleClass() + { + return IcebergDistributedProcedureHandle.class; + } + @Override public Class getTransactionHandleClass() { diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java index 6499bbeecb869..4bf0768bf66bf 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadata.java @@ -56,6 +56,7 @@ import 
com.facebook.presto.spi.ViewNotFoundException; import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpressionService; import com.facebook.presto.spi.security.PrestoPrincipal; import com.facebook.presto.spi.statistics.ColumnStatisticMetadata; @@ -177,6 +178,7 @@ public IcebergHiveMetadata( ExtendedHiveMetastore metastore, HdfsEnvironment hdfsEnvironment, TypeManager typeManager, + ProcedureRegistry procedureRegistry, StandardFunctionResolution functionResolution, RowExpressionService rowExpressionService, JsonCodec commitTaskCodec, @@ -190,7 +192,8 @@ public IcebergHiveMetadata( IcebergTableProperties tableProperties, ConnectorSystemConfig connectorSystemConfig) { - super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, nodeVersion, filterStatsCalculatorService, statisticsFileCache, tableProperties); + super(typeManager, procedureRegistry, functionResolution, rowExpressionService, commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, + nodeVersion, filterStatsCalculatorService, statisticsFileCache, tableProperties); this.catalogName = requireNonNull(catalogName, "catalogName is null"); this.metastore = requireNonNull(metastore, "metastore is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java index 19529497803d3..ca37b7910b009 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergHiveMetadataFactory.java @@ -24,6 +24,7 @@ import 
com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpressionService; import jakarta.inject.Inject; @@ -39,6 +40,7 @@ public class IcebergHiveMetadataFactory final ExtendedHiveMetastore metastore; final HdfsEnvironment hdfsEnvironment; final TypeManager typeManager; + final ProcedureRegistry procedureRegistry; final JsonCodec commitTaskCodec; final JsonCodec> columnMappingsCodec; final JsonCodec> schemaTableNamesCodec; @@ -58,6 +60,7 @@ public IcebergHiveMetadataFactory( ExtendedHiveMetastore metastore, HdfsEnvironment hdfsEnvironment, TypeManager typeManager, + ProcedureRegistry procedureRegistry, StandardFunctionResolution functionResolution, RowExpressionService rowExpressionService, JsonCodec commitTaskCodec, @@ -75,6 +78,7 @@ public IcebergHiveMetadataFactory( this.metastore = requireNonNull(metastore, "metastore is null"); this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.procedureRegistry = requireNonNull(procedureRegistry, "procedureRegistry is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null"); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); @@ -96,6 +100,7 @@ public ConnectorMetadata create() metastore, hdfsEnvironment, typeManager, + procedureRegistry, functionResolution, rowExpressionService, commitTaskCodec, diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java index 0b189b23d7509..9202be8e4d5c4 100644 
--- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java @@ -32,6 +32,7 @@ import com.facebook.presto.spi.SchemaTablePrefix; import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpressionService; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -109,6 +110,7 @@ public class IcebergNativeMetadata public IcebergNativeMetadata( IcebergNativeCatalogFactory catalogFactory, TypeManager typeManager, + ProcedureRegistry procedureRegistry, StandardFunctionResolution functionResolution, RowExpressionService rowExpressionService, JsonCodec commitTaskCodec, @@ -120,7 +122,8 @@ public IcebergNativeMetadata( StatisticsFileCache statisticsFileCache, IcebergTableProperties tableProperties) { - super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, nodeVersion, filterStatsCalculatorService, statisticsFileCache, tableProperties); + super(typeManager, procedureRegistry, functionResolution, rowExpressionService, commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, + nodeVersion, filterStatsCalculatorService, statisticsFileCache, tableProperties); this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null"); this.catalogType = requireNonNull(catalogType, "catalogType is null"); this.warehouseDataDir = Optional.ofNullable(catalogFactory.getCatalogWarehouseDataDir()); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java index 12479950b3c03..72f11ce078166 100644 --- 
a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadataFactory.java @@ -22,6 +22,7 @@ import com.facebook.presto.spi.connector.ConnectorMetadata; import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpressionService; import jakarta.inject.Inject; @@ -33,6 +34,7 @@ public class IcebergNativeMetadataFactory implements IcebergMetadataFactory { final TypeManager typeManager; + final ProcedureRegistry procedureRegistry; final JsonCodec commitTaskCodec; final JsonCodec> columnMappingsCodec; final JsonCodec> schemaTableNamesCodec; @@ -50,6 +52,7 @@ public IcebergNativeMetadataFactory( IcebergConfig config, IcebergNativeCatalogFactory catalogFactory, TypeManager typeManager, + ProcedureRegistry procedureRegistry, StandardFunctionResolution functionResolution, RowExpressionService rowExpressionService, JsonCodec commitTaskCodec, @@ -62,6 +65,7 @@ public IcebergNativeMetadataFactory( { this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null"); this.typeManager = requireNonNull(typeManager, "typeManager is null"); + this.procedureRegistry = requireNonNull(procedureRegistry, "procedureRegistry is null"); this.functionResolution = requireNonNull(functionResolution, "functionResolution is null"); this.rowExpressionService = requireNonNull(rowExpressionService, "rowExpressionService is null"); this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null"); @@ -76,6 +80,8 @@ public IcebergNativeMetadataFactory( public ConnectorMetadata create() { - return new IcebergNativeMetadata(catalogFactory, typeManager, functionResolution, rowExpressionService, commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, catalogType, nodeVersion, 
filterStatsCalculatorService, statisticsFileCache, tableProperties); + return new IcebergNativeMetadata(catalogFactory, typeManager, procedureRegistry, functionResolution, rowExpressionService, + commitTaskCodec, columnMappingsCodec, schemaTableNamesCodec, catalogType, nodeVersion, filterStatsCalculatorService, + statisticsFileCache, tableProperties); } } diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSinkProvider.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSinkProvider.java index e8e8db1163aed..e14d0178b153d 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSinkProvider.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPageSinkProvider.java @@ -16,6 +16,7 @@ import com.facebook.airlift.json.JsonCodec; import com.facebook.presto.hive.HdfsContext; import com.facebook.presto.hive.HdfsEnvironment; +import com.facebook.presto.spi.ConnectorDistributedProcedureHandle; import com.facebook.presto.spi.ConnectorInsertTableHandle; import com.facebook.presto.spi.ConnectorOutputTableHandle; import com.facebook.presto.spi.ConnectorPageSink; @@ -79,6 +80,12 @@ public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHa return createPageSink(session, (IcebergWritableTableHandle) insertTableHandle); } + @Override + public ConnectorPageSink createPageSink(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorDistributedProcedureHandle procedureHandle, PageSinkContext pageSinkContext) + { + return createPageSink(session, (IcebergWritableTableHandle) procedureHandle); + } + private ConnectorPageSink createPageSink(ConnectorSession session, IcebergWritableTableHandle tableHandle) { HdfsContext hdfsContext = new HdfsContext(session, tableHandle.getSchemaName(), tableHandle.getTableName().getTableName()); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergProcedureContext.java 
b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergProcedureContext.java new file mode 100644 index 0000000000000..e5f2d04c97e4c --- /dev/null +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergProcedureContext.java @@ -0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.spi.connector.ConnectorProcedureContext; +import org.apache.iceberg.Table; +import org.apache.iceberg.Transaction; + +import static java.util.Objects.requireNonNull; + +public class IcebergProcedureContext + implements ConnectorProcedureContext +{ + final Table table; + final Transaction transaction; + + public IcebergProcedureContext(Table table, Transaction transaction) + { + this.table = requireNonNull(table, "table is null"); + this.transaction = requireNonNull(transaction, "transaction is null"); + } + + public Table getTable() + { + return table; + } + + public Transaction getTransaction() + { + return transaction; + } +} diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSplitManager.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSplitManager.java index 0ad3345b7ae9d..aeaf5dea8b202 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSplitManager.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergSplitManager.java @@ -82,6 +82,7 @@ public ConnectorSplitSource getSplits( 
TupleDomain predicate = getNonMetadataColumnConstraints(layoutHandle .getValidPredicate()); + Table icebergTable = getIcebergTable(transactionManager.get(transaction), session, table.getSchemaTableName()); if (table.getIcebergTableName().getTableType() == CHANGELOG) { diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java index 3b8994df46855..3fb8319865844 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/InternalIcebergConnectorFactory.java @@ -49,6 +49,7 @@ import com.facebook.presto.spi.function.StandardFunctionResolution; import com.facebook.presto.spi.plan.FilterStatsCalculatorService; import com.facebook.presto.spi.procedure.BaseProcedure; +import com.facebook.presto.spi.procedure.ProcedureRegistry; import com.facebook.presto.spi.relation.RowExpressionService; import com.facebook.presto.spi.session.PropertyMetadata; import com.google.common.collect.ImmutableSet; @@ -94,6 +95,7 @@ public static Connector createConnector( binder.bind(NodeVersion.class).toInstance(new NodeVersion(context.getNodeManager().getCurrentNode().getVersion())); binder.bind(NodeManager.class).toInstance(context.getNodeManager()); binder.bind(TypeManager.class).toInstance(context.getTypeManager()); + binder.bind(ProcedureRegistry.class).toInstance(context.getProcedureRegistry()); binder.bind(PageIndexerFactory.class).toInstance(context.getPageIndexerFactory()); binder.bind(PageSorter.class).toInstance(context.getPageSorter()); binder.bind(StandardFunctionResolution.class).toInstance(context.getStandardFunctionResolution()); diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/procedure/RewriteDataFilesProcedure.java 
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.iceberg.procedure;

import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.common.predicate.TupleDomain;
import com.facebook.presto.common.type.TypeManager;
import com.facebook.presto.iceberg.CommitTaskData;
import com.facebook.presto.iceberg.IcebergColumnHandle;
import com.facebook.presto.iceberg.IcebergDistributedProcedureHandle;
import com.facebook.presto.iceberg.IcebergProcedureContext;
import com.facebook.presto.iceberg.IcebergTableHandle;
import com.facebook.presto.iceberg.IcebergTableLayoutHandle;
import com.facebook.presto.iceberg.PartitionData;
import com.facebook.presto.iceberg.RuntimeStatsMetricsReporter;
import com.facebook.presto.spi.ConnectorDistributedProcedureHandle;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.classloader.ThreadContextClassLoader;
import com.facebook.presto.spi.procedure.DistributedProcedure;
import com.facebook.presto.spi.procedure.DistributedProcedure.Argument;
import com.facebook.presto.spi.procedure.TableDataRewriteDistributedProcedure;
import com.google.common.base.VerifyException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import io.airlift.slice.Slice;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileContent;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.RewriteFiles;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.types.Type;

import javax.inject.Inject;
import javax.inject.Provider;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;

import static com.facebook.presto.common.Utils.checkArgument;
import static com.facebook.presto.common.type.StandardTypes.VARCHAR;
import static com.facebook.presto.iceberg.ExpressionConverter.toIcebergExpression;
import static com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec;
import static com.facebook.presto.iceberg.IcebergUtil.getColumns;
import static com.facebook.presto.iceberg.IcebergUtil.getFileFormat;
import static com.facebook.presto.iceberg.PartitionSpecConverter.toPrestoPartitionSpec;
import static com.facebook.presto.iceberg.SchemaConverter.toPrestoSchema;
import static com.facebook.presto.spi.procedure.TableDataRewriteDistributedProcedure.SCHEMA;
import static com.facebook.presto.spi.procedure.TableDataRewriteDistributedProcedure.TABLE_NAME;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.util.Objects.requireNonNull;

/**
 * Provider for the distributed {@code system.rewrite_data_files} procedure.
 *
 * <p>The procedure compacts the data files of an Iceberg table: files selected by the
 * (optional) {@code filter} argument are rewritten into fewer, larger files under the
 * table's newest partition spec, and fully-applied delete files are dropped.
 */
public class RewriteDataFilesProcedure
        implements Provider<DistributedProcedure>
{
    // Fields are private final: they are injected once and never reassigned.
    private final TypeManager typeManager;
    private final JsonCodec<CommitTaskData> commitTaskCodec;

    @Inject
    public RewriteDataFilesProcedure(
            TypeManager typeManager,
            JsonCodec<CommitTaskData> commitTaskCodec)
    {
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
        this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
    }

    @Override
    public DistributedProcedure get()
    {
        return new TableDataRewriteDistributedProcedure(
                "system",
                "rewrite_data_files",
                ImmutableList.of(
                        new Argument(SCHEMA, VARCHAR),
                        new Argument(TABLE_NAME, VARCHAR),
                        // Optional predicate selecting the files to rewrite; defaults to the whole table.
                        new Argument("filter", VARCHAR, false, "TRUE"),
                        // Reserved for rewrite options; currently unused.
                        new Argument("options", "map(varchar, varchar)", false, null)),
                (session, procedureContext, tableLayoutHandle, arguments) -> beginCallDistributedProcedure(session, (IcebergProcedureContext) procedureContext, (IcebergTableLayoutHandle) tableLayoutHandle, arguments),
                (session, procedureContext, tableHandle, fragments) -> finishCallDistributedProcedure(session, (IcebergProcedureContext) procedureContext, tableHandle, fragments),
                arguments -> {
                    checkArgument(arguments.length == 2, "invalid arguments count: " + arguments.length);
                    checkArgument(arguments[0] instanceof Table && arguments[1] instanceof Transaction, "Invalid arguments, required: [Table, Transaction]");
                    return new IcebergProcedureContext((Table) arguments[0], (Transaction) arguments[1]);
                });
    }

    /**
     * Builds the distributed-procedure handle describing the target table for the
     * write side of the rewrite (schema, partition spec, columns, format, location).
     */
    private ConnectorDistributedProcedureHandle beginCallDistributedProcedure(ConnectorSession session, IcebergProcedureContext procedureContext, IcebergTableLayoutHandle layoutHandle, Object[] arguments)
    {
        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) {
            Table icebergTable = procedureContext.getTable();
            IcebergTableHandle tableHandle = layoutHandle.getTable();

            return new IcebergDistributedProcedureHandle(
                    tableHandle.getSchemaName(),
                    tableHandle.getIcebergTableName(),
                    toPrestoSchema(icebergTable.schema(), typeManager),
                    toPrestoPartitionSpec(icebergTable.spec(), typeManager),
                    getColumns(icebergTable.schema(), icebergTable.spec(), typeManager),
                    icebergTable.location(),
                    getFileFormat(icebergTable),
                    getCompressionCodec(session),
                    icebergTable.properties(),
                    layoutHandle,
                    ImmutableMap.of());
        }
    }

    /**
     * Commits the rewrite: collects the newly written files from {@code fragments},
     * re-plans the scan to determine which existing data/delete files they replace,
     * and commits a {@link RewriteFiles} operation on the procedure transaction.
     */
    private void finishCallDistributedProcedure(ConnectorSession session, IcebergProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection<Slice> fragments)
    {
        try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(getClass().getClassLoader())) {
            IcebergDistributedProcedureHandle handle = (IcebergDistributedProcedureHandle) procedureHandle;
            Table icebergTable = procedureContext.getTransaction().table();

            // Deserialize the commit tasks produced by the distributed writers.
            List<CommitTaskData> commitTasks = fragments.stream()
                    .map(slice -> commitTaskCodec.fromJson(slice.getBytes()))
                    .collect(toImmutableList());

            Type[] partitionColumnTypes = icebergTable.spec().fields().stream()
                    .map(field -> field.transform().getResultType(
                            icebergTable.schema().findType(field.sourceId())))
                    .toArray(Type[]::new);

            // Newly written data files, all under the table's current (newest) partition spec.
            Set<DataFile> newFiles = new HashSet<>();
            for (CommitTaskData task : commitTasks) {
                DataFiles.Builder builder = DataFiles.builder(icebergTable.spec())
                        .withPath(task.getPath())
                        .withFileSizeInBytes(task.getFileSizeInBytes())
                        .withFormat(handle.getFileFormat().name())
                        .withMetrics(task.getMetrics().metrics());

                if (!icebergTable.spec().fields().isEmpty()) {
                    String partitionDataJson = task.getPartitionDataJson()
                            .orElseThrow(() -> new VerifyException("No partition data for partitioned table"));
                    builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes));
                }
                newFiles.add(builder.build());
            }

            IcebergTableLayoutHandle layoutHandle = handle.getTableLayoutHandle();
            IcebergTableHandle tableHandle = layoutHandle.getTable();
            Set<DataFile> scannedDataFiles = new HashSet<>();
            Set<DeleteFile> fullyAppliedDeleteFiles = new HashSet<>();
            if (tableHandle.getIcebergTableName().getSnapshotId().isPresent()) {
                TupleDomain<IcebergColumnHandle> predicate = layoutHandle.getValidPredicate();

                Consumer<FileScanTask> fileScanTaskConsumer = task -> {
                    scannedDataFiles.add(task.file());
                    if (!task.deletes().isEmpty()) {
                        task.deletes().forEach(deleteFile -> {
                            if (deleteFile.content() == FileContent.EQUALITY_DELETES &&
                                    !icebergTable.specs().get(deleteFile.specId()).isPartitioned() &&
                                    !predicate.isAll()) {
                                // Equality files with an unpartitioned spec are applied as global deletes
                                // So they should not be cleaned up unless the whole table is optimized
                                return;
                            }
                            fullyAppliedDeleteFiles.add(deleteFile);
                        });
                    }
                };

                // Re-plan the scan at the procedure's snapshot to find the files being replaced.
                TableScan tableScan = procedureContext.getTable().newScan()
                        .metricsReporter(new RuntimeStatsMetricsReporter(session.getRuntimeStats()))
                        .filter(toIcebergExpression(predicate))
                        .useSnapshot(tableHandle.getIcebergTableName().getSnapshotId().get());
                CloseableIterable<FileScanTask> fileScanTaskIterable = tableScan.planFiles();
                CloseableIterator<FileScanTask> fileScanTaskIterator = fileScanTaskIterable.iterator();
                fileScanTaskIterator.forEachRemaining(fileScanTaskConsumer);
                try {
                    fileScanTaskIterable.close();
                    fileScanTaskIterator.close();
                    // TODO: remove this workaround once org.apache.iceberg.io.CloseableIterator#withClose
                    // correctly releases the resources held by the iterator.
                    fileScanTaskIterator = CloseableIterator.empty();
                }
                catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }

            // Nothing was written, scanned, or deleted: the rewrite is a no-op, skip the commit.
            if (fragments.isEmpty() &&
                    scannedDataFiles.isEmpty() &&
                    fullyAppliedDeleteFiles.isEmpty()) {
                return;
            }

            RewriteFiles rewriteFiles = procedureContext.getTransaction().newRewrite()
                    .rewriteFiles(scannedDataFiles, fullyAppliedDeleteFiles, newFiles, ImmutableSet.of());

            if (icebergTable.currentSnapshot() != null) {
                // Resolve the starting snapshot only when validation is actually needed;
                // Table.snapshot returns null when there is no matching snapshot, and the
                // previous unconditional requireNonNull could throw even when no
                // validation was going to be performed.
                Snapshot snapshot = requireNonNull(
                        handle.getTableName()
                                .getSnapshotId()
                                .map(icebergTable::snapshot)
                                .orElse(null),
                        "snapshot is null");
                rewriteFiles.validateFromSnapshot(snapshot.snapshotId());
            }
            rewriteFiles.commit();
        }
    }
}
    /**
     * Verifies that rewriting ALL data files moves them under the newest partition
     * spec, after which metadata-only delete on the new partition column succeeds.
     */
    @Test(dataProvider = "version_and_mode")
    public void testMetadataDeleteOnTableAfterWholeRewriteDataFiles(String version, String mode)
    {
        String errorMessage = "This connector only supports delete where one or more partitions are deleted entirely.*";
        String schemaName = getSession().getSchema().get();
        String tableName = "test_rewrite_data_files_table_" + randomTableSuffix();
        try {
            // Create a table and insert some data under the initial (unpartitioned) spec
            assertUpdate("CREATE TABLE " + tableName + " (a INTEGER, b VARCHAR) WITH (format_version = '" + version + "', delete_mode = '" + mode + "')");
            assertUpdate("INSERT INTO " + tableName + " VALUES (1, '1001'), (2, '1002')", 2);

            // Then evolve the partition spec by adding a partition column `c`, and insert some data under the new partition spec
            assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN c INTEGER WITH (partitioning = 'identity')");
            assertUpdate("INSERT INTO " + tableName + " VALUES (3, '1003', 3), (4, '1004', 4), (5, '1005', 5)", 3);

            // Do not support metadata delete with filter on column `c`, because we have data with old partition spec
            assertQueryFails("DELETE FROM " + tableName + " WHERE c > 3", errorMessage);

            // Call procedure rewrite_data_files without filter to rewrite all data files
            assertUpdate("call system.rewrite_data_files(table_name => '" + tableName + "', schema => '" + schemaName + "')", 5);

            // Then we can do metadata delete on column `c`, because all data files are rewritten under new partition spec
            assertUpdate("DELETE FROM " + tableName + " WHERE c > 3", 2);
            assertQuery("SELECT * FROM " + tableName, "VALUES (1, '1001', NULL), (2, '1002', NULL), (3, '1003', 3)");
        }
        finally {
            dropTable(getSession(), tableName);
        }
    }

    /**
     * Verifies that rewriting only the files of the OLD partition spec (selected via
     * the `filter` argument) is enough to enable metadata-only delete on the new
     * partition column afterwards.
     */
    @Test(dataProvider = "version_and_mode")
    public void testMetadataDeleteOnTableAfterPartialRewriteDataFiles(String version, String mode)
    {
        String errorMessage = "This connector only supports delete where one or more partitions are deleted entirely.*";
        String schemaName = getSession().getSchema().get();
        String tableName = "test_rewrite_data_files_table_" + randomTableSuffix();
        try {
            // Create a table with partition column `a`, and insert some data under this partition spec
            assertUpdate("CREATE TABLE " + tableName + " (a INTEGER, b VARCHAR) WITH (format_version = '" + version + "', delete_mode = '" + mode + "', partitioning = ARRAY['a'])");
            assertUpdate("INSERT INTO " + tableName + " VALUES (1, '1001'), (2, '1002')", 2);

            // Then evolve the partition spec by adding a partition column `c`, and insert some data under the new partition spec
            assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN c INTEGER WITH (partitioning = 'identity')");
            assertUpdate("INSERT INTO " + tableName + " VALUES (3, '1003', 3), (4, '1004', 4), (5, '1005', 5)", 3);

            // Do not support metadata delete with filter on column `c`, because we have data with old partition spec
            assertQueryFails("DELETE FROM " + tableName + " WHERE c > 3", errorMessage);

            // Call procedure rewrite_data_files with filter to rewrite data files under the prior partition spec
            assertUpdate("call system.rewrite_data_files(table_name => '" + tableName + "', schema => '" + schemaName + "', filter => 'a in (1, 2)')", 2);

            // Then we can do metadata delete on column `c`, because all data files are now under new partition spec
            assertUpdate("DELETE FROM " + tableName + " WHERE c > 3", 2);
            assertQuery("SELECT * FROM " + tableName, "VALUES (1, '1001', NULL), (2, '1002', NULL), (3, '1003', 3)");
        }
        finally {
            dropTable(getSession(), tableName);
        }
    }
b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java @@ -2043,6 +2043,62 @@ public void testDecimal(boolean decimalVectorReaderEnabled) } } + public void testMetadataDeleteOnV2MorTableWithRewriteDataFiles() + { + String tableName = "test_rewrite_data_files_table_" + randomTableSuffix(); + try { + // Create a table with partition column `a`, and insert some data under this partition spec + assertUpdate("CREATE TABLE " + tableName + " (a INTEGER, b VARCHAR) WITH (format_version = '2', delete_mode = 'merge-on-read')"); + assertUpdate("INSERT INTO " + tableName + " VALUES (1, '1001'), (2, '1002')", 2); + assertUpdate("DELETE FROM " + tableName + " WHERE a = 1", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES (2, '1002')"); + + Table icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 1); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 1); + + // Evaluate the partition spec by adding a partition column `c`, and insert some data under the new partition spec + assertUpdate("ALTER TABLE " + tableName + " ADD COLUMN c INTEGER WITH (partitioning = 'identity')"); + assertUpdate("INSERT INTO " + tableName + " VALUES (3, '1003', 3), (4, '1004', 4), (5, '1005', 5)", 3); + + icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 4); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 1); + + // Execute row level delete with filter on column `b` + assertUpdate("DELETE FROM " + tableName + " WHERE b = '1004'", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES (2, '1002', NULL), (3, '1003', 3), (5, '1005', 5)"); + icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 4); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 2); + + assertQueryFails("call system.rewrite_data_files(table_name => '" + tableName + "', schema => 'tpch', filter => 'a > 3')", ".*"); + assertQueryFails("call 
system.rewrite_data_files(table_name => '" + tableName + "', schema => 'tpch', filter => 'c > 3')", ".*"); + + assertUpdate("call system.rewrite_data_files(table_name => '" + tableName + "', schema => 'tpch')", 3); + assertQuery("SELECT * FROM " + tableName, "VALUES (2, '1002', NULL), (3, '1003', 3), (5, '1005', 5)"); + icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 3); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 0); + + // Do metadata delete on column `a`, because all partition specs contains partition column `a` + assertUpdate("DELETE FROM " + tableName + " WHERE c = 5", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES (2, '1002', NULL), (3, '1003', 3)"); + icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 2); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 0); + + assertUpdate("call system.rewrite_data_files(table_name => '" + tableName + "', schema => 'tpch', filter => 'c > 2')", 1); + assertQuery("SELECT * FROM " + tableName, "VALUES (2, '1002', NULL), (3, '1003', 3)"); + icebergTable = loadTable(tableName); + assertHasDataFiles(icebergTable.currentSnapshot(), 2); + assertHasDeleteFiles(icebergTable.currentSnapshot(), 0); + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + } + } + @Test public void testRefsTable() { @@ -2891,14 +2947,14 @@ private void testWithAllFileFormats(Session session, BiConsumer map = snapshot.summary(); int totalDataFiles = Integer.valueOf(map.get(TOTAL_DATA_FILES_PROP)); assertEquals(totalDataFiles, dataFilesCount); } - private void assertHasDeleteFiles(Snapshot snapshot, int deleteFilesCount) + protected void assertHasDeleteFiles(Snapshot snapshot, int deleteFilesCount) { Map map = snapshot.summary(); int totalDeleteFiles = Integer.valueOf(map.get(TOTAL_DELETE_FILES_PROP)); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergLogicalPlanner.java 
b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergLogicalPlanner.java index ed274b55b7bf5..d86b4e8674de4 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergLogicalPlanner.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergLogicalPlanner.java @@ -98,6 +98,7 @@ import static com.facebook.presto.sql.planner.assertions.MatchResult.match; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyNot; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.anyTree; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.callDistributedProcedure; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.exchange; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.expression; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.filter; @@ -107,8 +108,13 @@ import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.project; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.strictProject; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.strictTableScan; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.tableFinish; import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values; import static com.facebook.presto.sql.planner.optimizations.PlanNodeSearcher.searchFrom; +import static com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.LOCAL; +import static com.facebook.presto.sql.planner.plan.ExchangeNode.Scope.REMOTE_STREAMING; +import static com.facebook.presto.sql.planner.plan.ExchangeNode.Type.GATHER; +import static com.facebook.presto.sql.planner.plan.ExchangeNode.Type.REPARTITION; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.collect.ImmutableList.toImmutableList; import static 
    /**
     * Plan-shape test for CALL system.rewrite_data_files on a partitioned table:
     * the call plans as TableFinish over a gathering exchange feeding the
     * distributed-procedure node, with the scan repartitioned by the table's
     * partitioning. Also checks filter handling: filters the connector cannot
     * fully enforce are rejected, fully-enforceable filters constrain the layout,
     * and always-false filters collapse the scan into an empty ValuesNode.
     */
    @Test
    public void testCallDistributedProcedureOnPartitionedTable()
    {
        String tableName = "partition_table_for_call_distributed_procedure";
        try {
            assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar) with (partitioning = ARRAY['c1'])");
            assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2);
            assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2);
            assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2);

            // No filter: full-table rewrite plans as TableFinish -> gather -> procedure -> repartitioned scan
            assertPlan(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s')", tableName, getSession().getSchema().get()),
                    output(tableFinish(exchange(REMOTE_STREAMING, GATHER,
                            callDistributedProcedure(
                                    exchange(LOCAL, GATHER,
                                            exchange(REMOTE_STREAMING, REPARTITION,
                                                    strictTableScan(tableName, identityMap("c1", "c2")))))))));

            // Do not support the filter that couldn't be enforced totally by tableScan
            assertQueryFails(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c2 > ''bar''')", tableName, getSession().getSchema().get()),
                    "Unexpected FilterNode found in plan; probably connector was not able to handle provided WHERE expression");

            // Support the filter that could be enforced totally by tableScan
            assertPlan(getSession(), format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c1 > 3')", tableName, getSession().getSchema().get()),
                    output(tableFinish(exchange(REMOTE_STREAMING, GATHER,
                            callDistributedProcedure(
                                    exchange(LOCAL, GATHER,
                                            exchange(REMOTE_STREAMING, REPARTITION,
                                                    strictTableScan(tableName, identityMap("c1", "c2")))))))),
                    plan -> assertTableLayout(
                            plan,
                            tableName,
                            withColumnDomains(ImmutableMap.of(
                                    new Subfield(
                                            "c1",
                                            ImmutableList.of()),
                                    Domain.create(ValueSet.ofRanges(greaterThan(INTEGER, 3L)), false))),
                            TRUE_CONSTANT,
                            ImmutableSet.of("c1")));

            // Support filter conditions that are always false, which cause the underlying TableScanNode to be optimized into an empty ValuesNode
            assertPlan(getSession(), format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => '1 > 2')", tableName, getSession().getSchema().get()),
                    output(tableFinish(exchange(REMOTE_STREAMING, GATHER,
                            callDistributedProcedure(
                                    exchange(LOCAL, GATHER,
                                            exchange(REMOTE_STREAMING, REPARTITION,
                                                    values(ImmutableList.of("c1", "c2"),
                                                            ImmutableList.of()))))))));
        }
        finally {
            assertUpdate("DROP TABLE " + tableName);
        }
    }
+ */ +package com.facebook.presto.iceberg; + +import com.facebook.presto.testing.QueryRunner; +import com.facebook.presto.tests.AbstractTestQueryFramework; +import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.io.CloseableIterator; +import org.testng.annotations.Test; + +import java.io.File; +import java.nio.file.Path; +import java.util.Map; +import java.util.OptionalInt; +import java.util.concurrent.atomic.AtomicInteger; + +import static com.facebook.presto.iceberg.CatalogType.HADOOP; +import static com.facebook.presto.iceberg.FileFormat.PARQUET; +import static com.facebook.presto.iceberg.IcebergQueryRunner.ICEBERG_CATALOG; +import static com.facebook.presto.iceberg.IcebergQueryRunner.getIcebergDataDirectoryPath; +import static java.lang.String.format; +import static org.apache.iceberg.SnapshotSummary.TOTAL_DATA_FILES_PROP; +import static org.apache.iceberg.SnapshotSummary.TOTAL_DELETE_FILES_PROP; +import static org.apache.iceberg.expressions.Expressions.alwaysTrue; +import static org.testng.Assert.assertEquals; + +public class TestRewriteDataFilesProcedure + extends AbstractTestQueryFramework +{ + public static final String TEST_SCHEMA = "tpch"; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return IcebergQueryRunner.builder() + .setCatalogType(HADOOP) + .setFormat(PARQUET) + .setNodeCount(OptionalInt.of(1)) + .setCreateTpchTables(false) + .setAddJmxPlugin(false) + .build().getQueryRunner(); + } + + public void dropTable(String tableName) + { + assertQuerySucceeds("DROP TABLE IF EXISTS " + tableName); + } + + @Test + public void testRewriteDataFilesInEmptyTable() + { + 
String tableName = "default_empty_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (id integer, value integer)"); + assertUpdate(format("CALL system.rewrite_data_files('%s', '%s')", TEST_SCHEMA, tableName), 0); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesOnPartitionTable() + { + String tableName = "example_partition_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar) with (partitioning = ARRAY['c2'])"); + + // create 5 files for each partition (c2 = 'foo' and c2 = 'bar') + assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(7, 'foo'), (8, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(9, 'foo'), (10, 'bar')", 2); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 5); + //The number of data files is 10,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 10); + assertHasDeleteFiles(table.currentSnapshot(), 0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 10, 0); + + assertUpdate("DELETE from " + tableName + " WHERE c1 = 7", 1); + assertUpdate("DELETE from " + tableName + " WHERE c1 in (8, 10)", 2); + + table.refresh(); + assertHasSize(table.snapshots(), 7); + //The number of data files is 10,and the number of delete files is 3 + assertHasDataFiles(table.currentSnapshot(), 10); + assertHasDeleteFiles(table.currentSnapshot(), 3); + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(9, 'foo')"); + + assertUpdate(format("CALL 
system.rewrite_data_files(table_name => '%s', schema => '%s')", tableName, TEST_SCHEMA), 7); + + table.refresh(); + assertHasSize(table.snapshots(), 8); + //The number of data files is 2,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 2); + assertHasDeleteFiles(table.currentSnapshot(), 0); + fileScanTasks = table.newScan() + .filter(alwaysTrue()) + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 2, 0); + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(9, 'foo')"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesOnNonPartitionTable() + { + String tableName = "example_non_partition_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar)"); + + // create 5 files + assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(7, 'foo'), (8, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(9, 'foo'), (10, 'bar')", 2); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 5); + //The number of data files is 5,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 5); + assertHasDeleteFiles(table.currentSnapshot(), 0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 5, 0); + + assertUpdate("DELETE from " + tableName + " WHERE c1 = 7", 1); + assertUpdate("DELETE from " + tableName + " WHERE c1 in (9, 10)", 2); + + table.refresh(); + assertHasSize(table.snapshots(), 7); + //The number of data files is 
5,and the number of delete files is 2 + assertHasDataFiles(table.currentSnapshot(), 5); + assertHasDeleteFiles(table.currentSnapshot(), 2); + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(8, 'bar')"); + + assertUpdate(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s')", tableName, TEST_SCHEMA), 7); + + table.refresh(); + assertHasSize(table.snapshots(), 8); + //The number of data files is 1,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 1); + assertHasDeleteFiles(table.currentSnapshot(), 0); + fileScanTasks = table.newScan() + .filter(alwaysTrue()) + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 1, 0); + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(8, 'bar')"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesWithFilter() + { + String tableName = "example_partition_filter_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar) with (partitioning = ARRAY['c2'])"); + + // create 5 files for each partition (c2 = 'foo' and c2 = 'bar') + assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(7, 'foo'), (8, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(9, 'foo'), (10, 'bar')", 2); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 5); + //The number of data files is 10,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 10); + assertHasDeleteFiles(table.currentSnapshot(), 
0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 10, 0); + + // do not support rewrite files filtered by non-identity columns + assertQueryFails(format("call system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c1 > 3')", tableName, TEST_SCHEMA), ".*"); + + // select 5 files to rewrite + assertUpdate(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c2 = ''bar''')", tableName, TEST_SCHEMA), 5); + table.refresh(); + assertHasSize(table.snapshots(), 6); + //The number of data files is 6,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 6); + assertHasDeleteFiles(table.currentSnapshot(), 0); + fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 6, 0); + + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(7, 'foo'), (8, 'bar'), " + + "(9, 'foo'), (10, 'bar')"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesWithDeterministicTrueFilter() + { + String tableName = "example_non_partition_true_filter_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar)"); + + // create 5 files + assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(7, 'foo'), (8, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(9, 'foo'), (10, 'bar')", 2); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 5); + //The number of data files is 5,and the number of 
delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 5); + assertHasDeleteFiles(table.currentSnapshot(), 0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 5, 0); + + // do not support rewrite files filtered by non-identity columns + assertQueryFails(format("call system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c1 > 3')", tableName, TEST_SCHEMA), ".*"); + + // the filter is `true` means select all files to rewrite + assertUpdate(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => '1 = 1')", tableName, TEST_SCHEMA), 10); + + table.refresh(); + assertHasSize(table.snapshots(), 6); + //The number of data files is 1,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 1); + assertHasDeleteFiles(table.currentSnapshot(), 0); + fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 1, 0); + + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(7, 'foo'), (8, 'bar'), " + + "(9, 'foo'), (10, 'bar')"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesWithDeterministicFalseFilter() + { + String tableName = "example_non_partition_false_filter_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (c1 integer, c2 varchar)"); + + // create 5 files + assertUpdate("INSERT INTO " + tableName + " values(1, 'foo'), (2, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(3, 'foo'), (4, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(5, 'foo'), (6, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(7, 'foo'), (8, 'bar')", 2); + assertUpdate("INSERT INTO " + tableName + " values(9, 'foo'), (10, 
'bar')", 2); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 5); + //The number of data files is 5,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 5); + assertHasDeleteFiles(table.currentSnapshot(), 0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 5, 0); + + // the filter is `false` means select no file to rewrite + assertUpdate(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => '1 = 0')", tableName, TEST_SCHEMA), 0); + + table.refresh(); + assertHasSize(table.snapshots(), 5); + //The number of data files is still 5,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 5); + assertHasDeleteFiles(table.currentSnapshot(), 0); + fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 5, 0); + + assertQuery("select * from " + tableName, + "values(1, 'foo'), (2, 'bar'), " + + "(3, 'foo'), (4, 'bar'), " + + "(5, 'foo'), (6, 'bar'), " + + "(7, 'foo'), (8, 'bar'), " + + "(9, 'foo'), (10, 'bar')"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testRewriteDataFilesWithDeleteAndPartitionEvolution() + { + String tableName = "example_partition_evolution_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (a int, b varchar)"); + assertUpdate("INSERT INTO " + tableName + " values(1, '1001'), (2, '1002')", 2); + assertUpdate("DELETE FROM " + tableName + " WHERE a = 1", 1); + assertQuery("select * from " + tableName, "values(2, '1002')"); + + Table table = loadTable(tableName); + assertHasSize(table.snapshots(), 2); + //The number of data files is 1,and the number of delete files is 1 + assertHasDataFiles(table.currentSnapshot(), 1); + assertHasDeleteFiles(table.currentSnapshot(), 1); + + assertUpdate("alter table " + 
tableName + " add column c int with (partitioning = 'identity')"); + assertUpdate("INSERT INTO " + tableName + " values(5, '1005', 5), (6, '1006', 6), (7, '1007', 7)", 3); + assertUpdate("DELETE FROM " + tableName + " WHERE b = '1006'", 1); + assertQuery("select * from " + tableName, "values(2, '1002', NULL), (5, '1005', 5), (7, '1007', 7)"); + + table.refresh(); + assertHasSize(table.snapshots(), 4); + //The number of data files is 4,and the number of delete files is 2 + assertHasDataFiles(table.currentSnapshot(), 4); + assertHasDeleteFiles(table.currentSnapshot(), 2); + + assertQueryFails(format("call system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'a > 3')", tableName, TEST_SCHEMA), ".*"); + assertQueryFails(format("call system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c > 3')", tableName, TEST_SCHEMA), ".*"); + + assertUpdate(format("call system.rewrite_data_files(table_name => '%s', schema => '%s')", tableName, TEST_SCHEMA), 3); + table.refresh(); + assertHasSize(table.snapshots(), 5); + //The number of data files is 3,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 3); + assertHasDeleteFiles(table.currentSnapshot(), 0); + CloseableIterator fileScanTasks = table.newScan() + .useSnapshot(table.currentSnapshot().snapshotId()) + .planFiles().iterator(); + assertFilesPlan(fileScanTasks, 3, 0); + assertQuery("select * from " + tableName, "values(2, '1002', NULL), (5, '1005', 5), (7, '1007', 7)"); + + assertUpdate("delete from " + tableName + " where b = '1002'", 1); + table.refresh(); + assertHasSize(table.snapshots(), 6); + //The number of data files is 3,and the number of delete files is 1 + assertHasDataFiles(table.currentSnapshot(), 3); + assertHasDeleteFiles(table.currentSnapshot(), 1); + assertUpdate(format("call system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'c is null')", tableName, TEST_SCHEMA), 0); + + table.refresh(); + 
assertHasSize(table.snapshots(), 7); + //The number of data files is 2,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 2); + assertHasDeleteFiles(table.currentSnapshot(), 0); + assertQuery("select * from " + tableName, "values(5, '1005', 5), (7, '1007', 7)"); + + // This is a metadata delete + assertUpdate("delete from " + tableName + " where c = 7", 1); + table.refresh(); + assertHasSize(table.snapshots(), 8); + //The number of data files is 1,and the number of delete files is 0 + assertHasDataFiles(table.currentSnapshot(), 1); + assertHasDeleteFiles(table.currentSnapshot(), 0); + assertQuery("select * from " + tableName, "values(5, '1005', 5)"); + } + finally { + dropTable(tableName); + } + } + + @Test + public void testInvalidParameterCases() + { + String tableName = "invalid_parameter_table"; + try { + assertUpdate("CREATE TABLE " + tableName + " (a int, b varchar, c int)"); + assertQueryFails("CALL system.rewrite_data_files('n', table_name => 't')", ".*Named and positional arguments cannot be mixed"); + assertQueryFails("CALL custom.rewrite_data_files('n', 't')", "Procedure not registered: custom.rewrite_data_files"); + assertQueryFails("CALL system.rewrite_data_files()", ".*Required procedure argument 'schema' is missing"); + assertQueryFails("CALL system.rewrite_data_files('s', 'n')", "Schema s does not exist"); + assertQueryFails("CALL system.rewrite_data_files('', '')", "Table name is empty"); + assertQueryFails(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => '''hello''')", tableName, TEST_SCHEMA), ".*WHERE clause must evaluate to a boolean: actual type varchar\\(5\\)"); + assertQueryFails(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => '1001')", tableName, TEST_SCHEMA), ".*WHERE clause must evaluate to a boolean: actual type integer"); + assertQueryFails(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'a')", 
tableName, TEST_SCHEMA), ".*WHERE clause must evaluate to a boolean: actual type integer"); + assertQueryFails(format("CALL system.rewrite_data_files(table_name => '%s', schema => '%s', filter => 'n')", tableName, TEST_SCHEMA), ".*Column 'n' cannot be resolved"); + } + finally { + dropTable(tableName); + } + } + + private Table loadTable(String tableName) + { + Catalog catalog = CatalogUtil.loadCatalog(HadoopCatalog.class.getName(), ICEBERG_CATALOG, getProperties(), new Configuration()); + return catalog.loadTable(TableIdentifier.of(TEST_SCHEMA, tableName)); + } + + private Map getProperties() + { + File metastoreDir = getCatalogDirectory(); + return ImmutableMap.of("warehouse", metastoreDir.toString()); + } + + private File getCatalogDirectory() + { + Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory(); + Path catalogDirectory = getIcebergDataDirectoryPath(dataDirectory, HADOOP.name(), new IcebergConfig().getFileFormat(), false); + return catalogDirectory.toFile(); + } + + private void assertHasSize(Iterable iterable, int size) + { + AtomicInteger count = new AtomicInteger(0); + iterable.forEach(obj -> count.incrementAndGet()); + assertEquals(count.get(), size); + } + + private void assertHasDataFiles(Snapshot snapshot, int dataFilesCount) + { + Map map = snapshot.summary(); + int totalDataFiles = Integer.valueOf(map.get(TOTAL_DATA_FILES_PROP)); + assertEquals(totalDataFiles, dataFilesCount); + } + + private void assertHasDeleteFiles(Snapshot snapshot, int deleteFilesCount) + { + Map map = snapshot.summary(); + int totalDeleteFiles = Integer.valueOf(map.get(TOTAL_DELETE_FILES_PROP)); + assertEquals(totalDeleteFiles, deleteFilesCount); + } + + private void assertFilesPlan(CloseableIterator iterator, int dataFileCount, int deleteFileCount) + { + AtomicInteger dataCount = new AtomicInteger(0); + AtomicInteger deleteCount = new AtomicInteger(0); + while (iterator.hasNext()) { + FileScanTask fileScanTask = iterator.next(); + 
dataCount.incrementAndGet(); + deleteCount.addAndGet(fileScanTask.deletes().size()); + } + assertEquals(dataCount.get(), dataFileCount); + assertEquals(deleteCount.get(), deleteFileCount); + + try { + iterator.close(); + iterator = CloseableIterator.empty(); + } + catch (Exception e) { + // do nothing + } + } +} diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestRenameTableOnFragileFileSystem.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestRenameTableOnFragileFileSystem.java index 6e4da6b8d666c..4fe2febcddba9 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestRenameTableOnFragileFileSystem.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/hive/TestRenameTableOnFragileFileSystem.java @@ -48,6 +48,7 @@ import com.facebook.presto.iceberg.IcebergTableType; import com.facebook.presto.iceberg.ManifestFileCache; import com.facebook.presto.iceberg.statistics.StatisticsFileCache; +import com.facebook.presto.metadata.BuiltInProcedureRegistry; import com.facebook.presto.metadata.FunctionAndTypeManager; import com.facebook.presto.metadata.MetadataManager; import com.facebook.presto.spi.ConnectorSession; @@ -412,6 +413,7 @@ private ConnectorMetadata getIcebergHiveMetadata(ExtendedHiveMetastore metastore metastore, hdfsEnvironment, FUNCTION_AND_TYPE_MANAGER, + new BuiltInProcedureRegistry(METADATA.getFunctionAndTypeManager()), FUNCTION_RESOLUTION, ROW_EXPRESSION_SERVICE, jsonCodec(CommitTaskData.class), diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java index 727076b088744..42e0df564402a 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/rest/TestIcebergSmokeRest.java @@ -142,10 +142,4 @@ public void testSetOauth2ServerUriPropertyI() 
assertEquals(catalog.properties().get(OAUTH2_SERVER_URI), authEndpoint); } - - @Override - public void testDeprecatedTablePropertiesCreateTable() - { - // v1 table create fails due to Iceberg REST catalog bug (see: https://github.com/apache/iceberg/issues/8756) - } } diff --git a/presto-main-base/src/main/java/com/facebook/presto/execution/scheduler/TableWriteInfo.java b/presto-main-base/src/main/java/com/facebook/presto/execution/scheduler/TableWriteInfo.java index b9c5efa99422a..cff7bec518dde 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/execution/scheduler/TableWriteInfo.java +++ b/presto-main-base/src/main/java/com/facebook/presto/execution/scheduler/TableWriteInfo.java @@ -111,7 +111,8 @@ private static Optional createWriterTarget(Optional finishDeleteWithOutput(Session session, } @Override - public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, TableHandle tableHandle, Object[] arguments) + public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, + TableHandle tableHandle, Object[] arguments, boolean sourceTableEliminated) { - return delegate.beginCallDistributedProcedure(session, procedureName, tableHandle, arguments); + return delegate.beginCallDistributedProcedure(session, procedureName, tableHandle, arguments, sourceTableEliminated); } @Override diff --git a/presto-main-base/src/main/java/com/facebook/presto/metadata/Metadata.java b/presto-main-base/src/main/java/com/facebook/presto/metadata/Metadata.java index 666c337bd9a10..9dbf15ec0adc4 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/metadata/Metadata.java +++ b/presto-main-base/src/main/java/com/facebook/presto/metadata/Metadata.java @@ -348,7 +348,8 @@ public interface Metadata /** * Begin call distributed procedure */ - DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, TableHandle 
tableHandle, Object[] arguments); + DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, + TableHandle tableHandle, Object[] arguments, boolean sourceTableEliminated); /** * Finish call distributed procedure diff --git a/presto-main-base/src/main/java/com/facebook/presto/metadata/MetadataManager.java b/presto-main-base/src/main/java/com/facebook/presto/metadata/MetadataManager.java index be66084783461..78330c383937e 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/metadata/MetadataManager.java +++ b/presto-main-base/src/main/java/com/facebook/presto/metadata/MetadataManager.java @@ -442,7 +442,6 @@ public Optional getSystemTable(Session session, QualifiedObjectName public TableLayoutResult getLayout(Session session, TableHandle table, Constraint constraint, Optional> desiredColumns) { long startTime = System.nanoTime(); - checkArgument(!constraint.getSummary().isNone(), "Cannot get Layout if constraint is none"); ConnectorId connectorId = table.getConnectorId(); ConnectorTableHandle connectorTable = table.getConnectorHandle(); @@ -1015,14 +1014,16 @@ public Optional finishDeleteWithOutput(Session session, } @Override - public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, TableHandle tableHandle, Object[] arguments) + public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, + TableHandle tableHandle, Object[] arguments, + boolean sourceTableEliminated) { ConnectorId connectorId = tableHandle.getConnectorId(); CatalogMetadata catalogMetadata = getCatalogMetadataForWrite(session, connectorId); ConnectorTableLayoutHandle layout; if (!tableHandle.getLayout().isPresent()) { - TableLayoutResult result = getLayout(session, tableHandle, Constraint.alwaysTrue(), Optional.empty()); + TableLayoutResult result = getLayout(session, tableHandle, sourceTableEliminated ? 
Constraint.alwaysFalse() : Constraint.alwaysTrue(), Optional.empty()); layout = result.getLayout().getLayoutHandle(); } else { diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java index e4925a46b5f84..fc383fc928d23 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java @@ -1285,8 +1285,15 @@ protected Scope visitCall(Call call, Optional scope) if (analysis.isDescribe()) { return createAndAssignScope(call, scope); } - QualifiedObjectName procedureName = analysis.getProcedureName() - .orElse(createQualifiedObjectName(session, call, call.getName(), metadata)); + Optional procedureNameOptional = analysis.getProcedureName(); + QualifiedObjectName procedureName; + if (!procedureNameOptional.isPresent()) { + procedureName = createQualifiedObjectName(session, call, call.getName(), metadata); + analysis.setProcedureName(Optional.of(procedureName)); + } + else { + procedureName = procedureNameOptional.get(); + } ConnectorId connectorId = metadata.getCatalogHandle(session, procedureName.getCatalogName()) .orElseThrow(() -> new SemanticException(MISSING_CATALOG, call, "Catalog %s does not exist", procedureName.getCatalogName())); diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java index f766ee47ad3c6..fe73971051712 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/LogicalPlanner.java @@ -286,7 +286,8 @@ private RelationPlan createCallDistributedProcedurePlanForTableDataRewrite(Analy procedureName.get(), procedureArguments.get(), Optional.of(targetTable), - 
tableMetadata.getTable()); + tableMetadata.getTable(), + false); TableFinishNode commitNode = new TableFinishNode( Optional.empty(), idAllocator.getNextId(), diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/RewriteWriterTarget.java b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/RewriteWriterTarget.java index 1a9103d54aef7..20e947d62de34 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/RewriteWriterTarget.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/optimizations/RewriteWriterTarget.java @@ -107,12 +107,19 @@ public Optional getWriterTarget(PlanNode node) if (node instanceof CallDistributedProcedureNode) { Optional tableHandle = findTableHandleForCallDistributedProcedure(((CallDistributedProcedureNode) node).getSource()); Optional callDistributedProcedureTarget = ((CallDistributedProcedureNode) node).getTarget(); - return !tableHandle.isPresent() ? callDistributedProcedureTarget.map(WriterTarget.class::cast) : + return !tableHandle.isPresent() ? 
+ callDistributedProcedureTarget.map(target -> new CallDistributedProcedureTarget( + target.getProcedureName(), + target.getProcedureArguments(), + target.getSourceHandle(), + target.getSchemaTableName(), + true)) : callDistributedProcedureTarget.map(target -> new CallDistributedProcedureTarget( target.getProcedureName(), target.getProcedureArguments(), tableHandle, - target.getSchemaTableName())); + target.getSchemaTableName(), + false)); } if (node instanceof ExchangeNode || node instanceof UnionNode) { diff --git a/presto-main-base/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java b/presto-main-base/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java index b832c6745a18d..b79c2b2c567b0 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java +++ b/presto-main-base/src/test/java/com/facebook/presto/metadata/AbstractMockMetadata.java @@ -440,7 +440,8 @@ public Optional finishDeleteWithOutput(Session session, } @Override - public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, TableHandle tableHandle, Object[] arguments) + public DistributedProcedureHandle beginCallDistributedProcedure(Session session, QualifiedObjectName procedureName, + TableHandle tableHandle, Object[] arguments, boolean sourceTableEliminated) { throw new UnsupportedOperationException(); } diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/AbstractAnalyzerTest.java b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/AbstractAnalyzerTest.java index 7bb0832c7d3c0..5c28e36be9241 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/AbstractAnalyzerTest.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/AbstractAnalyzerTest.java @@ -197,8 +197,8 @@ public void setup() procedures.add(new TableDataRewriteDistributedProcedure("system", "distributed_procedure", distributedArguments, 
(session, transactionContext, procedureHandle, fragments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)); + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())); metadata.getProcedureRegistry().addProcedures(SECOND_CONNECTOR_ID, procedures); Catalog tpchTestCatalog = createTestingCatalog(TPCH_CATALOG, TPCH_CONNECTOR_ID); diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java index 6b901cff03cdc..a8a69731f4361 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/TestLogicalPlanner.java @@ -210,8 +210,8 @@ public Connector create(String catalogName, Map config, Connecto procedures.add(new TableDataRewriteDistributedProcedure("system", "distributed_fun", arguments, (session, transactionContext, procedureHandle, fragments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)); + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())); return new Connector() { diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/planner/assertions/PlanMatchPattern.java b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/assertions/PlanMatchPattern.java index e5838185f495f..d882fe7e54a5f 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/planner/assertions/PlanMatchPattern.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/assertions/PlanMatchPattern.java @@ -43,6 +43,7 @@ import com.facebook.presto.spi.plan.SemiJoinNode; import com.facebook.presto.spi.plan.SortNode; import 
com.facebook.presto.spi.plan.SpatialJoinNode; +import com.facebook.presto.spi.plan.TableFinishNode; import com.facebook.presto.spi.plan.TableWriterNode; import com.facebook.presto.spi.plan.TopNNode; import com.facebook.presto.spi.plan.UnionNode; @@ -60,6 +61,7 @@ import com.facebook.presto.sql.planner.iterative.GroupReference; import com.facebook.presto.sql.planner.plan.ApplyNode; import com.facebook.presto.sql.planner.plan.AssignUniqueId; +import com.facebook.presto.sql.planner.plan.CallDistributedProcedureNode; import com.facebook.presto.sql.planner.plan.EnforceSingleRowNode; import com.facebook.presto.sql.planner.plan.ExchangeNode; import com.facebook.presto.sql.planner.plan.GroupIdNode; @@ -691,6 +693,16 @@ public static PlanMatchPattern enforceSingleRow(PlanMatchPattern source) return node(EnforceSingleRowNode.class, source); } + public static PlanMatchPattern callDistributedProcedure(PlanMatchPattern source) + { + return node(CallDistributedProcedureNode.class, source); + } + + public static PlanMatchPattern tableFinish(PlanMatchPattern source) + { + return node(TableFinishNode.class, source); + } + public static PlanMatchPattern tableWriter(List columns, List columnNames, PlanMatchPattern source) { return node(TableWriterNode.class, source).with(new TableWriterMatcher(columns, columnNames)); diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp index 228d31f569937..6674b05f847b4 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.cpp @@ -411,6 +411,40 @@ void from_json(const json& j, DeleteFile& p) { } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { +void to_json(json& j, const std::shared_ptr& p) { + 
if (p == nullptr) { + return; + } + String type = p->_type; + + if (type == "hive-iceberg") { + j = *std::static_pointer_cast(p); + return; + } + + throw TypeError(type + " no abstract type ColumnHandle "); +} + +void from_json(const json& j, std::shared_ptr& p) { + String type; + try { + type = p->getSubclassKey(j); + } catch (json::parse_error& e) { + throw ParseError(std::string(e.what()) + " ColumnHandle ColumnHandle"); + } + + if (type == "hive-iceberg") { + std::shared_ptr k = + std::make_shared(); + j.get_to(*k); + p = std::static_pointer_cast(k); + return; + } + + throw TypeError(type + " no abstract type ColumnHandle "); +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { // Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM() // NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays @@ -505,170 +539,505 @@ void from_json(const json& j, IcebergTableName& p) { } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { -// Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM() - -// NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays -static const std::pair - PartitionTransformType_enum_table[] = - { // NOLINT: cert-err58-cpp - {PartitionTransformType::IDENTITY, "IDENTITY"}, - {PartitionTransformType::HOUR, "HOUR"}, - {PartitionTransformType::DAY, "DAY"}, - {PartitionTransformType::MONTH, "MONTH"}, - {PartitionTransformType::YEAR, "YEAR"}, - {PartitionTransformType::BUCKET, "BUCKET"}, - {PartitionTransformType::TRUNCATE, "TRUNCATE"}}; -void to_json(json& j, const PartitionTransformType& e) { - static_assert( - std::is_enum::value, - "PartitionTransformType must be an enum!"); - const auto* it = std::find_if( - std::begin(PartitionTransformType_enum_table), - std::end(PartitionTransformType_enum_table), - [e](const std::pair& ej_pair) -> bool { - return ej_pair.first == e; - }); - j = ((it != std::end(PartitionTransformType_enum_table)) - ? 
it - : std::begin(PartitionTransformType_enum_table)) - ->second; -} -void from_json(const json& j, PartitionTransformType& e) { - static_assert( - std::is_enum::value, - "PartitionTransformType must be an enum!"); - const auto* it = std::find_if( - std::begin(PartitionTransformType_enum_table), - std::end(PartitionTransformType_enum_table), - [&j](const std::pair& ej_pair) -> bool { - return ej_pair.second == j; - }); - e = ((it != std::end(PartitionTransformType_enum_table)) - ? it - : std::begin(PartitionTransformType_enum_table)) - ->first; -} -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { -void to_json(json& j, const IcebergPartitionField& p) { +void to_json(json& j, const SortField& p) { j = json::object(); - to_json_key( - j, "sourceId", p.sourceId, "IcebergPartitionField", "int", "sourceId"); - to_json_key( - j, "fieldId", p.fieldId, "IcebergPartitionField", "int", "fieldId"); - to_json_key( - j, "parameter", p.parameter, "IcebergPartitionField", "int", "parameter"); to_json_key( j, - "transform", - p.transform, - "IcebergPartitionField", - "PartitionTransformType", - "transform"); - to_json_key(j, "name", p.name, "IcebergPartitionField", "String", "name"); + "sourceColumnId", + p.sourceColumnId, + "SortField", + "int", + "sourceColumnId"); + to_json_key( + j, "sortOrder", p.sortOrder, "SortField", "SortOrder", "sortOrder"); } -void from_json(const json& j, IcebergPartitionField& p) { - from_json_key( - j, "sourceId", p.sourceId, "IcebergPartitionField", "int", "sourceId"); - from_json_key( - j, "fieldId", p.fieldId, "IcebergPartitionField", "int", "fieldId"); - from_json_key( - j, "parameter", p.parameter, "IcebergPartitionField", "int", "parameter"); +void from_json(const json& j, SortField& p) { from_json_key( j, - "transform", - p.transform, - "IcebergPartitionField", - "PartitionTransformType", - "transform"); - from_json_key(j, "name", p.name, "IcebergPartitionField", "String", "name"); + 
"sourceColumnId", + p.sourceColumnId, + "SortField", + "int", + "sourceColumnId"); + from_json_key( + j, "sortOrder", p.sortOrder, "SortField", "SortOrder", "sortOrder"); } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { +IcebergTableHandle::IcebergTableHandle() noexcept { + _type = "hive-iceberg"; +} -void to_json(json& j, const PrestoIcebergNestedField& p) { +void to_json(json& j, const IcebergTableHandle& p) { j = json::object(); + j["@type"] = "hive-iceberg"; to_json_key( j, - "optional", - p.optional, - "PrestoIcebergNestedField", - "bool", - "optional"); - to_json_key(j, "id", p.id, "PrestoIcebergNestedField", "int", "id"); - to_json_key(j, "name", p.name, "PrestoIcebergNestedField", "String", "name"); + "schemaName", + p.schemaName, + "IcebergTableHandle", + "String", + "schemaName"); to_json_key( j, - "prestoType", - p.prestoType, - "PrestoIcebergNestedField", - "Type", - "prestoType"); - to_json_key(j, "doc", p.doc, "PrestoIcebergNestedField", "String", "doc"); -} - -void from_json(const json& j, PrestoIcebergNestedField& p) { - from_json_key( + "icebergTableName", + p.icebergTableName, + "IcebergTableHandle", + "IcebergTableName", + "icebergTableName"); + to_json_key( j, - "optional", - p.optional, - "PrestoIcebergNestedField", + "snapshotSpecified", + p.snapshotSpecified, + "IcebergTableHandle", "bool", - "optional"); - from_json_key(j, "id", p.id, "PrestoIcebergNestedField", "int", "id"); - from_json_key( - j, "name", p.name, "PrestoIcebergNestedField", "String", "name"); - from_json_key( - j, - "prestoType", - p.prestoType, - "PrestoIcebergNestedField", - "Type", - "prestoType"); - from_json_key(j, "doc", p.doc, "PrestoIcebergNestedField", "String", "doc"); -} -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { - -void to_json(json& j, const PrestoIcebergSchema& p) { - j = json::object(); + "snapshotSpecified"); to_json_key( - j, "schemaId", p.schemaId, 
"PrestoIcebergSchema", "int", "schemaId"); + j, + "outputPath", + p.outputPath, + "IcebergTableHandle", + "String", + "outputPath"); to_json_key( j, - "columns", - p.columns, - "PrestoIcebergSchema", - "List", - "columns"); + "storageProperties", + p.storageProperties, + "IcebergTableHandle", + "Map", + "storageProperties"); to_json_key( j, - "columnNameToIdMapping", - p.columnNameToIdMapping, - "PrestoIcebergSchema", - "Map", - "columnNameToIdMapping"); + "tableSchemaJson", + p.tableSchemaJson, + "IcebergTableHandle", + "String", + "tableSchemaJson"); to_json_key( j, - "aliases", - p.aliases, - "PrestoIcebergSchema", - "Map", - "aliases"); + "partitionFieldIds", + p.partitionFieldIds, + "IcebergTableHandle", + "List", + "partitionFieldIds"); to_json_key( j, - "identifierFieldIds", - p.identifierFieldIds, - "PrestoIcebergSchema", + "equalityFieldIds", + p.equalityFieldIds, + "IcebergTableHandle", "List", - "identifierFieldIds"); + "equalityFieldIds"); + to_json_key( + j, + "sortOrder", + p.sortOrder, + "IcebergTableHandle", + "List", + "sortOrder"); + to_json_key( + j, + "updatedColumns", + p.updatedColumns, + "IcebergTableHandle", + "List", + "updatedColumns"); + to_json_key( + j, + "materializedViewName", + p.materializedViewName, + "IcebergTableHandle", + "SchemaTableName", + "materializedViewName"); } -void from_json(const json& j, PrestoIcebergSchema& p) { +void from_json(const json& j, IcebergTableHandle& p) { + p._type = j["@type"]; from_json_key( - j, "schemaId", p.schemaId, "PrestoIcebergSchema", "int", "schemaId"); + j, + "schemaName", + p.schemaName, + "IcebergTableHandle", + "String", + "schemaName"); + from_json_key( + j, + "icebergTableName", + p.icebergTableName, + "IcebergTableHandle", + "IcebergTableName", + "icebergTableName"); + from_json_key( + j, + "snapshotSpecified", + p.snapshotSpecified, + "IcebergTableHandle", + "bool", + "snapshotSpecified"); + from_json_key( + j, + "outputPath", + p.outputPath, + "IcebergTableHandle", + "String", + 
"outputPath"); + from_json_key( + j, + "storageProperties", + p.storageProperties, + "IcebergTableHandle", + "Map", + "storageProperties"); + from_json_key( + j, + "tableSchemaJson", + p.tableSchemaJson, + "IcebergTableHandle", + "String", + "tableSchemaJson"); + from_json_key( + j, + "partitionFieldIds", + p.partitionFieldIds, + "IcebergTableHandle", + "List", + "partitionFieldIds"); + from_json_key( + j, + "equalityFieldIds", + p.equalityFieldIds, + "IcebergTableHandle", + "List", + "equalityFieldIds"); + from_json_key( + j, + "sortOrder", + p.sortOrder, + "IcebergTableHandle", + "List", + "sortOrder"); + from_json_key( + j, + "updatedColumns", + p.updatedColumns, + "IcebergTableHandle", + "List", + "updatedColumns"); + from_json_key( + j, + "materializedViewName", + p.materializedViewName, + "IcebergTableHandle", + "SchemaTableName", + "materializedViewName"); +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { +IcebergTableLayoutHandle::IcebergTableLayoutHandle() noexcept { + _type = "hive-iceberg"; +} + +void to_json(json& j, const IcebergTableLayoutHandle& p) { + j = json::object(); + j["@type"] = "hive-iceberg"; + to_json_key( + j, + "partitionColumns", + p.partitionColumns, + "IcebergTableLayoutHandle", + "List", + "partitionColumns"); + to_json_key( + j, + "dataColumns", + p.dataColumns, + "IcebergTableLayoutHandle", + "List", + "dataColumns"); + to_json_key( + j, + "domainPredicate", + p.domainPredicate, + "IcebergTableLayoutHandle", + "TupleDomain", + "domainPredicate"); + to_json_key( + j, + "remainingPredicate", + p.remainingPredicate, + "IcebergTableLayoutHandle", + "std::shared_ptr", + "remainingPredicate"); + to_json_key( + j, + "predicateColumns", + p.predicateColumns, + "IcebergTableLayoutHandle", + "Map", + "predicateColumns"); + to_json_key( + j, + "requestedColumns", + p.requestedColumns, + "IcebergTableLayoutHandle", + "List", + "requestedColumns"); + to_json_key( + j, + 
"pushdownFilterEnabled", + p.pushdownFilterEnabled, + "IcebergTableLayoutHandle", + "bool", + "pushdownFilterEnabled"); + to_json_key( + j, + "partitionColumnPredicate", + p.partitionColumnPredicate, + "IcebergTableLayoutHandle", + "TupleDomain>", + "partitionColumnPredicate"); + to_json_key( + j, + "table", + p.table, + "IcebergTableLayoutHandle", + "IcebergTableHandle", + "table"); +} + +void from_json(const json& j, IcebergTableLayoutHandle& p) { + p._type = j["@type"]; + from_json_key( + j, + "partitionColumns", + p.partitionColumns, + "IcebergTableLayoutHandle", + "List", + "partitionColumns"); + from_json_key( + j, + "dataColumns", + p.dataColumns, + "IcebergTableLayoutHandle", + "List", + "dataColumns"); + from_json_key( + j, + "domainPredicate", + p.domainPredicate, + "IcebergTableLayoutHandle", + "TupleDomain", + "domainPredicate"); + from_json_key( + j, + "remainingPredicate", + p.remainingPredicate, + "IcebergTableLayoutHandle", + "std::shared_ptr", + "remainingPredicate"); + from_json_key( + j, + "predicateColumns", + p.predicateColumns, + "IcebergTableLayoutHandle", + "Map", + "predicateColumns"); + from_json_key( + j, + "requestedColumns", + p.requestedColumns, + "IcebergTableLayoutHandle", + "List", + "requestedColumns"); + from_json_key( + j, + "pushdownFilterEnabled", + p.pushdownFilterEnabled, + "IcebergTableLayoutHandle", + "bool", + "pushdownFilterEnabled"); + from_json_key( + j, + "partitionColumnPredicate", + p.partitionColumnPredicate, + "IcebergTableLayoutHandle", + "TupleDomain>", + "partitionColumnPredicate"); + from_json_key( + j, + "table", + p.table, + "IcebergTableLayoutHandle", + "IcebergTableHandle", + "table"); +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { +// Loosely copied this here from NLOHMANN_JSON_SERIALIZE_ENUM() + +// NOLINTNEXTLINE: cppcoreguidelines-avoid-c-arrays +static const std::pair + PartitionTransformType_enum_table[] = + { // NOLINT: cert-err58-cpp + 
{PartitionTransformType::IDENTITY, "IDENTITY"}, + {PartitionTransformType::HOUR, "HOUR"}, + {PartitionTransformType::DAY, "DAY"}, + {PartitionTransformType::MONTH, "MONTH"}, + {PartitionTransformType::YEAR, "YEAR"}, + {PartitionTransformType::BUCKET, "BUCKET"}, + {PartitionTransformType::TRUNCATE, "TRUNCATE"}}; +void to_json(json& j, const PartitionTransformType& e) { + static_assert( + std::is_enum::value, + "PartitionTransformType must be an enum!"); + const auto* it = std::find_if( + std::begin(PartitionTransformType_enum_table), + std::end(PartitionTransformType_enum_table), + [e](const std::pair& ej_pair) -> bool { + return ej_pair.first == e; + }); + j = ((it != std::end(PartitionTransformType_enum_table)) + ? it + : std::begin(PartitionTransformType_enum_table)) + ->second; +} +void from_json(const json& j, PartitionTransformType& e) { + static_assert( + std::is_enum::value, + "PartitionTransformType must be an enum!"); + const auto* it = std::find_if( + std::begin(PartitionTransformType_enum_table), + std::end(PartitionTransformType_enum_table), + [&j](const std::pair& ej_pair) -> bool { + return ej_pair.second == j; + }); + e = ((it != std::end(PartitionTransformType_enum_table)) + ? 
it + : std::begin(PartitionTransformType_enum_table)) + ->first; +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { + +void to_json(json& j, const IcebergPartitionField& p) { + j = json::object(); + to_json_key( + j, "sourceId", p.sourceId, "IcebergPartitionField", "int", "sourceId"); + to_json_key( + j, "fieldId", p.fieldId, "IcebergPartitionField", "int", "fieldId"); + to_json_key( + j, "parameter", p.parameter, "IcebergPartitionField", "int", "parameter"); + to_json_key( + j, + "transform", + p.transform, + "IcebergPartitionField", + "PartitionTransformType", + "transform"); + to_json_key(j, "name", p.name, "IcebergPartitionField", "String", "name"); +} + +void from_json(const json& j, IcebergPartitionField& p) { + from_json_key( + j, "sourceId", p.sourceId, "IcebergPartitionField", "int", "sourceId"); + from_json_key( + j, "fieldId", p.fieldId, "IcebergPartitionField", "int", "fieldId"); + from_json_key( + j, "parameter", p.parameter, "IcebergPartitionField", "int", "parameter"); + from_json_key( + j, + "transform", + p.transform, + "IcebergPartitionField", + "PartitionTransformType", + "transform"); + from_json_key(j, "name", p.name, "IcebergPartitionField", "String", "name"); +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { + +void to_json(json& j, const PrestoIcebergNestedField& p) { + j = json::object(); + to_json_key( + j, + "optional", + p.optional, + "PrestoIcebergNestedField", + "bool", + "optional"); + to_json_key(j, "id", p.id, "PrestoIcebergNestedField", "int", "id"); + to_json_key(j, "name", p.name, "PrestoIcebergNestedField", "String", "name"); + to_json_key( + j, + "prestoType", + p.prestoType, + "PrestoIcebergNestedField", + "Type", + "prestoType"); + to_json_key(j, "doc", p.doc, "PrestoIcebergNestedField", "String", "doc"); +} + +void from_json(const json& j, PrestoIcebergNestedField& p) { + from_json_key( + j, + "optional", + p.optional, 
+ "PrestoIcebergNestedField", + "bool", + "optional"); + from_json_key(j, "id", p.id, "PrestoIcebergNestedField", "int", "id"); + from_json_key( + j, "name", p.name, "PrestoIcebergNestedField", "String", "name"); + from_json_key( + j, + "prestoType", + p.prestoType, + "PrestoIcebergNestedField", + "Type", + "prestoType"); + from_json_key(j, "doc", p.doc, "PrestoIcebergNestedField", "String", "doc"); +} +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { + +void to_json(json& j, const PrestoIcebergSchema& p) { + j = json::object(); + to_json_key( + j, "schemaId", p.schemaId, "PrestoIcebergSchema", "int", "schemaId"); + to_json_key( + j, + "columns", + p.columns, + "PrestoIcebergSchema", + "List", + "columns"); + to_json_key( + j, + "columnNameToIdMapping", + p.columnNameToIdMapping, + "PrestoIcebergSchema", + "Map", + "columnNameToIdMapping"); + to_json_key( + j, + "aliases", + p.aliases, + "PrestoIcebergSchema", + "Map", + "aliases"); + to_json_key( + j, + "identifierFieldIds", + p.identifierFieldIds, + "PrestoIcebergSchema", + "List", + "identifierFieldIds"); +} + +void from_json(const json& j, PrestoIcebergSchema& p) { + from_json_key( + j, "schemaId", p.schemaId, "PrestoIcebergSchema", "int", "schemaId"); from_json_key( j, "columns", @@ -729,869 +1098,669 @@ void from_json(const json& j, PrestoIcebergPartitionSpec& p) { "schema", p.schema, "PrestoIcebergPartitionSpec", - "PrestoIcebergSchema", - "schema"); - from_json_key( - j, - "fields", - p.fields, - "PrestoIcebergPartitionSpec", - "List", - "fields"); -} -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { - -void to_json(json& j, const SortField& p) { - j = json::object(); - to_json_key( - j, - "sourceColumnId", - p.sourceColumnId, - "SortField", - "int", - "sourceColumnId"); - to_json_key( - j, "sortOrder", p.sortOrder, "SortField", "SortOrder", "sortOrder"); -} - -void from_json(const json& j, SortField& p) { 
- from_json_key( - j, - "sourceColumnId", - p.sourceColumnId, - "SortField", - "int", - "sourceColumnId"); + "PrestoIcebergSchema", + "schema"); from_json_key( - j, "sortOrder", p.sortOrder, "SortField", "SortOrder", "sortOrder"); + j, + "fields", + p.fields, + "PrestoIcebergPartitionSpec", + "List", + "fields"); } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { -IcebergInsertTableHandle::IcebergInsertTableHandle() noexcept { +IcebergDistributedProcedureHandle:: + IcebergDistributedProcedureHandle() noexcept { _type = "hive-iceberg"; } -void to_json(json& j, const IcebergInsertTableHandle& p) { +void to_json(json& j, const IcebergDistributedProcedureHandle& p) { j = json::object(); j["@type"] = "hive-iceberg"; to_json_key( j, "schemaName", p.schemaName, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "String", "schemaName"); to_json_key( j, "tableName", p.tableName, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "IcebergTableName", "tableName"); to_json_key( j, "schema", p.schema, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "PrestoIcebergSchema", "schema"); to_json_key( j, "partitionSpec", p.partitionSpec, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "PrestoIcebergPartitionSpec", "partitionSpec"); to_json_key( j, "inputColumns", p.inputColumns, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "List", "inputColumns"); to_json_key( j, "outputPath", p.outputPath, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "String", "outputPath"); to_json_key( j, "fileFormat", p.fileFormat, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "FileFormat", "fileFormat"); to_json_key( j, "compressionCodec", p.compressionCodec, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "HiveCompressionCodec", "compressionCodec"); to_json_key( j, "storageProperties", 
p.storageProperties, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "Map", "storageProperties"); to_json_key( j, - "sortOrder", - p.sortOrder, - "IcebergInsertTableHandle", - "List", - "sortOrder"); + "tableLayoutHandle", + p.tableLayoutHandle, + "IcebergDistributedProcedureHandle", + "IcebergTableLayoutHandle", + "tableLayoutHandle"); to_json_key( j, - "materializedViewName", - p.materializedViewName, - "IcebergInsertTableHandle", - "SchemaTableName", - "materializedViewName"); + "relevantData", + p.relevantData, + "IcebergDistributedProcedureHandle", + "Map", + "relevantData"); } -void from_json(const json& j, IcebergInsertTableHandle& p) { +void from_json(const json& j, IcebergDistributedProcedureHandle& p) { p._type = j["@type"]; from_json_key( j, "schemaName", p.schemaName, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "String", "schemaName"); from_json_key( j, "tableName", p.tableName, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "IcebergTableName", "tableName"); from_json_key( j, "schema", p.schema, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "PrestoIcebergSchema", "schema"); from_json_key( j, "partitionSpec", p.partitionSpec, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "PrestoIcebergPartitionSpec", "partitionSpec"); from_json_key( j, "inputColumns", p.inputColumns, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "List", "inputColumns"); from_json_key( j, "outputPath", p.outputPath, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "String", "outputPath"); from_json_key( j, "fileFormat", p.fileFormat, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "FileFormat", "fileFormat"); from_json_key( j, "compressionCodec", p.compressionCodec, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "HiveCompressionCodec", "compressionCodec"); from_json_key( j, 
"storageProperties", p.storageProperties, - "IcebergInsertTableHandle", + "IcebergDistributedProcedureHandle", "Map", "storageProperties"); from_json_key( j, - "sortOrder", - p.sortOrder, - "IcebergInsertTableHandle", - "List", - "sortOrder"); + "tableLayoutHandle", + p.tableLayoutHandle, + "IcebergDistributedProcedureHandle", + "IcebergTableLayoutHandle", + "tableLayoutHandle"); from_json_key( j, - "materializedViewName", - p.materializedViewName, - "IcebergInsertTableHandle", - "SchemaTableName", - "materializedViewName"); + "relevantData", + p.relevantData, + "IcebergDistributedProcedureHandle", + "Map", + "relevantData"); } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { -IcebergOutputTableHandle::IcebergOutputTableHandle() noexcept { +IcebergInsertTableHandle::IcebergInsertTableHandle() noexcept { _type = "hive-iceberg"; } -void to_json(json& j, const IcebergOutputTableHandle& p) { +void to_json(json& j, const IcebergInsertTableHandle& p) { j = json::object(); j["@type"] = "hive-iceberg"; to_json_key( j, "schemaName", p.schemaName, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "String", "schemaName"); to_json_key( j, "tableName", p.tableName, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "IcebergTableName", "tableName"); to_json_key( j, "schema", p.schema, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "PrestoIcebergSchema", "schema"); to_json_key( j, "partitionSpec", p.partitionSpec, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "PrestoIcebergPartitionSpec", "partitionSpec"); to_json_key( j, "inputColumns", p.inputColumns, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "List", "inputColumns"); to_json_key( j, "outputPath", p.outputPath, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "String", "outputPath"); to_json_key( j, "fileFormat", p.fileFormat, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "FileFormat", 
"fileFormat"); to_json_key( j, "compressionCodec", p.compressionCodec, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "HiveCompressionCodec", "compressionCodec"); to_json_key( j, "storageProperties", p.storageProperties, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "Map", "storageProperties"); to_json_key( j, "sortOrder", p.sortOrder, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "List", "sortOrder"); + to_json_key( + j, + "materializedViewName", + p.materializedViewName, + "IcebergInsertTableHandle", + "SchemaTableName", + "materializedViewName"); } -void from_json(const json& j, IcebergOutputTableHandle& p) { +void from_json(const json& j, IcebergInsertTableHandle& p) { p._type = j["@type"]; from_json_key( j, "schemaName", p.schemaName, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "String", "schemaName"); from_json_key( j, "tableName", p.tableName, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "IcebergTableName", "tableName"); from_json_key( j, "schema", p.schema, - "IcebergOutputTableHandle", + "IcebergInsertTableHandle", "PrestoIcebergSchema", - "schema"); - from_json_key( - j, - "partitionSpec", - p.partitionSpec, - "IcebergOutputTableHandle", - "PrestoIcebergPartitionSpec", - "partitionSpec"); - from_json_key( - j, - "inputColumns", - p.inputColumns, - "IcebergOutputTableHandle", - "List", - "inputColumns"); - from_json_key( - j, - "outputPath", - p.outputPath, - "IcebergOutputTableHandle", - "String", - "outputPath"); - from_json_key( - j, - "fileFormat", - p.fileFormat, - "IcebergOutputTableHandle", - "FileFormat", - "fileFormat"); - from_json_key( - j, - "compressionCodec", - p.compressionCodec, - "IcebergOutputTableHandle", - "HiveCompressionCodec", - "compressionCodec"); - from_json_key( - j, - "storageProperties", - p.storageProperties, - "IcebergOutputTableHandle", - "Map", - "storageProperties"); - from_json_key( - j, - "sortOrder", - p.sortOrder, - "IcebergOutputTableHandle", - 
"List", - "sortOrder"); -} -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { -IcebergSplit::IcebergSplit() noexcept { - _type = "hive-iceberg"; -} - -void to_json(json& j, const IcebergSplit& p) { - j = json::object(); - j["@type"] = "hive-iceberg"; - to_json_key(j, "path", p.path, "IcebergSplit", "String", "path"); - to_json_key(j, "start", p.start, "IcebergSplit", "int64_t", "start"); - to_json_key(j, "length", p.length, "IcebergSplit", "int64_t", "length"); - to_json_key( - j, - "fileFormat", - p.fileFormat, - "IcebergSplit", - "FileFormat", - "fileFormat"); - to_json_key( - j, - "addresses", - p.addresses, - "IcebergSplit", - "List", - "addresses"); - to_json_key( - j, - "partitionKeys", - p.partitionKeys, - "IcebergSplit", - "Map", - "partitionKeys"); - to_json_key( - j, - "partitionSpecAsJson", - p.partitionSpecAsJson, - "IcebergSplit", - "String", - "partitionSpecAsJson"); - to_json_key( - j, - "partitionDataJson", - p.partitionDataJson, - "IcebergSplit", - "String", - "partitionDataJson"); - to_json_key( - j, - "nodeSelectionStrategy", - p.nodeSelectionStrategy, - "IcebergSplit", - "NodeSelectionStrategy", - "nodeSelectionStrategy"); - to_json_key( - j, - "splitWeight", - p.splitWeight, - "IcebergSplit", - "SplitWeight", - "splitWeight"); - to_json_key( - j, "deletes", p.deletes, "IcebergSplit", "List", "deletes"); - to_json_key( - j, - "changelogSplitInfo", - p.changelogSplitInfo, - "IcebergSplit", - "ChangelogSplitInfo", - "changelogSplitInfo"); - to_json_key( - j, - "dataSequenceNumber", - p.dataSequenceNumber, - "IcebergSplit", - "int64_t", - "dataSequenceNumber"); - to_json_key( - j, - "affinitySchedulingSectionSize", - p.affinitySchedulingSectionSize, - "IcebergSplit", - "int64_t", - "affinitySchedulingSectionSize"); -} - -void from_json(const json& j, IcebergSplit& p) { - p._type = j["@type"]; - from_json_key(j, "path", p.path, "IcebergSplit", "String", "path"); - from_json_key(j, "start", p.start, 
"IcebergSplit", "int64_t", "start"); - from_json_key(j, "length", p.length, "IcebergSplit", "int64_t", "length"); - from_json_key( - j, - "fileFormat", - p.fileFormat, - "IcebergSplit", - "FileFormat", - "fileFormat"); - from_json_key( - j, - "addresses", - p.addresses, - "IcebergSplit", - "List", - "addresses"); - from_json_key( - j, - "partitionKeys", - p.partitionKeys, - "IcebergSplit", - "Map", - "partitionKeys"); + "schema"); from_json_key( j, - "partitionSpecAsJson", - p.partitionSpecAsJson, - "IcebergSplit", - "String", - "partitionSpecAsJson"); + "partitionSpec", + p.partitionSpec, + "IcebergInsertTableHandle", + "PrestoIcebergPartitionSpec", + "partitionSpec"); from_json_key( j, - "partitionDataJson", - p.partitionDataJson, - "IcebergSplit", - "String", - "partitionDataJson"); + "inputColumns", + p.inputColumns, + "IcebergInsertTableHandle", + "List", + "inputColumns"); from_json_key( j, - "nodeSelectionStrategy", - p.nodeSelectionStrategy, - "IcebergSplit", - "NodeSelectionStrategy", - "nodeSelectionStrategy"); + "outputPath", + p.outputPath, + "IcebergInsertTableHandle", + "String", + "outputPath"); from_json_key( j, - "splitWeight", - p.splitWeight, - "IcebergSplit", - "SplitWeight", - "splitWeight"); + "fileFormat", + p.fileFormat, + "IcebergInsertTableHandle", + "FileFormat", + "fileFormat"); from_json_key( - j, "deletes", p.deletes, "IcebergSplit", "List", "deletes"); + j, + "compressionCodec", + p.compressionCodec, + "IcebergInsertTableHandle", + "HiveCompressionCodec", + "compressionCodec"); from_json_key( j, - "changelogSplitInfo", - p.changelogSplitInfo, - "IcebergSplit", - "ChangelogSplitInfo", - "changelogSplitInfo"); + "storageProperties", + p.storageProperties, + "IcebergInsertTableHandle", + "Map", + "storageProperties"); from_json_key( j, - "dataSequenceNumber", - p.dataSequenceNumber, - "IcebergSplit", - "int64_t", - "dataSequenceNumber"); + "sortOrder", + p.sortOrder, + "IcebergInsertTableHandle", + "List", + "sortOrder"); from_json_key( 
j, - "affinitySchedulingSectionSize", - p.affinitySchedulingSectionSize, - "IcebergSplit", - "int64_t", - "affinitySchedulingSectionSize"); + "materializedViewName", + p.materializedViewName, + "IcebergInsertTableHandle", + "SchemaTableName", + "materializedViewName"); } } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { -IcebergTableHandle::IcebergTableHandle() noexcept { +IcebergOutputTableHandle::IcebergOutputTableHandle() noexcept { _type = "hive-iceberg"; } -void to_json(json& j, const IcebergTableHandle& p) { +void to_json(json& j, const IcebergOutputTableHandle& p) { j = json::object(); j["@type"] = "hive-iceberg"; to_json_key( j, "schemaName", p.schemaName, - "IcebergTableHandle", + "IcebergOutputTableHandle", "String", "schemaName"); to_json_key( j, - "icebergTableName", - p.icebergTableName, - "IcebergTableHandle", + "tableName", + p.tableName, + "IcebergOutputTableHandle", "IcebergTableName", - "icebergTableName"); + "tableName"); to_json_key( j, - "snapshotSpecified", - p.snapshotSpecified, - "IcebergTableHandle", - "bool", - "snapshotSpecified"); + "schema", + p.schema, + "IcebergOutputTableHandle", + "PrestoIcebergSchema", + "schema"); + to_json_key( + j, + "partitionSpec", + p.partitionSpec, + "IcebergOutputTableHandle", + "PrestoIcebergPartitionSpec", + "partitionSpec"); + to_json_key( + j, + "inputColumns", + p.inputColumns, + "IcebergOutputTableHandle", + "List", + "inputColumns"); to_json_key( j, "outputPath", p.outputPath, - "IcebergTableHandle", + "IcebergOutputTableHandle", "String", "outputPath"); to_json_key( j, - "storageProperties", - p.storageProperties, - "IcebergTableHandle", - "Map", - "storageProperties"); - to_json_key( - j, - "tableSchemaJson", - p.tableSchemaJson, - "IcebergTableHandle", - "String", - "tableSchemaJson"); + "fileFormat", + p.fileFormat, + "IcebergOutputTableHandle", + "FileFormat", + "fileFormat"); to_json_key( j, - "partitionFieldIds", - p.partitionFieldIds, - 
"IcebergTableHandle", - "List", - "partitionFieldIds"); + "compressionCodec", + p.compressionCodec, + "IcebergOutputTableHandle", + "HiveCompressionCodec", + "compressionCodec"); to_json_key( j, - "equalityFieldIds", - p.equalityFieldIds, - "IcebergTableHandle", - "List", - "equalityFieldIds"); + "storageProperties", + p.storageProperties, + "IcebergOutputTableHandle", + "Map", + "storageProperties"); to_json_key( j, "sortOrder", p.sortOrder, - "IcebergTableHandle", + "IcebergOutputTableHandle", "List", "sortOrder"); - to_json_key( - j, - "updatedColumns", - p.updatedColumns, - "IcebergTableHandle", - "List", - "updatedColumns"); - to_json_key( - j, - "materializedViewName", - p.materializedViewName, - "IcebergTableHandle", - "SchemaTableName", - "materializedViewName"); } -void from_json(const json& j, IcebergTableHandle& p) { +void from_json(const json& j, IcebergOutputTableHandle& p) { p._type = j["@type"]; from_json_key( j, "schemaName", p.schemaName, - "IcebergTableHandle", + "IcebergOutputTableHandle", "String", "schemaName"); from_json_key( j, - "icebergTableName", - p.icebergTableName, - "IcebergTableHandle", + "tableName", + p.tableName, + "IcebergOutputTableHandle", "IcebergTableName", - "icebergTableName"); + "tableName"); from_json_key( j, - "snapshotSpecified", - p.snapshotSpecified, - "IcebergTableHandle", - "bool", - "snapshotSpecified"); + "schema", + p.schema, + "IcebergOutputTableHandle", + "PrestoIcebergSchema", + "schema"); from_json_key( j, - "outputPath", - p.outputPath, - "IcebergTableHandle", - "String", - "outputPath"); + "partitionSpec", + p.partitionSpec, + "IcebergOutputTableHandle", + "PrestoIcebergPartitionSpec", + "partitionSpec"); from_json_key( j, - "storageProperties", - p.storageProperties, - "IcebergTableHandle", - "Map", - "storageProperties"); + "inputColumns", + p.inputColumns, + "IcebergOutputTableHandle", + "List", + "inputColumns"); from_json_key( j, - "tableSchemaJson", - p.tableSchemaJson, - "IcebergTableHandle", + 
"outputPath", + p.outputPath, + "IcebergOutputTableHandle", "String", - "tableSchemaJson"); - from_json_key( - j, - "partitionFieldIds", - p.partitionFieldIds, - "IcebergTableHandle", - "List", - "partitionFieldIds"); + "outputPath"); from_json_key( j, - "equalityFieldIds", - p.equalityFieldIds, - "IcebergTableHandle", - "List", - "equalityFieldIds"); + "fileFormat", + p.fileFormat, + "IcebergOutputTableHandle", + "FileFormat", + "fileFormat"); from_json_key( j, - "sortOrder", - p.sortOrder, - "IcebergTableHandle", - "List", - "sortOrder"); + "compressionCodec", + p.compressionCodec, + "IcebergOutputTableHandle", + "HiveCompressionCodec", + "compressionCodec"); from_json_key( j, - "updatedColumns", - p.updatedColumns, - "IcebergTableHandle", - "List", - "updatedColumns"); + "storageProperties", + p.storageProperties, + "IcebergOutputTableHandle", + "Map", + "storageProperties"); from_json_key( j, - "materializedViewName", - p.materializedViewName, - "IcebergTableHandle", - "SchemaTableName", - "materializedViewName"); -} -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { -void to_json(json& j, const std::shared_ptr& p) { - if (p == nullptr) { - return; - } - String type = p->_type; - - if (type == "hive-iceberg") { - j = *std::static_pointer_cast(p); - return; - } - - throw TypeError(type + " no abstract type ColumnHandle "); -} - -void from_json(const json& j, std::shared_ptr& p) { - String type; - try { - type = p->getSubclassKey(j); - } catch (json::parse_error& e) { - throw ParseError(std::string(e.what()) + " ColumnHandle ColumnHandle"); - } - - if (type == "hive-iceberg") { - std::shared_ptr k = - std::make_shared(); - j.get_to(*k); - p = std::static_pointer_cast(k); - return; - } - - throw TypeError(type + " no abstract type ColumnHandle "); + "sortOrder", + p.sortOrder, + "IcebergOutputTableHandle", + "List", + "sortOrder"); } } // namespace facebook::presto::protocol::iceberg namespace 
facebook::presto::protocol::iceberg { -IcebergTableLayoutHandle::IcebergTableLayoutHandle() noexcept { +IcebergSplit::IcebergSplit() noexcept { _type = "hive-iceberg"; } -void to_json(json& j, const IcebergTableLayoutHandle& p) { +void to_json(json& j, const IcebergSplit& p) { j = json::object(); j["@type"] = "hive-iceberg"; + to_json_key(j, "path", p.path, "IcebergSplit", "String", "path"); + to_json_key(j, "start", p.start, "IcebergSplit", "int64_t", "start"); + to_json_key(j, "length", p.length, "IcebergSplit", "int64_t", "length"); to_json_key( j, - "partitionColumns", - p.partitionColumns, - "IcebergTableLayoutHandle", - "List", - "partitionColumns"); + "fileFormat", + p.fileFormat, + "IcebergSplit", + "FileFormat", + "fileFormat"); to_json_key( j, - "dataColumns", - p.dataColumns, - "IcebergTableLayoutHandle", - "List", - "dataColumns"); + "addresses", + p.addresses, + "IcebergSplit", + "List", + "addresses"); to_json_key( j, - "domainPredicate", - p.domainPredicate, - "IcebergTableLayoutHandle", - "TupleDomain", - "domainPredicate"); + "partitionKeys", + p.partitionKeys, + "IcebergSplit", + "Map", + "partitionKeys"); to_json_key( j, - "remainingPredicate", - p.remainingPredicate, - "IcebergTableLayoutHandle", - "std::shared_ptr", - "remainingPredicate"); + "partitionSpecAsJson", + p.partitionSpecAsJson, + "IcebergSplit", + "String", + "partitionSpecAsJson"); to_json_key( j, - "predicateColumns", - p.predicateColumns, - "IcebergTableLayoutHandle", - "Map", - "predicateColumns"); + "partitionDataJson", + p.partitionDataJson, + "IcebergSplit", + "String", + "partitionDataJson"); to_json_key( j, - "requestedColumns", - p.requestedColumns, - "IcebergTableLayoutHandle", - "List", - "requestedColumns"); + "nodeSelectionStrategy", + p.nodeSelectionStrategy, + "IcebergSplit", + "NodeSelectionStrategy", + "nodeSelectionStrategy"); to_json_key( j, - "pushdownFilterEnabled", - p.pushdownFilterEnabled, - "IcebergTableLayoutHandle", - "bool", - "pushdownFilterEnabled"); + 
"splitWeight", + p.splitWeight, + "IcebergSplit", + "SplitWeight", + "splitWeight"); + to_json_key( + j, "deletes", p.deletes, "IcebergSplit", "List", "deletes"); to_json_key( j, - "partitionColumnPredicate", - p.partitionColumnPredicate, - "IcebergTableLayoutHandle", - "TupleDomain>", - "partitionColumnPredicate"); + "changelogSplitInfo", + p.changelogSplitInfo, + "IcebergSplit", + "ChangelogSplitInfo", + "changelogSplitInfo"); to_json_key( j, - "table", - p.table, - "IcebergTableLayoutHandle", - "IcebergTableHandle", - "table"); + "dataSequenceNumber", + p.dataSequenceNumber, + "IcebergSplit", + "int64_t", + "dataSequenceNumber"); + to_json_key( + j, + "affinitySchedulingSectionSize", + p.affinitySchedulingSectionSize, + "IcebergSplit", + "int64_t", + "affinitySchedulingSectionSize"); } -void from_json(const json& j, IcebergTableLayoutHandle& p) { +void from_json(const json& j, IcebergSplit& p) { p._type = j["@type"]; + from_json_key(j, "path", p.path, "IcebergSplit", "String", "path"); + from_json_key(j, "start", p.start, "IcebergSplit", "int64_t", "start"); + from_json_key(j, "length", p.length, "IcebergSplit", "int64_t", "length"); from_json_key( j, - "partitionColumns", - p.partitionColumns, - "IcebergTableLayoutHandle", - "List", - "partitionColumns"); + "fileFormat", + p.fileFormat, + "IcebergSplit", + "FileFormat", + "fileFormat"); from_json_key( j, - "dataColumns", - p.dataColumns, - "IcebergTableLayoutHandle", - "List", - "dataColumns"); + "addresses", + p.addresses, + "IcebergSplit", + "List", + "addresses"); from_json_key( j, - "domainPredicate", - p.domainPredicate, - "IcebergTableLayoutHandle", - "TupleDomain", - "domainPredicate"); + "partitionKeys", + p.partitionKeys, + "IcebergSplit", + "Map", + "partitionKeys"); from_json_key( j, - "remainingPredicate", - p.remainingPredicate, - "IcebergTableLayoutHandle", - "std::shared_ptr", - "remainingPredicate"); + "partitionSpecAsJson", + p.partitionSpecAsJson, + "IcebergSplit", + "String", + 
"partitionSpecAsJson"); from_json_key( j, - "predicateColumns", - p.predicateColumns, - "IcebergTableLayoutHandle", - "Map", - "predicateColumns"); + "partitionDataJson", + p.partitionDataJson, + "IcebergSplit", + "String", + "partitionDataJson"); from_json_key( j, - "requestedColumns", - p.requestedColumns, - "IcebergTableLayoutHandle", - "List", - "requestedColumns"); + "nodeSelectionStrategy", + p.nodeSelectionStrategy, + "IcebergSplit", + "NodeSelectionStrategy", + "nodeSelectionStrategy"); from_json_key( j, - "pushdownFilterEnabled", - p.pushdownFilterEnabled, - "IcebergTableLayoutHandle", - "bool", - "pushdownFilterEnabled"); + "splitWeight", + p.splitWeight, + "IcebergSplit", + "SplitWeight", + "splitWeight"); + from_json_key( + j, "deletes", p.deletes, "IcebergSplit", "List", "deletes"); from_json_key( j, - "partitionColumnPredicate", - p.partitionColumnPredicate, - "IcebergTableLayoutHandle", - "TupleDomain>", - "partitionColumnPredicate"); + "changelogSplitInfo", + p.changelogSplitInfo, + "IcebergSplit", + "ChangelogSplitInfo", + "changelogSplitInfo"); from_json_key( j, - "table", - p.table, - "IcebergTableLayoutHandle", - "IcebergTableHandle", - "table"); + "dataSequenceNumber", + p.dataSequenceNumber, + "IcebergSplit", + "int64_t", + "dataSequenceNumber"); + from_json_key( + j, + "affinitySchedulingSectionSize", + p.affinitySchedulingSectionSize, + "IcebergSplit", + "int64_t", + "affinitySchedulingSectionSize"); } } // namespace facebook::presto::protocol::iceberg diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h index a659dc24d103b..388d9247a828c 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.h @@ -129,6 +129,50 @@ void to_json(json& j, const 
IcebergTableName& p); void from_json(const json& j, IcebergTableName& p); } // namespace facebook::presto::protocol::iceberg namespace facebook::presto::protocol::iceberg { +struct SortField { + int sourceColumnId = {}; + SortOrder sortOrder = {}; +}; +void to_json(json& j, const SortField& p); +void from_json(const json& j, SortField& p); +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { +struct IcebergTableHandle : public ConnectorTableHandle { + String schemaName = {}; + IcebergTableName icebergTableName = {}; + bool snapshotSpecified = {}; + std::shared_ptr outputPath = {}; + std::shared_ptr> storageProperties = {}; + std::shared_ptr tableSchemaJson = {}; + std::shared_ptr> partitionFieldIds = {}; + std::shared_ptr> equalityFieldIds = {}; + List sortOrder = {}; + List updatedColumns = {}; + std::shared_ptr materializedViewName = {}; + + IcebergTableHandle() noexcept; +}; +void to_json(json& j, const IcebergTableHandle& p); +void from_json(const json& j, IcebergTableHandle& p); +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { +struct IcebergTableLayoutHandle : public ConnectorTableLayoutHandle { + List partitionColumns = {}; + List dataColumns = {}; + TupleDomain domainPredicate = {}; + std::shared_ptr remainingPredicate = {}; + Map predicateColumns = {}; + std::shared_ptr> requestedColumns = {}; + bool pushdownFilterEnabled = {}; + TupleDomain> partitionColumnPredicate = {}; + IcebergTableHandle table = {}; + + IcebergTableLayoutHandle() noexcept; +}; +void to_json(json& j, const IcebergTableLayoutHandle& p); +void from_json(const json& j, IcebergTableLayoutHandle& p); +} // namespace facebook::presto::protocol::iceberg +namespace facebook::presto::protocol::iceberg { enum class PartitionTransformType { IDENTITY, HOUR, @@ -183,13 +227,28 @@ struct PrestoIcebergPartitionSpec { void to_json(json& j, const PrestoIcebergPartitionSpec& p); void from_json(const 
json& j, PrestoIcebergPartitionSpec& p); } // namespace facebook::presto::protocol::iceberg +// IcebergDistributedProcedureHandle is special since it needs an usage of +// hive::. + namespace facebook::presto::protocol::iceberg { -struct SortField { - int sourceColumnId = {}; - SortOrder sortOrder = {}; +struct IcebergDistributedProcedureHandle + : public ConnectorDistributedProcedureHandle { + String schemaName = {}; + IcebergTableName tableName = {}; + PrestoIcebergSchema schema = {}; + PrestoIcebergPartitionSpec partitionSpec = {}; + List inputColumns = {}; + String outputPath = {}; + FileFormat fileFormat = {}; + hive::HiveCompressionCodec compressionCodec = {}; + Map storageProperties = {}; + IcebergTableLayoutHandle tableLayoutHandle = {}; + Map relevantData = {}; + + IcebergDistributedProcedureHandle() noexcept; }; -void to_json(json& j, const SortField& p); -void from_json(const json& j, SortField& p); +void to_json(json& j, const IcebergDistributedProcedureHandle& p); +void from_json(const json& j, IcebergDistributedProcedureHandle& p); } // namespace facebook::presto::protocol::iceberg // IcebergInsertTableHandle is special since it needs an usage of // hive::. 
@@ -259,39 +318,3 @@ struct IcebergSplit : public ConnectorSplit { void to_json(json& j, const IcebergSplit& p); void from_json(const json& j, IcebergSplit& p); } // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { -struct IcebergTableHandle : public ConnectorTableHandle { - String schemaName = {}; - IcebergTableName icebergTableName = {}; - bool snapshotSpecified = {}; - std::shared_ptr outputPath = {}; - std::shared_ptr> storageProperties = {}; - std::shared_ptr tableSchemaJson = {}; - std::shared_ptr> partitionFieldIds = {}; - std::shared_ptr> equalityFieldIds = {}; - List sortOrder = {}; - List updatedColumns = {}; - std::shared_ptr materializedViewName = {}; - - IcebergTableHandle() noexcept; -}; -void to_json(json& j, const IcebergTableHandle& p); -void from_json(const json& j, IcebergTableHandle& p); -} // namespace facebook::presto::protocol::iceberg -namespace facebook::presto::protocol::iceberg { -struct IcebergTableLayoutHandle : public ConnectorTableLayoutHandle { - List partitionColumns = {}; - List dataColumns = {}; - TupleDomain domainPredicate = {}; - std::shared_ptr remainingPredicate = {}; - Map predicateColumns = {}; - std::shared_ptr> requestedColumns = {}; - bool pushdownFilterEnabled = {}; - TupleDomain> partitionColumnPredicate = {}; - IcebergTableHandle table = {}; - - IcebergTableLayoutHandle() noexcept; -}; -void to_json(json& j, const IcebergTableLayoutHandle& p); -void from_json(const json& j, IcebergTableLayoutHandle& p); -} // namespace facebook::presto::protocol::iceberg diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.yml b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.yml index 1a8be3d90b3b6..9ceec008fc7c7 100644 --- a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.yml +++ 
b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/presto_protocol_iceberg.yml @@ -37,6 +37,11 @@ AbstractClasses: subclasses: - { name: IcebergInsertTableHandle, key: hive-iceberg } + ConnectorDistributedProcedureHandle: + super: JsonEncodedSubclass + subclasses: + - { name: IcebergDistributedProcedureHandle, key: hive-iceberg } + ConnectorTableLayoutHandle: super: JsonEncodedSubclass subclasses: @@ -62,6 +67,7 @@ JavaClasses: - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergTableLayoutHandle.java - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergOutputTableHandle.java - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergInsertTableHandle.java + - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergDistributedProcedureHandle.java - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergColumnHandle.java - presto-iceberg/src/main/java/com/facebook/presto/iceberg/ColumnIdentity.java - presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergPartitionField.java diff --git a/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/special/IcebergDistributedProcedureHandle.hpp.inc b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/special/IcebergDistributedProcedureHandle.hpp.inc new file mode 100644 index 0000000000000..3377b25a93492 --- /dev/null +++ b/presto-native-execution/presto_cpp/presto_protocol/connector/iceberg/special/IcebergDistributedProcedureHandle.hpp.inc @@ -0,0 +1,37 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// IcebergDistributedProcedureHandle is special since it needs an usage of +// hive::. + +namespace facebook::presto::protocol::iceberg { +struct IcebergDistributedProcedureHandle + : public ConnectorDistributedProcedureHandle { + String schemaName = {}; + IcebergTableName tableName = {}; + PrestoIcebergSchema schema = {}; + PrestoIcebergPartitionSpec partitionSpec = {}; + List inputColumns = {}; + String outputPath = {}; + FileFormat fileFormat = {}; + hive::HiveCompressionCodec compressionCodec = {}; + Map storageProperties = {}; + IcebergTableLayoutHandle tableLayoutHandle = {}; + Map relevantData = {}; + + IcebergDistributedProcedureHandle() noexcept; +}; +void to_json(json& j, const IcebergDistributedProcedureHandle& p); +void from_json(const json& j, IcebergDistributedProcedureHandle& p); +} // namespace facebook::presto::protocol::iceberg diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/plan/TableWriterNode.java b/presto-spi/src/main/java/com/facebook/presto/spi/plan/TableWriterNode.java index e1803582f7677..3fcf43a488c8e 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/plan/TableWriterNode.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/plan/TableWriterNode.java @@ -639,17 +639,20 @@ public static class CallDistributedProcedureTarget private final Object[] procedureArguments; private final Optional sourceHandle; private final SchemaTableName schemaTableName; + private final boolean sourceTableEliminated; public CallDistributedProcedureTarget( QualifiedObjectName procedureName, Object[] procedureArguments, Optional sourceHandle, - SchemaTableName schemaTableName) + SchemaTableName schemaTableName, + boolean sourceTableEliminated) { this.procedureName = requireNonNull(procedureName, "procedureName is null"); this.procedureArguments = requireNonNull(procedureArguments, "procedureArguments is null"); 
this.sourceHandle = requireNonNull(sourceHandle, "sourceHandle is null"); this.schemaTableName = requireNonNull(schemaTableName, "schemaTableName is null"); + this.sourceTableEliminated = sourceTableEliminated; } public QualifiedObjectName getProcedureName() @@ -672,6 +675,11 @@ public SchemaTableName getSchemaTableName() return schemaTableName; } + public boolean isSourceTableEliminated() + { + return sourceTableEliminated; + } + @Override public Optional> getOutputColumns() { diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/procedure/DistributedProcedure.java b/presto-spi/src/main/java/com/facebook/presto/spi/procedure/DistributedProcedure.java index 950eb92870c1f..c400bb6311b9f 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/procedure/DistributedProcedure.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/procedure/DistributedProcedure.java @@ -46,9 +46,9 @@ public DistributedProcedureType getType() public abstract ConnectorDistributedProcedureHandle begin(ConnectorSession session, ConnectorProcedureContext procedureContext, ConnectorTableLayoutHandle tableLayoutHandle, Object[] arguments); - public abstract void finish(ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments); + public abstract void finish(ConnectorSession session, ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments); - public ConnectorProcedureContext createContext() + public ConnectorProcedureContext createContext(Object... 
arguments) { throw new PrestoException(StandardErrorCode.NOT_SUPPORTED, "createContext not supported"); } diff --git a/presto-spi/src/main/java/com/facebook/presto/spi/procedure/TableDataRewriteDistributedProcedure.java b/presto-spi/src/main/java/com/facebook/presto/spi/procedure/TableDataRewriteDistributedProcedure.java index 129b023b47775..1831db06eece0 100644 --- a/presto-spi/src/main/java/com/facebook/presto/spi/procedure/TableDataRewriteDistributedProcedure.java +++ b/presto-spi/src/main/java/com/facebook/presto/spi/procedure/TableDataRewriteDistributedProcedure.java @@ -22,7 +22,7 @@ import java.util.Collection; import java.util.List; import java.util.OptionalInt; -import java.util.function.Supplier; +import java.util.function.Function; import static com.facebook.presto.common.type.StandardTypes.VARCHAR; import static com.facebook.presto.spi.procedure.DistributedProcedure.DistributedProcedureType.TABLE_DATA_REWRITE; @@ -38,7 +38,7 @@ public class TableDataRewriteDistributedProcedure private final BeginCallDistributedProcedure beginCallDistributedProcedure; private final FinishCallDistributedProcedure finishCallDistributedProcedure; - private Supplier contextSupplier; + private final Function contextProvider; private int schemaIndex = -1; private int tableNameIndex = -1; private OptionalInt filterIndex = OptionalInt.empty(); @@ -47,12 +47,12 @@ public TableDataRewriteDistributedProcedure(String schema, String name, List arguments, BeginCallDistributedProcedure beginCallDistributedProcedure, FinishCallDistributedProcedure finishCallDistributedProcedure, - Supplier contextSupplier) + Function contextProvider) { super(TABLE_DATA_REWRITE, schema, name, arguments); this.beginCallDistributedProcedure = requireNonNull(beginCallDistributedProcedure, "beginCallDistributedProcedure is null"); this.finishCallDistributedProcedure = requireNonNull(finishCallDistributedProcedure, "finishCallDistributedProcedure is null"); - this.contextSupplier = 
requireNonNull(contextSupplier, "contextSupplier is null"); + this.contextProvider = requireNonNull(contextProvider, "contextProvider is null"); for (int i = 0; i < getArguments().size(); i++) { if (getArguments().get(i).getName().equals(SCHEMA)) { checkArgument(getArguments().get(i).getType().getBase().equals(VARCHAR), @@ -79,14 +79,14 @@ public ConnectorDistributedProcedureHandle begin(ConnectorSession session, Conne } @Override - public void finish(ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments) + public void finish(ConnectorSession session, ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments) { - this.finishCallDistributedProcedure.finish(procedureContext, procedureHandle, fragments); + this.finishCallDistributedProcedure.finish(session, procedureContext, procedureHandle, fragments); } - public ConnectorProcedureContext createContext() + public ConnectorProcedureContext createContext(Object... 
arguments) { - return contextSupplier.get(); + return contextProvider.apply(arguments); } public String getSchema(Object[] parameters) @@ -118,6 +118,6 @@ public interface BeginCallDistributedProcedure @FunctionalInterface public interface FinishCallDistributedProcedure { - void finish(ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments); + void finish(ConnectorSession session, ConnectorProcedureContext procedureContext, ConnectorDistributedProcedureHandle procedureHandle, Collection fragments); } } diff --git a/presto-tests/src/test/java/com/facebook/presto/tests/TestProcedureCreation.java b/presto-tests/src/test/java/com/facebook/presto/tests/TestProcedureCreation.java index 4fcc8cc62ccf3..70563239a1fe1 100644 --- a/presto-tests/src/test/java/com/facebook/presto/tests/TestProcedureCreation.java +++ b/presto-tests/src/test/java/com/facebook/presto/tests/TestProcedureCreation.java @@ -127,8 +127,8 @@ public void showCreateDistributedProcedure() new DistributedProcedure.Argument("table_name", VARCHAR), new DistributedProcedure.Argument("schema", VARCHAR, false, null)), (session, transactionContext, tableLayoutHandle, arguments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)).isNotNull(); + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())).isNotNull(); } @Test @@ -142,8 +142,8 @@ public void shouldThrowExceptionForDistributedProcedureWithWrongArgument() new DistributedProcedure.Argument("table_name", VARCHAR), new DistributedProcedure.Argument("name3", VARCHAR, false, null)), (session, transactionContext, tableLayoutHandle, arguments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)) + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new 
TestProcedureRegistry.TestProcedureContext())) .isInstanceOf(PrestoException.class) .hasMessage("A distributed procedure need at least 2 arguments: `schema` and `table_name` for the target table"); @@ -155,8 +155,8 @@ public void shouldThrowExceptionForDistributedProcedureWithWrongArgument() new DistributedProcedure.Argument("name2", VARCHAR), new DistributedProcedure.Argument("schema", VARCHAR, false, null)), (session, transactionContext, tableLayoutHandle, arguments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)) + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())) .isInstanceOf(PrestoException.class) .hasMessage("A distributed procedure need at least 2 arguments: `schema` and `table_name` for the target table"); @@ -168,8 +168,8 @@ public void shouldThrowExceptionForDistributedProcedureWithWrongArgument() new DistributedProcedure.Argument("table_name", VARCHAR), new DistributedProcedure.Argument("schema", INTEGER, false, 123)), (session, transactionContext, tableLayoutHandle, arguments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)) + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new TestProcedureRegistry.TestProcedureContext())) .isInstanceOf(PrestoException.class) .hasMessage("Argument `schema` must be string type"); @@ -181,8 +181,8 @@ public void shouldThrowExceptionForDistributedProcedureWithWrongArgument() new DistributedProcedure.Argument("table_name", TIMESTAMP), new DistributedProcedure.Argument("schema", VARCHAR, false, null)), (session, transactionContext, tableLayoutHandle, arguments) -> null, - (transactionContext, procedureHandle, fragments) -> {}, - TestProcedureRegistry.TestProcedureContext::new)) + (session, transactionContext, procedureHandle, fragments) -> {}, + ignored -> new 
TestProcedureRegistry.TestProcedureContext())) .isInstanceOf(PrestoException.class) .hasMessage("Argument `table_name` must be string type"); }