From 47e1fd5ccd30de9d0953215e4354dd2d7e9ffba8 Mon Sep 17 00:00:00 2001 From: Chujun Song Date: Wed, 27 Aug 2025 14:20:52 -0400 Subject: [PATCH] Support copy-on-write change for Iceberg Connector --- .../main/java/io/trino/metadata/Metadata.java | 24 +- .../io/trino/metadata/MetadataManager.java | 32 +- .../trino/sql/analyzer/StatementAnalyzer.java | 28 +- .../tracing/TracingConnectorMetadata.java | 10 + .../io/trino/tracing/TracingMetadata.java | 44 +- .../trino/metadata/AbstractMockMetadata.java | 30 +- .../spi/connector/ConnectorMetadata.java | 26 +- .../io/trino/spi/connector/UpdateKind.java | 21 + .../ClassLoaderSafeConnectorMetadata.java | 9 + .../trino/plugin/iceberg/CommitTaskData.java | 3 +- .../plugin/iceberg/IcebergColumnHandle.java | 21 + .../trino/plugin/iceberg/IcebergConfig.java | 42 ++ .../plugin/iceberg/IcebergMergeSink.java | 290 +++++++- .../trino/plugin/iceberg/IcebergMetadata.java | 225 +++++- .../trino/plugin/iceberg/IcebergPageSink.java | 3 +- .../iceberg/IcebergPageSinkProvider.java | 14 +- .../iceberg/IcebergPageSourceProvider.java | 325 ++++++++- .../plugin/iceberg/IcebergTableHandle.java | 31 +- .../iceberg/IcebergTableProperties.java | 39 + .../io/trino/plugin/iceberg/IcebergUtil.java | 62 +- .../trino/plugin/iceberg/WriteChangeMode.java | 46 ++ .../iceberg/delete/PositionDeleteWriter.java | 3 +- .../TableChangesFunctionProcessor.java | 3 +- .../BaseIcebergConnectorSmokeTest.java | 5 +- .../iceberg/BaseIcebergConnectorTest.java | 102 ++- .../BaseIcebergMaterializedViewTest.java | 10 +- .../plugin/iceberg/IcebergTestUtils.java | 9 + .../TestFileBasedConflictDetection.java | 13 +- .../iceberg/TestIcebergAvroConnectorTest.java | 3 +- .../plugin/iceberg/TestIcebergConfig.java | 15 +- .../TestIcebergCopyOnWriteConnectorTest.java | 674 ++++++++++++++++++ .../TestIcebergMinioOrcConnectorTest.java | 3 +- ...stIcebergNodeLocalDynamicSplitPruning.java | 12 +- .../TestIcebergParquetConnectorTest.java | 3 +- .../iceberg/TestIcebergSplitSource.java | 
3 +- ...alogNestedNamespaceConnectorSmokeTest.java | 7 +- ...TestConnectorPushdownRulesWithIceberg.java | 12 +- .../plugin/lakehouse/LakehouseMetadata.java | 11 +- .../BaseLakehouseConnectorSmokeTest.java | 5 +- .../lakehouse/TestLakehouseConnectorTest.java | 5 +- .../TestLakehouseFileConnectorSmokeTest.java | 5 +- ...estLakehouseIcebergConnectorSmokeTest.java | 5 +- .../TestLakehouseMotoConnectorSmokeTest.java | 5 +- .../hive/TestHiveRedirectionToIceberg.java | 5 +- 44 files changed, 2059 insertions(+), 184 deletions(-) create mode 100644 core/trino-spi/src/main/java/io/trino/spi/connector/UpdateKind.java create mode 100644 plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/WriteChangeMode.java create mode 100644 plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergCopyOnWriteConnectorTest.java diff --git a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java index a8e59e1f87de..b70f9aa9457f 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/Metadata.java +++ b/core/trino-main/src/main/java/io/trino/metadata/Metadata.java @@ -54,6 +54,7 @@ import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -107,6 +108,21 @@ public interface Metadata */ Optional getTableHandle(Session session, QualifiedObjectName tableName); + /** + * Returns a table handle for the specified table name with updateKind. + */ + Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind); + + /** + * Returns a table handle for the specified table name with a specified version. 
+ */ + Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion); + + /** + * Returns a table handle for the specified table name with a specified version and updateKind. + */ + Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind); + Optional getSystemTable(Session session, QualifiedObjectName tableName); Optional getTableHandleForExecute( @@ -851,14 +867,14 @@ default boolean isMaterializedView(Session session, QualifiedObjectName viewName RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName); /** - * Get the target table handle after performing redirection with a table version. + * Get the target table handle after performing redirection with updateKind. */ - RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion); + RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind); /** - * Returns a table handle for the specified table name with a specified version + * Get the target table handle after performing redirection with a table version and updateKind. */ - Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion); + RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind); /** * Returns maximum number of tasks that can be created while writing data to specific connector. 
diff --git a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java index fa2be1501238..873308194e90 100644 --- a/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java +++ b/core/trino-main/src/main/java/io/trino/metadata/MetadataManager.java @@ -87,6 +87,7 @@ import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -270,11 +271,23 @@ public List listSchemaNames(Session session, String catalogName) @Override public Optional getTableHandle(Session session, QualifiedObjectName table) { - return getTableHandle(session, table, Optional.empty(), Optional.empty()); + return getTableHandle(session, table, Optional.empty(), Optional.empty(), Optional.empty()); + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName table, Optional updateKind) + { + return getTableHandle(session, table, Optional.empty(), Optional.empty(), updateKind); } @Override public Optional getTableHandle(Session session, QualifiedObjectName table, Optional startVersion, Optional endVersion) + { + return getTableHandle(session, table, startVersion, endVersion, Optional.empty()); + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName table, Optional startVersion, Optional endVersion, Optional updateKind) { requireNonNull(table, "table is null"); if (cannotExist(table)) { @@ -294,7 +307,8 @@ public Optional getTableHandle(Session session, QualifiedObjectName connectorSession, table.asSchemaTableName(), startTableVersion, - endTableVersion); + endTableVersion, + updateKind); return Optional.ofNullable(tableHandle) 
.map(connectorTableHandle -> new TableHandle( catalogHandle, @@ -1925,18 +1939,24 @@ private QualifiedObjectName getRedirectedTableName(Session session, QualifiedObj @Override public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName) { - return getRedirectionAwareTableHandle(session, tableName, Optional.empty(), Optional.empty()); + return getRedirectionAwareTableHandle(session, tableName, Optional.empty(), Optional.empty(), Optional.empty()); + } + + @Override + public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind) + { + return getRedirectionAwareTableHandle(session, tableName, Optional.empty(), Optional.empty(), updateKind); } @Override - public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind) { QualifiedObjectName targetTableName = getRedirectedTableName(session, tableName, startVersion, endVersion); if (targetTableName.equals(tableName)) { - return noRedirection(getTableHandle(session, tableName, startVersion, endVersion)); + return noRedirection(getTableHandle(session, tableName, startVersion, endVersion, updateKind)); } - Optional tableHandle = getTableHandle(session, targetTableName, startVersion, endVersion); + Optional tableHandle = getTableHandle(session, targetTableName, startVersion, endVersion, updateKind); if (tableHandle.isPresent()) { return withRedirectionTo(targetTableName, tableHandle.get()); } diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java index ca962c1eef32..112f56c55365 100644 --- 
a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java @@ -72,6 +72,7 @@ import io.trino.spi.connector.PointerType; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.TableProcedureMetadata; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.function.CatalogSchemaFunctionName; import io.trino.spi.function.FunctionKind; import io.trino.spi.function.OperatorType; @@ -516,13 +517,6 @@ private Scope analyzeForUpdate(Relation relation, Optional outerQueryScop .process(relation, Optional.empty()); } - private enum UpdateKind - { - DELETE, - UPDATE, - MERGE, - } - /** * Visitor context represents local query scope (if exists). The invariant is * that the local query scopes hierarchy should always have outer query scope @@ -597,7 +591,7 @@ protected Scope visitInsert(Insert insert, Optional scope) endVersion = Optional.of(toTableVersion(branch)); } // verify the insert destination columns match the query - RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, targetTable, Optional.empty(), endVersion); + RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, targetTable, Optional.empty(), endVersion, Optional.of(UpdateKind.DELETE)); Optional targetTableHandle = redirection.tableHandle(); targetTable = redirection.redirectedTableName().orElse(targetTable); if (targetTableHandle.isEmpty()) { @@ -853,7 +847,7 @@ protected Scope visitDelete(Delete node, Optional scope) } endVersion = Optional.of(toTableVersion(branch)); } - RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalName, Optional.empty(), endVersion); + RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalName, Optional.empty(), endVersion, Optional.of(UpdateKind.DELETE)); QualifiedObjectName tableName = 
redirection.redirectedTableName().orElse(originalName); TableHandle handle = redirection.tableHandle() .orElseThrow(() -> semanticException(TABLE_NOT_FOUND, table, "Table '%s' does not exist", tableName)); @@ -2303,7 +2297,7 @@ protected Scope visitTable(Table table, Optional scope) } // This can only be a table - RedirectionAwareTableHandle redirection = getTableHandle(table, name, scope); + RedirectionAwareTableHandle redirection = getTableHandle(table, name, scope, updateKind); Optional tableHandle = redirection.tableHandle(); QualifiedObjectName targetTableName = redirection.redirectedTableName().orElse(name); analysis.addEmptyColumnReferencesForTable(accessControl, session.getIdentity(), targetTableName); @@ -2383,7 +2377,7 @@ private boolean isMaterializedViewSufficientlyFresh(Session session, QualifiedOb private void checkStorageTableNotRedirected(QualifiedObjectName source) { - metadata.getRedirectionAwareTableHandle(session, source).redirectedTableName().ifPresent(name -> { + metadata.getRedirectionAwareTableHandle(session, source, updateKind).redirectedTableName().ifPresent(name -> { throw new TrinoException(NOT_SUPPORTED, format("Redirection of materialized view storage table '%s' to '%s' is not supported", source, name)); }); } @@ -3489,7 +3483,7 @@ protected Scope visitUpdate(Update update, Optional scope) } endVersion = Optional.of(toTableVersion(branch)); } - RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalName, Optional.empty(), endVersion); + RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalName, Optional.empty(), endVersion, Optional.of(UpdateKind.UPDATE)); QualifiedObjectName tableName = redirection.redirectedTableName().orElse(originalName); TableHandle handle = redirection.tableHandle() .orElseThrow(() -> semanticException(TABLE_NOT_FOUND, table, "Table '%s' does not exist", tableName)); @@ -3628,7 +3622,7 @@ protected Scope visitMerge(Merge 
merge, Optional scope) analysis.setUpdateType("MERGE"); - RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalTableName, Optional.empty(), endVersion); + RedirectionAwareTableHandle redirection = metadata.getRedirectionAwareTableHandle(session, originalTableName, Optional.empty(), endVersion, Optional.of(UpdateKind.MERGE)); QualifiedObjectName tableName = redirection.redirectedTableName().orElse(originalTableName); TableHandle targetTableHandle = redirection.tableHandle() .orElseThrow(() -> semanticException(TABLE_NOT_FOUND, table, "Table '%s' does not exist", tableName)); @@ -6022,21 +6016,21 @@ private OutputColumn createOutputColumn(Field field) * Helper function that analyzes any versioning and returns the appropriate table handle. * If no for clause exists, this is just a wrapper around getRedirectionAwareTableHandle in MetadataManager. */ - private RedirectionAwareTableHandle getTableHandle(Table table, QualifiedObjectName name, Optional scope) + private RedirectionAwareTableHandle getTableHandle(Table table, QualifiedObjectName name, Optional scope, Optional updateKind) { if (table.getQueryPeriod().isPresent()) { verify(table.getBranch().isEmpty(), "branch must be empty"); Optional startVersion = extractTableVersion(table, table.getQueryPeriod().get().getStart(), scope); Optional endVersion = extractTableVersion(table, table.getQueryPeriod().get().getEnd(), scope); - return metadata.getRedirectionAwareTableHandle(session, name, startVersion, endVersion); + return metadata.getRedirectionAwareTableHandle(session, name, startVersion, endVersion, updateKind); } if (table.getBranch().isPresent()) { verify(table.getQueryPeriod().isEmpty(), "query period must be empty"); String branch = table.getBranch().get().getValue(); Optional endVersion = Optional.of(toTableVersion(branch)); - return metadata.getRedirectionAwareTableHandle(session, name, Optional.empty(), endVersion); + return 
metadata.getRedirectionAwareTableHandle(session, name, Optional.empty(), endVersion, updateKind); } - return metadata.getRedirectionAwareTableHandle(session, name, Optional.empty(), Optional.empty()); + return metadata.getRedirectionAwareTableHandle(session, name, Optional.empty(), Optional.empty(), updateKind); } /** diff --git a/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java b/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java index e4a7523bf586..37560f42c98e 100644 --- a/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java +++ b/core/trino-main/src/main/java/io/trino/tracing/TracingConnectorMetadata.java @@ -68,6 +68,7 @@ import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -144,6 +145,15 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable } } + @Override + public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional startVersion, Optional endVersion, Optional updateKind) + { + Span span = startSpan("getTableHandle", tableName); + try (var _ = scopedSpan(span)) { + return delegate.getTableHandle(session, tableName, startVersion, endVersion, updateKind); + } + } + @Override public Optional getTableHandleForExecute(ConnectorSession session, ConnectorAccessControl accessControl, ConnectorTableHandle tableHandle, String procedureName, Map executeProperties, RetryMode retryMode) { diff --git a/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java b/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java index 9a1df851f42c..1b09102ed83d 100644 --- 
a/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java +++ b/core/trino-main/src/main/java/io/trino/tracing/TracingMetadata.java @@ -85,6 +85,7 @@ import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -185,7 +186,34 @@ public Optional getTableHandle(Session session, QualifiedObjectName { Span span = startSpan("getTableHandle", tableName); try (var _ = scopedSpan(span)) { - return delegate.getTableHandle(session, tableName); + return delegate.getTableHandle(session, tableName, Optional.empty()); + } + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + { + Span span = startSpan("getTableHandle", tableName); + try (var _ = scopedSpan(span)) { + return delegate.getTableHandle(session, tableName, startVersion, endVersion); + } + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind) + { + Span span = startSpan("getTableHandle", tableName); + try (var _ = scopedSpan(span)) { + return delegate.getTableHandle(session, tableName, updateKind); + } + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind) + { + Span span = startSpan("getTableHandle", tableName); + try (var _ = scopedSpan(span)) { + return delegate.getTableHandle(session, tableName, startVersion, endVersion, updateKind); } } @@ -1548,29 +1576,29 @@ public Optional applyTableScanRedirect(Sessi } @Override - public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName) + public 
RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind) { Span span = startSpan("getRedirectionAwareTableHandle", tableName); try (var _ = scopedSpan(span)) { - return delegate.getRedirectionAwareTableHandle(session, tableName); + return delegate.getRedirectionAwareTableHandle(session, tableName, updateKind); } } @Override - public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName) { Span span = startSpan("getRedirectionAwareTableHandle", tableName); try (var _ = scopedSpan(span)) { - return delegate.getRedirectionAwareTableHandle(session, tableName, startVersion, endVersion); + return delegate.getRedirectionAwareTableHandle(session, tableName); } } @Override - public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind) { - Span span = startSpan("getTableHandle", tableName); + Span span = startSpan("getRedirectionAwareTableHandle", tableName); try (var _ = scopedSpan(span)) { - return delegate.getTableHandle(session, tableName, startVersion, endVersion); + return delegate.getRedirectionAwareTableHandle(session, tableName, startVersion, endVersion, updateKind); } } diff --git a/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java b/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java index 7b9850624112..405382796944 100644 --- a/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java +++ b/core/trino-main/src/test/java/io/trino/metadata/AbstractMockMetadata.java @@ -58,6 +58,7 @@ 
import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -145,6 +146,24 @@ public Optional getTableHandle(Session session, QualifiedObjectName throw new UnsupportedOperationException(); } + @Override + public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind) + { + throw new UnsupportedOperationException(); + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + { + throw new UnsupportedOperationException(); + } + + @Override + public Optional getTableHandle(Session session, QualifiedObjectName table, Optional startVersion, Optional endVersion, Optional updateKind) + { + throw new UnsupportedOperationException(); + } + @Override public Optional getTableHandleForExecute(Session session, TableHandle tableHandle, String procedureName, Map executeProperties) { @@ -1048,17 +1067,18 @@ public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session sessio } @Override - public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion) + public RedirectionAwareTableHandle getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional updateKind) { - if (startVersion.isEmpty() || endVersion.isEmpty()) { - return noRedirection(getTableHandle(session, tableName)); - } throw new UnsupportedOperationException(); } @Override - public Optional getTableHandle(Session session, QualifiedObjectName table, Optional startVersion, Optional endVersion) + public RedirectionAwareTableHandle 
getRedirectionAwareTableHandle(Session session, QualifiedObjectName tableName, Optional startVersion, Optional endVersion, Optional updateKind) { + if (startVersion.isEmpty() || endVersion.isEmpty()) { + return noRedirection(getTableHandle(session, tableName)); + } + throw new UnsupportedOperationException(); } diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java index 8f32252f898c..37cc27dbe9cf 100644 --- a/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/ConnectorMetadata.java @@ -115,6 +115,26 @@ default ConnectorTableHandle getTableHandle( throw new TrinoException(GENERIC_INTERNAL_ERROR, "ConnectorMetadata getTableHandle() is not implemented"); } + /** + * Returns a table handle for the specified table name, version and updateKind, or {@code null} if {@code tableName} relation does not exist + * or is not a table (e.g. is a view, or a materialized view). + * + * @throws TrinoException implementation can throw this exception when {@code tableName} refers to a table that + * cannot be queried. + * @see #getView(ConnectorSession, SchemaTableName) + * @see #getMaterializedView(ConnectorSession, SchemaTableName) + */ + @Nullable + default ConnectorTableHandle getTableHandle( + ConnectorSession session, + SchemaTableName tableName, + Optional startVersion, + Optional endVersion, + Optional updateKind) + { + return getTableHandle(session, tableName, startVersion, endVersion); + } + /** * Create initial handle for execution of table procedure. The handle will be used through planning process. 
It will be converted to final * handle used for execution via @{link {@link ConnectorMetadata#beginTableExecute} @@ -394,7 +414,7 @@ default Iterator streamRelationComments(ConnectorSessio return RelationCommentMetadata.forRedirectedTable(tableName); } try { - ConnectorTableHandle tableHandle = getTableHandle(session, tableName, Optional.empty(), Optional.empty()); + ConnectorTableHandle tableHandle = getTableHandle(session, tableName, Optional.empty(), Optional.empty(), Optional.empty()); if (tableHandle == null) { // disappeared during listing return null; @@ -967,7 +987,7 @@ default Map getViews(ConnectorSession * Gets the view data for the specified view name. Returns {@link Optional#empty()} if {@code viewName} * relation does not or is not a view (e.g. is a table, or a materialized view). * - * @see #getTableHandle(ConnectorSession, SchemaTableName, Optional, Optional) + * @see #getTableHandle(ConnectorSession, SchemaTableName, Optional, Optional, Optional) * @see #getMaterializedView(ConnectorSession, SchemaTableName) */ default Optional getView(ConnectorSession session, SchemaTableName viewName) @@ -1755,7 +1775,7 @@ default Map getMaterialize * Gets the materialized view data for the specified materialized view name. Returns {@link Optional#empty()} * if {@code viewName} relation does not or is not a materialized view (e.g. is a table, or a view). 
* - * @see #getTableHandle(ConnectorSession, SchemaTableName, Optional, Optional) + * @see #getTableHandle(ConnectorSession, SchemaTableName, Optional, Optional, Optional) * @see #getView(ConnectorSession, SchemaTableName) */ default Optional getMaterializedView(ConnectorSession session, SchemaTableName viewName) diff --git a/core/trino-spi/src/main/java/io/trino/spi/connector/UpdateKind.java b/core/trino-spi/src/main/java/io/trino/spi/connector/UpdateKind.java new file mode 100644 index 000000000000..b9317b25bc4e --- /dev/null +++ b/core/trino-spi/src/main/java/io/trino/spi/connector/UpdateKind.java @@ -0,0 +1,21 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.spi.connector; + +public enum UpdateKind +{ + DELETE, + UPDATE, + MERGE +} diff --git a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java index 339a4b8738a0..f256120c6487 100644 --- a/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java +++ b/lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/classloader/ClassLoaderSafeConnectorMetadata.java @@ -68,6 +68,7 @@ import io.trino.spi.connector.TableFunctionApplicationResult; import io.trino.spi.connector.TableScanRedirectApplicationResult; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -1297,6 +1298,14 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable } } + @Override + public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional startVersion, Optional endVersion, Optional updateKind) + { + try (ThreadContextClassLoader _ = new ThreadContextClassLoader(classLoader)) { + return delegate.getTableHandle(session, tableName, startVersion, endVersion, updateKind); + } + } + @Override public RowChangeParadigm getRowChangeParadigm(ConnectorSession session, ConnectorTableHandle tableHandle) { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/CommitTaskData.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/CommitTaskData.java index 73140ce60f0a..ddc8a00d8cfd 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/CommitTaskData.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/CommitTaskData.java @@ -29,7 +29,8 @@ public record 
CommitTaskData( Optional partitionDataJson, FileContent content, Optional referencedDataFile, - Optional> fileSplitOffsets) + Optional> fileSplitOffsets, + boolean copyOnWriteDelete) { public CommitTaskData { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergColumnHandle.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergColumnHandle.java index 0f70cc51ea18..ccf01bcd20fb 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergColumnHandle.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergColumnHandle.java @@ -58,6 +58,27 @@ public class IcebergColumnHandle public static final int DATA_CHANGE_TIMESTAMP_ID = Integer.MIN_VALUE + 5; public static final String DATA_CHANGE_ORDINAL_NAME = "_change_ordinal"; public static final int DATA_CHANGE_ORDINAL_ID = Integer.MIN_VALUE + 6; + public static final int DATA_FILE_DATA_SEQUENCE_NUMBER_ID = Integer.MIN_VALUE + 7; + public static final int DELETE_FILES_CONTENT_ID = Integer.MIN_VALUE + 8; + public static final int DELETE_FILES_CONTENT_ELEMENT_ID = Integer.MIN_VALUE + 9; + public static final int DELETE_FILES_PATH_ID = Integer.MIN_VALUE + 10; + public static final int DELETE_FILES_PATH_ELEMENT_ID = Integer.MIN_VALUE + 11; + + public static final int DELETE_FILES_FORMAT_ID = Integer.MIN_VALUE + 12; + public static final int DELETE_FILES_FORMAT_ELEMENT_ID = Integer.MIN_VALUE + 13; + public static final int DELETE_FILES_RECORD_COUNT_ID = Integer.MIN_VALUE + 14; + public static final int DELETE_FILES_RECORD_COUNT_ELEMENT_ID = Integer.MIN_VALUE + 15; + public static final int DELETE_FILES_FILE_SIZE_IN_BYTES_ID = Integer.MIN_VALUE + 16; + public static final int DELETE_FILES_FILE_SIZE_IN_BYTES_ELEMENT_ID = Integer.MIN_VALUE + 17; + public static final int DELETE_FILES_EQUALITY_FIELD_IDS_ID = Integer.MIN_VALUE + 18; + public static final int DELETE_FILES_EQUALITY_FIELD_ID_ID = Integer.MIN_VALUE + 19; + public static final int 
DELETE_FILES_EQUALITY_FIELD_ID_ELEMENT_ID = Integer.MIN_VALUE + 20; + public static final int DELETE_FILES_ROW_POSITION_LOWER_BOUND_ID = Integer.MIN_VALUE + 21; + public static final int DELETE_FILES_ROW_POSITION_LOWER_BOUND_ELEMENT_ID = Integer.MIN_VALUE + 22; + public static final int DELETE_FILES_ROW_POSITION_UPPER_BOUND_ID = Integer.MIN_VALUE + 23; + public static final int DELETE_FILES_ROW_POSITION_UPPER_BOUND_ELEMENT_ID = Integer.MIN_VALUE + 24; + public static final int DELETE_FILES_DATA_SEQUENCE_NUMBER_ID = Integer.MIN_VALUE + 25; + public static final int DELETE_FILES_DATA_SEQUENCE_NUMBER_ELEMENT_ID = Integer.MIN_VALUE + 26; private final ColumnIdentity baseColumnIdentity; private final Type baseType; diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java index 926059a7c6c5..a818d801344e 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergConfig.java @@ -98,6 +98,9 @@ public class IcebergConfig private int metadataParallelism = 8; private boolean bucketExecutionEnabled = true; private boolean fileBasedConflictDetectionEnabled = true; + private WriteChangeMode writeDeleteMode = WriteChangeMode.MOR; + private WriteChangeMode writeUpdateMode = WriteChangeMode.MOR; + private WriteChangeMode writeMergeMode = WriteChangeMode.MOR; public CatalogType getCatalogType() { @@ -622,4 +625,43 @@ public IcebergConfig setFileBasedConflictDetectionEnabled(boolean fileBasedConfl this.fileBasedConflictDetectionEnabled = fileBasedConflictDetectionEnabled; return this; } + + public WriteChangeMode getWriteDeleteMode() + { + return writeDeleteMode; + } + + @Config("iceberg.write-delete-mode") + @ConfigDescription("Set mode used for table delete command: copy-on-write or merge-on-read") + public IcebergConfig setWriteDeleteMode(WriteChangeMode 
writeDeleteMode) + { + this.writeDeleteMode = writeDeleteMode; + return this; + } + + public WriteChangeMode getWriteUpdateMode() + { + return writeUpdateMode; + } + + @Config("iceberg.write-update-mode") + @ConfigDescription("Set mode used for table update command: copy-on-write or merge-on-read") + public IcebergConfig setWriteUpdateMode(WriteChangeMode writeUpdateMode) + { + this.writeUpdateMode = writeUpdateMode; + return this; + } + + public WriteChangeMode getWriteMergeMode() + { + return writeMergeMode; + } + + @Config("iceberg.write-merge-mode") + @ConfigDescription("Set mode used for table merge command: copy-on-write or merge-on-read") + public IcebergConfig setWriteMergeMode(WriteChangeMode writeMergeMode) + { + this.writeMergeMode = writeMergeMode; + return this; + } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java index f6095c9d4bd9..227d16a11fab 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMergeSink.java @@ -13,41 +13,66 @@ */ package io.trino.plugin.iceberg; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import io.airlift.json.JsonCodec; import io.airlift.slice.Slice; +import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystem; +import io.trino.filesystem.TrinoInputFile; +import io.trino.plugin.iceberg.delete.DeleteFile; import io.trino.plugin.iceberg.delete.PositionDeleteWriter; import io.trino.spi.Page; +import io.trino.spi.TrinoException; import io.trino.spi.block.Block; import io.trino.spi.block.RowBlock; import io.trino.spi.connector.ConnectorMergeSink; import io.trino.spi.connector.ConnectorPageSink; +import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.connector.ConnectorSession; import 
io.trino.spi.connector.MergePage; +import io.trino.spi.connector.SourcePage; +import io.trino.spi.connector.UpdateKind; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.type.ArrayType; import io.trino.spi.type.VarcharType; +import org.apache.iceberg.FileContent; +import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; import org.apache.iceberg.io.LocationProvider; +import org.apache.iceberg.mapping.NameMapping; import org.apache.iceberg.types.Type; import org.roaringbitmap.longlong.ImmutableLongBitmapDataProvider; import org.roaringbitmap.longlong.LongBitmapDataProvider; import org.roaringbitmap.longlong.Roaring64Bitmap; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalLong; +import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.stream.IntStream; +import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.base.util.Closables.closeAllSuppress; +import static io.trino.plugin.iceberg.IcebergErrorCode.ICEBERG_WRITER_CLOSE_ERROR; +import static io.trino.plugin.iceberg.IcebergUtil.getWriteChangeMode; import static io.trino.spi.connector.MergePage.createDeleteAndInsertPages; import static io.trino.spi.type.BigintType.BIGINT; import static io.trino.spi.type.IntegerType.INTEGER; import static java.util.Objects.requireNonNull; import static java.util.concurrent.CompletableFuture.completedFuture; +import static java.util.function.Predicate.not; +import static org.apache.iceberg.FileContent.DATA; +import static org.apache.iceberg.MetadataColumns.ROW_POSITION; public class IcebergMergeSink implements ConnectorMergeSink @@ -63,7 +88,11 @@ public class IcebergMergeSink private final Map partitionsSpecs; private 
final ConnectorPageSink insertPageSink; private final int columnCount; + private final List regularColumns; + private final Optional nameMapping; private final Map fileDeletions = new HashMap<>(); + private final IcebergPageSourceProvider icebergPageSourceProvider; + private final UpdateKind updateKind; public IcebergMergeSink( LocationProvider locationProvider, @@ -76,7 +105,11 @@ public IcebergMergeSink( Schema schema, Map partitionsSpecs, ConnectorPageSink insertPageSink, - int columnCount) + int columnCount, + List regularColumns, + Optional nameMapping, + IcebergPageSourceProvider icebergPageSourceProvider, + UpdateKind updateKind) { this.locationProvider = requireNonNull(locationProvider, "locationProvider is null"); this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null"); @@ -89,6 +122,10 @@ public IcebergMergeSink( this.partitionsSpecs = ImmutableMap.copyOf(requireNonNull(partitionsSpecs, "partitionsSpecs is null")); this.insertPageSink = requireNonNull(insertPageSink, "insertPageSink is null"); this.columnCount = columnCount; + this.regularColumns = requireNonNull(regularColumns, "regularColumns is null"); + this.nameMapping = requireNonNull(nameMapping, "nameMapping is null"); + this.icebergPageSourceProvider = requireNonNull(icebergPageSourceProvider, "icebergPageSourceProvider is null"); + this.updateKind = requireNonNull(updateKind, "writeOperation is null"); } @Override @@ -104,15 +141,27 @@ public void storeMergedRows(Page page) Block rowPositionBlock = fields.get(1); Block partitionSpecIdBlock = fields.get(2); Block partitionDataBlock = fields.get(3); + for (int position = 0; position < fieldPathBlock.getPositionCount(); position++) { Slice filePath = VarcharType.VARCHAR.getSlice(fieldPathBlock, position); long rowPosition = BIGINT.getLong(rowPositionBlock, position); + final OptionalLong dataSequenceNumber; + + final List deleteFiles; + if (getWriteChangeMode(storageProperties, 
updateKind).equals(WriteChangeMode.COW)) { + dataSequenceNumber = OptionalLong.of(BIGINT.getLong(fields.get(4), position)); + deleteFiles = extractDeleteFilesFromFields(fields, position, 5); + } + else { + dataSequenceNumber = OptionalLong.empty(); + deleteFiles = List.of(); + } int index = position; FileDeletion deletion = fileDeletions.computeIfAbsent(filePath, _ -> { int partitionSpecId = INTEGER.getInt(partitionSpecIdBlock, index); String partitionData = VarcharType.VARCHAR.getSlice(partitionDataBlock, index).toStringUtf8(); - return new FileDeletion(partitionSpecId, partitionData); + return new FileDeletion(partitionSpecId, partitionData, deleteFiles, dataSequenceNumber); }); deletion.rowsToDelete().addLong(rowPosition); @@ -120,19 +169,86 @@ public void storeMergedRows(Page page) }); } + private static List extractDeleteFilesFromFields(List fields, int position, int startIdx) + { + ArrayType integerArrayType = new ArrayType(INTEGER); + ArrayType bigintArrayType = new ArrayType(BIGINT); + ArrayType varcharArrayType = new ArrayType(VarcharType.VARCHAR); + ArrayType integerArrayArrayType = new ArrayType(new ArrayType(INTEGER)); + + Block fileContentBlock = integerArrayType.getObject(fields.get(startIdx), position); + Block pathBlock = varcharArrayType.getObject(fields.get(startIdx + 1), position); + Block formatBlock = varcharArrayType.getObject(fields.get(startIdx + 2), position); + Block recordCountBlock = bigintArrayType.getObject(fields.get(startIdx + 3), position); + Block fileSizeBlock = bigintArrayType.getObject(fields.get(startIdx + 4), position); + Block equalityFieldIdsBlock = integerArrayArrayType.getObject(fields.get(startIdx + 5), position); + Block lowerBoundBlock = bigintArrayType.getObject(fields.get(startIdx + 6), position); + Block upperBoundBlock = bigintArrayType.getObject(fields.get(startIdx + 7), position); + Block sequenceBlock = bigintArrayType.getObject(fields.get(startIdx + 8), position); + + int count = 
fileContentBlock.getPositionCount(); + List result = new ArrayList<>(count); + + Function id2FileContent = id -> switch (id) { + case 0 -> FileContent.DATA; + case 1 -> FileContent.POSITION_DELETES; + case 2 -> FileContent.EQUALITY_DELETES; + default -> throw new IllegalArgumentException("Unknown FileContent id: " + id); + }; + + for (int i = 0; i < count; i++) { + FileContent content = id2FileContent.apply(INTEGER.getInt(fileContentBlock, i)); + String path = VarcharType.VARCHAR.getSlice(pathBlock, i).toStringUtf8(); + FileFormat format = FileFormat.fromString(VarcharType.VARCHAR.getSlice(formatBlock, i).toStringUtf8()); + long recordCount = BIGINT.getLong(recordCountBlock, i); + long fileSizeInBytes = BIGINT.getLong(fileSizeBlock, i); + + Block equalityFieldIdsSingleFileBlock = integerArrayType.getObject(equalityFieldIdsBlock, i); + List equalityFieldIds = new ArrayList<>(); + IntStream.range(0, equalityFieldIdsSingleFileBlock.getPositionCount()) + .mapToObj(j -> INTEGER.getInt(equalityFieldIdsSingleFileBlock, j)) + .forEach(equalityFieldIds::add); + + Optional rowPositionLowerBound = lowerBoundBlock.isNull(i) ? Optional.empty() : Optional.of(BIGINT.getLong(lowerBoundBlock, i)); + Optional rowPositionUpperBound = upperBoundBlock.isNull(i) ? 
Optional.empty() : Optional.of(BIGINT.getLong(upperBoundBlock, i)); + long sequenceNum = BIGINT.getLong(sequenceBlock, i); + + result.add(new DeleteFile( + content, + path, + format, + recordCount, + fileSizeInBytes, + equalityFieldIds, + rowPositionLowerBound, + rowPositionUpperBound, + sequenceNum)); + } + + return result; + } + @Override public CompletableFuture> finish() { - List fragments = new ArrayList<>(insertPageSink.finish().join()); + List fragments = new ArrayList<>(); - fileDeletions.forEach((dataFilePath, deletion) -> { - PositionDeleteWriter writer = createPositionDeleteWriter( - dataFilePath.toStringUtf8(), - partitionsSpecs.get(deletion.partitionSpecId()), - deletion.partitionDataJson()); + if (getWriteChangeMode(storageProperties, updateKind).equals(WriteChangeMode.COW)) { + fileDeletions.forEach((dataFilePath, deletion) -> + fragments.addAll(rewriteFile(dataFilePath.toStringUtf8(), deletion))); + } + else { + fileDeletions.forEach((dataFilePath, deletion) -> { + PositionDeleteWriter writer = createPositionDeleteWriter( + dataFilePath.toStringUtf8(), + partitionsSpecs.get(deletion.partitionSpecId()), + deletion.partitionDataJson()); - fragments.addAll(writePositionDeletes(writer, deletion.rowsToDelete())); - }); + fragments.addAll(writePositionDeletes(writer, deletion.rowsToDelete())); + }); + } + + fragments.addAll(insertPageSink.finish().join()); return completedFuture(fragments); } @@ -177,16 +293,20 @@ private static Collection writePositionDeletes(PositionDeleteWriter write } } - private static class FileDeletion + public static class FileDeletion { private final int partitionSpecId; private final String partitionDataJson; private final LongBitmapDataProvider rowsToDelete = new Roaring64Bitmap(); + private final List deleteFiles; + private final OptionalLong dataSequenceNumber; - public FileDeletion(int partitionSpecId, String partitionDataJson) + public FileDeletion(int partitionSpecId, String partitionDataJson, List deleteFiles, 
OptionalLong dataSequenceNumber) { this.partitionSpecId = partitionSpecId; this.partitionDataJson = requireNonNull(partitionDataJson, "partitionDataJson is null"); + this.deleteFiles = requireNonNull(deleteFiles, "deleteFilePaths is null"); + this.dataSequenceNumber = requireNonNull(dataSequenceNumber, "dataSequenceNumber is null"); } public int partitionSpecId() @@ -203,5 +323,151 @@ public LongBitmapDataProvider rowsToDelete() { return rowsToDelete; } + + public List getDeleteFiles() + { + return deleteFiles; + } + + public OptionalLong getDataSequenceNumber() + { + return dataSequenceNumber; + } + } + + private List rewriteFile(String dataFilePath, FileDeletion deletion) + { + try { + TrinoInputFile dataFile = fileSystem.newInputFile(Location.of(dataFilePath)); + // Fetch size early to use in task data + createParquetPageSource + long dataFileSize = dataFile.length(); + copyOnWriteRewriteFile(dataFile, dataFileSize, deletion); + CommitTaskData task = new CommitTaskData( + dataFilePath, + fileFormat, + dataFileSize, + // metrics is not used for OverwriteFiles.deleteFile, just pass a dummy one + new MetricsWrapper(0L, null, null, null, null, null, null), + PartitionSpecParser.toJson(partitionsSpecs.get(deletion.partitionSpecId)), + Optional.of(deletion.partitionDataJson), + DATA, + Optional.empty(), + Optional.empty(), + true); + return ImmutableList.of(utf8Slice(jsonCodec.toJson(task))); + } + catch (IOException e) { + throw new TrinoException(ICEBERG_WRITER_CLOSE_ERROR, "Unable to rewrite Parquet file", e); + } + } + + private void copyOnWriteRewriteFile(TrinoInputFile sourceDataFile, long sourceDataFileSize, FileDeletion deletion) + throws IOException + { + LongBitmapDataProvider rowsDeletedByDelete = deletion.rowsToDelete(); + try (PageSourceWithPosition pageSourceWithPosition = + createCopyOnWritePageSource(sourceDataFile, sourceDataFileSize, deletion)) { + // grab the inner source once, or inline the method call + ConnectorPageSource pageSource = 
pageSourceWithPosition.pageSource(); + + while (!pageSource.isFinished()) { + SourcePage sourcePage = pageSource.getNextSourcePage(); + if (sourcePage == null) { + continue; + } + // fully load page + Page page = sourcePage.getPage(); + + int positionCount = page.getPositionCount(); + int[] retained = new int[positionCount]; + int retainedCount = 0; + Block rowPositionBlock = page.getBlock(pageSourceWithPosition.posColumnIdx()); + for (int position = 0; position < positionCount; position++) { + long rowId = BIGINT.getLong(rowPositionBlock, position); + if (!rowsDeletedByDelete.contains(rowId)) { + retained[retainedCount++] = position; + } + } + + if (retainedCount != positionCount) { + page = page.getPositions(retained, 0, retainedCount); + } + page = page.getColumns(IntStream.range(0, page.getChannelCount()).filter(col -> col != pageSourceWithPosition.posColumnIdx).toArray()); + insertPageSink.appendPage(page); + } + } + catch (IOException e) { + throw new TrinoException(ICEBERG_WRITER_CLOSE_ERROR, + "Unable to rewrite parquet file", e); + } + } + + private PageSourceWithPosition createCopyOnWritePageSource(TrinoInputFile inputFile, long fileSize, FileDeletion deletion) + { + PartitionSpec partitionSpec = partitionsSpecs.get(deletion.partitionSpecId); + Type[] columnTypes = partitionSpec.fields().stream() + .map(field -> field.transform().getResultType(schema.findType(field.sourceId()))) + .toArray(Type[]::new); + PartitionData partitionData = PartitionData.fromJson(deletion.partitionDataJson, columnTypes); + + List requiredColumns = new ArrayList<>(regularColumns); + + List deletes = deletion.getDeleteFiles(); + + Set deleteFilterRequiredColumns = icebergPageSourceProvider.requiredColumnsForDeletes(schema, deletes); + + deleteFilterRequiredColumns.stream() + .filter(not(regularColumns::contains)) + .forEach(requiredColumns::add); + + int posColumnIdx = -1; + + for (int i = 0; i < requiredColumns.size(); i++) { + if (requiredColumns.get(i).isRowPositionColumn()) { 
+ posColumnIdx = i; + break; + } + } + + if (posColumnIdx == -1) { + requiredColumns.add(icebergPageSourceProvider.getColumnHandle(ROW_POSITION)); + posColumnIdx = requiredColumns.size() - 1; + } + + ConnectorPageSource pageSource = icebergPageSourceProvider.createPageSource( + session, + inputFile, + fileSystem, + 0, + fileSize, + fileSize, + partitionSpec, + partitionData, + deletion.partitionDataJson, + fileFormat, + schema, + requiredColumns, + requiredColumns, + TupleDomain.all(), + nameMapping, + partitionSpec.partitionToPath(partitionData), + IcebergUtil.getPartitionKeys(partitionData, partitionSpec), + deletion.getDeleteFiles(), + deletion.getDataSequenceNumber(), + Optional.empty()); + return new PageSourceWithPosition(pageSource, posColumnIdx); + } + + private record PageSourceWithPosition( + ConnectorPageSource pageSource, + int posColumnIdx + ) implements AutoCloseable + { + @Override + public void close() + throws IOException + { + pageSource.close(); + } } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java index 295389c864c2..2908daecdca6 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java @@ -117,6 +117,7 @@ import io.trino.spi.connector.SystemTable; import io.trino.spi.connector.TableColumnsMetadata; import io.trino.spi.connector.TableNotFoundException; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.FunctionName; @@ -161,6 +162,7 @@ import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestReader; import org.apache.iceberg.MetadataColumns; +import org.apache.iceberg.OverwriteFiles; import org.apache.iceberg.PartitionField; import 
org.apache.iceberg.PartitionSpec; import org.apache.iceberg.PartitionSpecParser; @@ -197,6 +199,7 @@ import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.IntegerType; +import org.apache.iceberg.types.Types.LongType; import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.types.Types.StringType; import org.apache.iceberg.types.Types.StructType; @@ -269,6 +272,26 @@ import static io.trino.plugin.iceberg.ExpressionConverter.isConvertibleToIcebergExpression; import static io.trino.plugin.iceberg.ExpressionConverter.toIcebergExpression; import static io.trino.plugin.iceberg.IcebergAnalyzeProperties.getColumnNames; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DATA_FILE_DATA_SEQUENCE_NUMBER_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_CONTENT_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_CONTENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_DATA_SEQUENCE_NUMBER_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_DATA_SEQUENCE_NUMBER_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_EQUALITY_FIELD_IDS_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_EQUALITY_FIELD_ID_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_EQUALITY_FIELD_ID_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_FILE_SIZE_IN_BYTES_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_FILE_SIZE_IN_BYTES_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_FORMAT_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_FORMAT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_PATH_ELEMENT_ID; +import static 
io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_PATH_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_RECORD_COUNT_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_RECORD_COUNT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_ROW_POSITION_LOWER_BOUND_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_ROW_POSITION_LOWER_BOUND_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_ROW_POSITION_UPPER_BOUND_ELEMENT_ID; +import static io.trino.plugin.iceberg.IcebergColumnHandle.DELETE_FILES_ROW_POSITION_UPPER_BOUND_ID; import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_DATA; import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_PARTITION_SPEC_ID; import static io.trino.plugin.iceberg.IcebergColumnHandle.TRINO_MERGE_ROW_ID; @@ -317,6 +340,9 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.PARTITIONING_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.SORTED_BY_PROPERTY; +import static io.trino.plugin.iceberg.IcebergTableProperties.WRITE_DELETE_MODE; +import static io.trino.plugin.iceberg.IcebergTableProperties.WRITE_MERGE_MODE; +import static io.trino.plugin.iceberg.IcebergTableProperties.WRITE_UPDATE_MODE; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; import static io.trino.plugin.iceberg.IcebergTableProperties.getTableLocation; import static io.trino.plugin.iceberg.IcebergTableProperties.validateCompression; @@ -341,9 +367,11 @@ import static io.trino.plugin.iceberg.IcebergUtil.getSnapshotIdAsOfTime; import static io.trino.plugin.iceberg.IcebergUtil.getTableComment; import static io.trino.plugin.iceberg.IcebergUtil.getTopLevelColumns; +import static io.trino.plugin.iceberg.IcebergUtil.getWriteChangeMode; import 
static io.trino.plugin.iceberg.IcebergUtil.newCreateTableTransaction; import static io.trino.plugin.iceberg.IcebergUtil.readerForManifest; import static io.trino.plugin.iceberg.IcebergUtil.schemaFromMetadata; +import static io.trino.plugin.iceberg.IcebergUtil.toIcebergPropertyName; import static io.trino.plugin.iceberg.IcebergUtil.validateOrcBloomFilterColumns; import static io.trino.plugin.iceberg.IcebergUtil.validateParquetBloomFilterColumns; import static io.trino.plugin.iceberg.IcebergUtil.verifyExtraProperties; @@ -356,6 +384,7 @@ import static io.trino.plugin.iceberg.TableType.DATA; import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; import static io.trino.plugin.iceberg.TypeConverter.toIcebergTypeForNewColumn; +import static io.trino.plugin.iceberg.WriteChangeMode.COW; import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.ADD_FILES; import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.ADD_FILES_FROM_TABLE; import static io.trino.plugin.iceberg.procedure.IcebergTableProcedureId.DROP_EXTENDED_STATS; @@ -449,6 +478,9 @@ public class IcebergMetadata .add(PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY) .add(PARTITIONING_PROPERTY) .add(SORTED_BY_PROPERTY) + .add(WRITE_DELETE_MODE) + .add(WRITE_UPDATE_MODE) + .add(WRITE_MERGE_MODE) .build(); private static final String SYSTEM_SCHEMA = "system"; @@ -564,7 +596,8 @@ public ConnectorTableHandle getTableHandle( ConnectorSession session, SchemaTableName tableName, Optional startVersion, - Optional endVersion) + Optional endVersion, + Optional updateKind) { if (startVersion.isPresent()) { throw new TrinoException(NOT_SUPPORTED, "Read table with start version is not supported"); @@ -585,7 +618,7 @@ public ConnectorTableHandle getTableHandle( BaseTable storageTable = catalog.getMaterializedViewStorageTable(session, materializedViewName) .orElseThrow(() -> new TrinoException(TABLE_NOT_FOUND, "Storage table metadata not found for materialized view " + tableName)); - return 
tableHandleForCurrentSnapshot(session, tableName, storageTable); + return tableHandleForCurrentSnapshot(session, tableName, storageTable, updateKind); } if (!isDataTable(tableName.getTableName())) { @@ -617,12 +650,13 @@ public ConnectorTableHandle getTableHandle( table, Optional.of(snapshotId), schemaFor(table, snapshotId), - Optional.empty()); + Optional.empty(), + updateKind); } - return tableHandleForCurrentSnapshot(session, tableName, table); + return tableHandleForCurrentSnapshot(session, tableName, table, updateKind); } - private IcebergTableHandle tableHandleForCurrentSnapshot(ConnectorSession session, SchemaTableName tableName, BaseTable table) + private IcebergTableHandle tableHandleForCurrentSnapshot(ConnectorSession session, SchemaTableName tableName, BaseTable table, Optional updateKind) { return tableHandleForSnapshot( session, @@ -630,7 +664,8 @@ private IcebergTableHandle tableHandleForCurrentSnapshot(ConnectorSession sessio table, Optional.ofNullable(table.currentSnapshot()).map(Snapshot::snapshotId), table.schema(), - Optional.of(table.spec())); + Optional.of(table.spec()), + updateKind); } private IcebergTableHandle tableHandleForSnapshot( @@ -639,7 +674,8 @@ private IcebergTableHandle tableHandleForSnapshot( BaseTable table, Optional tableSnapshotId, Schema tableSchema, - Optional partitionSpec) + Optional partitionSpec, + Optional updateKind) { Map tableProperties = table.properties(); return new IcebergTableHandle( @@ -661,7 +697,8 @@ private IcebergTableHandle tableHandleForSnapshot( false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + updateKind); } private Optional getTablePartitioning(ConnectorSession session, Table icebergTable) @@ -1172,7 +1209,7 @@ public void dropSchema(ConnectorSession session, String schemaName, boolean casc dropView(session, viewName); } for (SchemaTableName tableName : listTables(session, Optional.of(schemaName))) { - dropTable(session, getTableHandle(session, tableName, 
Optional.empty(), Optional.empty())); + dropTable(session, getTableHandle(session, tableName, Optional.empty(), Optional.empty(), Optional.empty())); } } catalog.dropNamespace(session, schemaName); @@ -1282,7 +1319,7 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con String tableLocation = null; if (replace) { - ConnectorTableHandle tableHandle = getTableHandle(session, tableMetadata.getTableSchema().getTable(), Optional.empty(), Optional.empty()); + ConnectorTableHandle tableHandle = getTableHandle(session, tableMetadata.getTableSchema().getTable(), Optional.empty(), Optional.empty(), Optional.empty()); if (tableHandle != null) { checkValidTableHandle(tableHandle); IcebergTableHandle table = (IcebergTableHandle) tableHandle; @@ -2535,9 +2572,10 @@ public void setTableProperties(ConnectorSession session, ConnectorTableHandle ta updateProperties.defaultFormat(newFileFormat.toIceberg()); } + int formatVersion = table.getFormatVersion(); if (properties.containsKey(FORMAT_VERSION_PROPERTY)) { // UpdateProperties#commit will trigger any necessary metadata updates required for the new spec version - int formatVersion = (int) properties.get(FORMAT_VERSION_PROPERTY) + formatVersion = (int) properties.get(FORMAT_VERSION_PROPERTY) .orElseThrow(() -> new IllegalArgumentException("The format_version property cannot be empty")); updateProperties.set(FORMAT_VERSION, Integer.toString(formatVersion)); } @@ -2573,6 +2611,15 @@ public void setTableProperties(ConnectorSession session, ConnectorTableHandle ta } updateProperties.set(WRITE_DATA_LOCATION, dataLocation); } + for (String writeChange : new String[] {WRITE_DELETE_MODE, WRITE_UPDATE_MODE, WRITE_MERGE_MODE}) { + if (properties.containsKey(writeChange)) { + if (formatVersion >= 2) { + WriteChangeMode mode = (WriteChangeMode) properties.get(writeChange) + .orElseThrow(() -> new IllegalArgumentException(format("The %s property cannot be empty", writeChange))); + 
updateProperties.set(toIcebergPropertyName(writeChange), mode.toIcebergString()); + } + } + } try { updateProperties.commit(); @@ -2946,7 +2993,7 @@ public TableStatisticsMetadata getStatisticsCollectionMetadataForWrite(Connector return TableStatisticsMetadata.empty(); } - ConnectorTableHandle tableHandle = getTableHandle(session, tableMetadata.getTable(), Optional.empty(), Optional.empty()); + ConnectorTableHandle tableHandle = getTableHandle(session, tableMetadata.getTable(), Optional.empty(), Optional.empty(), Optional.empty()); if (tableHandle == null) { // Assume new table (CTAS), collect NDV stats on all columns return getStatisticsCollectionMetadata(tableMetadata, Optional.empty(), availableColumnNames -> {}); @@ -3119,24 +3166,87 @@ public RowChangeParadigm getRowChangeParadigm(ConnectorSession session, Connecto @Override public ColumnHandle getMergeRowIdColumnHandle(ConnectorSession session, ConnectorTableHandle tableHandle) { - StructType type = StructType.of(ImmutableList.builder() + // Build the common fields + ImmutableList.Builder fieldsBuilder = ImmutableList.builder() .add(MetadataColumns.FILE_PATH) .add(MetadataColumns.ROW_POSITION) - .add(NestedField.required(TRINO_MERGE_PARTITION_SPEC_ID, "partition_spec_id", IntegerType.get())) - .add(NestedField.required(TRINO_MERGE_PARTITION_DATA, "partition_data", StringType.get())) - .build()); - - NestedField field = NestedField.required(TRINO_MERGE_ROW_ID, TRINO_ROW_ID_NAME, type); - return getColumnHandle(field, typeManager); + .add(NestedField.required( + TRINO_MERGE_PARTITION_SPEC_ID, + "partition_spec_id", + IntegerType.get())) + .add(NestedField.required( + TRINO_MERGE_PARTITION_DATA, + "partition_data", + StringType.get())); + checkState(((IcebergTableHandle) tableHandle).getUpdateKind().isPresent()); + // In COW mode, add the delete-file-info struct + if (getWriteChangeMode(((IcebergTableHandle) tableHandle).getStorageProperties(), ((IcebergTableHandle) tableHandle).getUpdateKind().get()) == COW) { 
+ fieldsBuilder.add(NestedField.required( + DATA_FILE_DATA_SEQUENCE_NUMBER_ID, + "data_file_data_sequence_number", + Types.LongType.get())); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_CONTENT_ID, + "deleted_files_content", + Types.ListType.ofRequired(DELETE_FILES_CONTENT_ELEMENT_ID, IntegerType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_PATH_ID, + "deleted_files_path", + Types.ListType.ofRequired(DELETE_FILES_PATH_ELEMENT_ID, StringType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_FORMAT_ID, + "deleted_files_format", + Types.ListType.ofRequired(DELETE_FILES_FORMAT_ELEMENT_ID, StringType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_RECORD_COUNT_ID, + "deleted_files_record_count", + Types.ListType.ofRequired(DELETE_FILES_RECORD_COUNT_ELEMENT_ID, LongType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_FILE_SIZE_IN_BYTES_ID, + "delete_files_file_size_in_bytes", + Types.ListType.ofRequired(DELETE_FILES_FILE_SIZE_IN_BYTES_ELEMENT_ID, LongType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_EQUALITY_FIELD_IDS_ID, + "delete_files_equality_field_ids", + Types.ListType.ofRequired(DELETE_FILES_EQUALITY_FIELD_ID_ID, Types.ListType.ofRequired(DELETE_FILES_EQUALITY_FIELD_ID_ELEMENT_ID, IntegerType.get())))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_ROW_POSITION_LOWER_BOUND_ID, + "delete_files_row_position_lower_bound", + Types.ListType.ofRequired(DELETE_FILES_ROW_POSITION_LOWER_BOUND_ELEMENT_ID, LongType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_ROW_POSITION_UPPER_BOUND_ID, + "delete_files_row_position_upper_bound", + Types.ListType.ofRequired(DELETE_FILES_ROW_POSITION_UPPER_BOUND_ELEMENT_ID, LongType.get()))); + fieldsBuilder.add(NestedField.required( + DELETE_FILES_DATA_SEQUENCE_NUMBER_ID, + "delete_files_data_sequence_number", + Types.ListType.ofRequired(DELETE_FILES_DATA_SEQUENCE_NUMBER_ELEMENT_ID, 
LongType.get()))); + } + + // Create the StructType once, using all accumulated fields + StructType type = StructType.of(fieldsBuilder.build()); + + // Wrap it into the merge-row-id column and return + NestedField rowIdField = NestedField.required( + TRINO_MERGE_ROW_ID, + TRINO_ROW_ID_NAME, + type); + return getColumnHandle(rowIdField, typeManager); } @Override public Optional getUpdateLayout(ConnectorSession session, ConnectorTableHandle tableHandle) { - return getInsertLayout(session, tableHandle) + Optional updateLayout = getInsertLayout(session, tableHandle) .flatMap(ConnectorTableLayout::getPartitioning) .map(IcebergPartitioningHandle.class::cast) .map(IcebergPartitioningHandle::forUpdate); + checkState(((IcebergTableHandle) tableHandle).getUpdateKind().isPresent()); + if ((getWriteChangeMode(((IcebergTableHandle) tableHandle).getStorageProperties(), ((IcebergTableHandle) tableHandle).getUpdateKind().get()) == COW) && updateLayout.isEmpty()) { + updateLayout = Optional.of(new IcebergPartitioningHandle(true, List.of())); + } + + return updateLayout; } @Override @@ -3160,7 +3270,8 @@ public void finishMerge(ConnectorSession session, ConnectorMergeTableHandle merg IcebergMergeTableHandle mergeHandle = (IcebergMergeTableHandle) mergeTableHandle; IcebergTableHandle handle = mergeHandle.getTableHandle(); RetryMode retryMode = mergeHandle.getInsertTableHandle().retryMode(); - finishWrite(session, handle, fragments, retryMode); + checkState(mergeHandle.getTableHandle().getUpdateKind().isPresent()); + finishWrite(session, handle, fragments, retryMode, mergeHandle.getTableHandle().getUpdateKind().get()); } private static void verifyTableVersionForUpdate(IcebergTableHandle table) @@ -3177,7 +3288,7 @@ private static void validateNotModifyingOldSnapshot(IcebergTableHandle table, Ta } } - private void finishWrite(ConnectorSession session, IcebergTableHandle table, Collection fragments, RetryMode retryMode) + private void finishWrite(ConnectorSession session, 
IcebergTableHandle table, Collection fragments, RetryMode retryMode, UpdateKind updateKind) { Table icebergTable = transaction.table(); @@ -3194,6 +3305,65 @@ private void finishWrite(ConnectorSession session, IcebergTableHandle table, Col Schema schema = SchemaParser.fromJson(table.getTableSchemaJson()); + if (getWriteChangeMode(table.getStorageProperties(), updateKind) == COW) { + OverwriteFiles overwriteFiles = transaction.newOverwrite(); + table.getSnapshotId().map(icebergTable::snapshot).ifPresent(s -> overwriteFiles.validateFromSnapshot(s.snapshotId())); + TupleDomain dataColumnPredicate = table.getEnforcedPredicate().filter((column, domain) -> !isMetadataColumnId(column.getId())); + if (!dataColumnPredicate.isAll()) { + overwriteFiles.conflictDetectionFilter(toIcebergExpression(dataColumnPredicate)); + } + + overwriteFiles.validateNoConflictingData(); + overwriteFiles.validateNoConflictingDeletes(); + + ImmutableSet.Builder writtenFiles = ImmutableSet.builder(); + for (CommitTaskData task : commitTasks) { + PartitionSpec partitionSpec = PartitionSpecParser.fromJson(schema, task.partitionSpecJson()); + Type[] partitionColumnTypes = partitionSpec.fields().stream() + .map(field -> field.transform().getResultType(schema.findType(field.sourceId()))) + .toArray(Type[]::new); + switch (task.content()) { + case DATA -> { + DataFiles.Builder builder = DataFiles.builder(partitionSpec) + .withPath(task.path()) + .withFormat(task.fileFormat().toIceberg()) + .withFileSizeInBytes(task.fileSizeInBytes()) + .withMetrics(task.metrics().metrics()); + if (!icebergTable.spec().fields().isEmpty()) { + String partitionDataJson = task.partitionDataJson() + .orElseThrow(() -> new VerifyException("No partition data for partitioned table")); + builder.withPartition(PartitionData.fromJson(partitionDataJson, partitionColumnTypes)); + } + if (task.copyOnWriteDelete()) { + log.info("File %s task.path deleted", task.path()); + overwriteFiles.deleteFile(builder.build()); + } + else { + 
overwriteFiles.addFile(builder.build()); + } + writtenFiles.add(task.path()); + } + default -> throw new UnsupportedOperationException("Unsupported task content: " + task.content()); + } + } + + // try to leave as little garbage as possible behind + if (retryMode != NO_RETRIES) { + cleanExtraOutputFiles(session, writtenFiles.build()); + } + + try { + commit(overwriteFiles, session); + // commit() applies the OverwriteFiles operation; validation/commit failures are translated below + commitTransaction(transaction, "write cow"); + // commitTransaction() above finalizes the transaction; no direct transaction.commitTransaction() call is needed + return; + } + catch (ValidationException | CommitFailedException e) { + throw new TrinoException(ICEBERG_COMMIT_ERROR, "Failed to commit Iceberg update to table: " + table.getSchemaTableName(), e); + } + } + RowDelta rowDelta = transaction.newRowDelta(); table.getSnapshotId().map(icebergTable::snapshot).ifPresent(s -> rowDelta.validateFromSnapshot(s.snapshotId())); TupleDomain dataColumnPredicate = table.getEnforcedPredicate().filter((column, domain) -> !isMetadataColumnId(column.getId())); @@ -3429,7 +3599,8 @@ public Optional> applyLimit(Connect table.isRecordScannedFiles(), table.getMaxScannedFileSize(), table.getConstraintColumns(), - table.getForAnalyze()); + table.getForAnalyze(), + table.getUpdateKind()); return Optional.of(new LimitApplicationResult<>(table, false, false)); } @@ -3530,7 +3701,8 @@ else if (isMetadataColumnId(columnHandle.getId())) { table.isRecordScannedFiles(), table.getMaxScannedFileSize(), newConstraintColumns, - table.getForAnalyze()), + table.getForAnalyze(), + table.getUpdateKind()), remainingConstraint.transformKeys(ColumnHandle.class::cast), extractionResult.remainingExpression(), false)); @@ -3702,7 +3874,8 @@ public TableStatistics getTableStatistics(ConnectorSession session, ConnectorTab false, // recordScannedFiles does not affect stats originalHandle.getMaxScannedFileSize(), ImmutableSet.of(), // constraintColumns do not affect stats - Optional.empty(), // forAnalyze does not 
affect stats + Optional.empty()); // updateKind does not affect stats return getIncrementally( tableStatisticsCache, cacheKey, @@ -4045,7 +4218,7 @@ else if (strings.size() != 2) { String schema = strings.get(0); String name = strings.get(1); SchemaTableName schemaTableName = new SchemaTableName(schema, name); - ConnectorTableHandle tableHandle = getTableHandle(session, schemaTableName, Optional.empty(), Optional.empty()); + ConnectorTableHandle tableHandle = getTableHandle(session, schemaTableName, Optional.empty(), Optional.empty(), Optional.empty()); if (tableHandle == null || tableHandle instanceof CorruptedIcebergTableHandle) { // Base table is gone or table is corrupted diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java index 3432159270f5..d46274d6bf96 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSink.java @@ -431,7 +431,8 @@ private void closeWriter(int writerIndex) writeContext.getPartitionData().map(PartitionData::toJson), DATA, Optional.empty(), - writer.getFileMetrics().splitOffsets()); + writer.getFileMetrics().splitOffsets(), + false); commitTasks.add(wrappedBuffer(jsonCodec.toJsonBytes(task))); } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java index be6b8cb9dfa7..cdc3db46d84a 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSinkProvider.java @@ -37,9 +37,11 @@ import org.apache.iceberg.Schema; import org.apache.iceberg.SchemaParser; import org.apache.iceberg.io.LocationProvider; +import org.apache.iceberg.mapping.NameMappingParser; import 
java.util.Map; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.collect.Maps.transformValues; import static io.trino.plugin.iceberg.IcebergSessionProperties.maxPartitionsPerWriter; import static io.trino.plugin.iceberg.IcebergUtil.getLocationProvider; @@ -56,6 +58,7 @@ public class IcebergPageSinkProvider private final int sortingFileWriterMaxOpenFiles; private final TypeManager typeManager; private final PageSorter pageSorter; + private final IcebergPageSourceProviderFactory icebergPageSourceProviderFactory; @Inject public IcebergPageSinkProvider( @@ -65,7 +68,8 @@ public IcebergPageSinkProvider( PageIndexerFactory pageIndexerFactory, SortingFileWriterConfig sortingFileWriterConfig, TypeManager typeManager, - PageSorter pageSorter) + PageSorter pageSorter, + IcebergPageSourceProviderFactory icebergPageSourceProviderFactory) { this.fileSystemFactory = requireNonNull(fileSystemFactory, "fileSystemFactory is null"); this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null"); @@ -75,6 +79,7 @@ public IcebergPageSinkProvider( this.sortingFileWriterMaxOpenFiles = sortingFileWriterConfig.getMaxOpenSortFiles(); this.typeManager = requireNonNull(typeManager, "typeManager is null"); this.pageSorter = requireNonNull(pageSorter, "pageSorter is null"); + this.icebergPageSourceProviderFactory = requireNonNull(icebergPageSourceProviderFactory, "icebergPageSourceProviderFactory is null"); } @Override @@ -166,6 +171,7 @@ public ConnectorMergeSink createMergeSink(ConnectorTransactionHandle transaction Map partitionsSpecs = transformValues(tableHandle.partitionsSpecsAsJson(), json -> PartitionSpecParser.fromJson(schema, json)); ConnectorPageSink pageSink = createPageSink(session, tableHandle); + checkState(((IcebergTableHandle) mergeHandle.getTableHandle()).getUpdateKind().isPresent()); return new IcebergMergeSink( locationProvider, fileWriterFactory, @@ -177,6 +183,10 @@ public ConnectorMergeSink 
createMergeSink(ConnectorTransactionHandle transaction schema, partitionsSpecs, pageSink, - schema.columns().size()); + schema.columns().size(), + IcebergUtil.getTopLevelColumns(schema, typeManager), + merge.getTableHandle().getNameMappingJson().map(NameMappingParser::fromJson), + (IcebergPageSourceProvider) icebergPageSourceProviderFactory.createPageSourceProvider(), + ((IcebergTableHandle) mergeHandle.getTableHandle()).getUpdateKind().get()); } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java index aad2add5ec40..c4172fbe8bea 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergPageSourceProvider.java @@ -60,11 +60,15 @@ import io.trino.plugin.iceberg.system.files.FilesTableSplit; import io.trino.spi.Page; import io.trino.spi.TrinoException; +import io.trino.spi.block.ArrayBlock; import io.trino.spi.block.Block; +import io.trino.spi.block.BlockBuilder; import io.trino.spi.block.IntArrayBlock; +import io.trino.spi.block.LongArrayBlock; import io.trino.spi.block.RowBlock; import io.trino.spi.block.RunLengthEncodedBlock; import io.trino.spi.block.VariableWidthBlock; +import io.trino.spi.block.VariableWidthBlockBuilder; import io.trino.spi.connector.ColumnHandle; import io.trino.spi.connector.ConnectorPageSource; import io.trino.spi.connector.ConnectorPageSourceProvider; @@ -81,6 +85,7 @@ import io.trino.spi.predicate.NullableValue; import io.trino.spi.predicate.TupleDomain; import io.trino.spi.type.ArrayType; +import io.trino.spi.type.IntegerType; import io.trino.spi.type.MapType; import io.trino.spi.type.RowType; import io.trino.spi.type.Type; @@ -99,6 +104,7 @@ import org.apache.iceberg.mapping.NameMappingParser; import org.apache.iceberg.parquet.ParquetSchemaUtil; import 
org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.NestedField; import org.apache.iceberg.util.StructLikeWrapper; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.io.MessageColumnIO; @@ -169,10 +175,11 @@ import static io.trino.plugin.iceberg.IcebergSplitManager.ICEBERG_DOMAIN_COMPACTION_THRESHOLD; import static io.trino.plugin.iceberg.IcebergSplitSource.partitionMatchesPredicate; import static io.trino.plugin.iceberg.IcebergUtil.deserializePartitionValue; -import static io.trino.plugin.iceberg.IcebergUtil.getColumnHandle; import static io.trino.plugin.iceberg.IcebergUtil.getPartitionKeys; import static io.trino.plugin.iceberg.IcebergUtil.getPartitionValues; +import static io.trino.plugin.iceberg.IcebergUtil.getWriteChangeMode; import static io.trino.plugin.iceberg.IcebergUtil.schemaFromHandles; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static io.trino.plugin.iceberg.util.OrcIcebergIds.fileColumnsByIcebergId; import static io.trino.plugin.iceberg.util.OrcTypeConverter.ORC_ICEBERG_ID_KEY; import static io.trino.spi.block.PageBuilderStatus.DEFAULT_MAX_PAGE_SIZE_IN_BYTES; @@ -183,6 +190,7 @@ import static java.lang.Math.min; import static java.lang.Math.toIntExact; import static java.lang.String.format; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Locale.ENGLISH; import static java.util.Objects.checkIndex; import static java.util.Objects.requireNonNull; @@ -282,7 +290,8 @@ public ConnectorPageSource createPageSource( split.getFileFormat(), split.getFileIoProperties(), split.getDataSequenceNumber(), - tableHandle.getNameMappingJson().map(NameMappingParser::fromJson)); + tableHandle.getNameMappingJson().map(NameMappingParser::fromJson), + tableHandle.getUpdateKind().isPresent() ? 
Optional.of(getWriteChangeMode(tableHandle.getStorageProperties(), tableHandle.getUpdateKind().get())) : Optional.empty()); } public ConnectorPageSource createPageSource( @@ -304,7 +313,8 @@ public ConnectorPageSource createPageSource( IcebergFileFormat fileFormat, Map fileIoProperties, long dataSequenceNumber, - Optional nameMapping) + Optional nameMapping, + Optional writeChangeMode) { Map> partitionKeys = getPartitionKeys(partitionData, partitionSpec); TupleDomain effectivePredicate = getUnenforcedPredicate( @@ -345,6 +355,50 @@ public ConnectorPageSource createPageSource( .filter(not(icebergColumns::contains)) .forEach(requiredColumns::add); + return createPageSource(session, + inputFile, + fileSystem, + start, + length, + fileSize, + partitionSpec, + partitionData, + partitionDataJson, + fileFormat, + tableSchema, + icebergColumns, + requiredColumns, + effectivePredicate, + nameMapping, + partition, + partitionKeys, + deletes, + OptionalLong.of(dataSequenceNumber), + writeChangeMode); + } + + public ConnectorPageSource createPageSource( + ConnectorSession session, + TrinoInputFile inputFile, + TrinoFileSystem fileSystem, + long start, + long length, + long fileSize, + PartitionSpec partitionSpec, + PartitionData partitionData, + String partitionDataJson, + IcebergFileFormat fileFormat, + Schema tableSchema, + List regularColumns, + List requiredColumns, + TupleDomain predicate, + Optional nameMapping, + String partition, + Map> partitionKeys, + List deleteFiles, + OptionalLong dataSequenceNumber, + Optional writeChangeMode) + { ReaderPageSourceWithRowPositions readerPageSourceWithRowPositions = createDataPageSource( session, inputFile, @@ -356,20 +410,24 @@ public ConnectorPageSource createPageSource( fileFormat, tableSchema, requiredColumns, - effectivePredicate, + predicate, nameMapping, partition, - partitionKeys); + partitionKeys, + deleteFiles, + dataSequenceNumber, + writeChangeMode); ConnectorPageSource pageSource = 
readerPageSourceWithRowPositions.pageSource(); // filter out deleted rows - if (!deletes.isEmpty()) { + if (!deleteFiles.isEmpty()) { + checkState(dataSequenceNumber.isPresent()); Supplier> deletePredicate = memoize(() -> getDeleteManager(partitionSpec, partitionData) .getDeletePredicate( - path, - dataSequenceNumber, - deletes, + inputFile.location().toString(), + dataSequenceNumber.getAsLong(), + deleteFiles, requiredColumns, tableSchema, readerPageSourceWithRowPositions, @@ -377,11 +435,11 @@ public ConnectorPageSource createPageSource( pageSource = TransformConnectorPageSource.create(pageSource, page -> { try { Optional rowPredicate = deletePredicate.get(); - rowPredicate.ifPresent(predicate -> predicate.applyFilter(page)); - if (icebergColumns.size() == page.getChannelCount()) { + rowPredicate.ifPresent(pred -> pred.applyFilter(page)); + if (regularColumns.size() == page.getChannelCount()) { return page; } - return new PrefixColumnsSourcePage(page, icebergColumns.size()); + return new PrefixColumnsSourcePage(page, regularColumns.size()); } catch (RuntimeException e) { throwIfInstanceOf(e, TrinoException.class); @@ -392,7 +450,7 @@ public ConnectorPageSource createPageSource( return pageSource; } - private DeleteManager getDeleteManager(PartitionSpec partitionSpec, PartitionData partitionData) + public DeleteManager getDeleteManager(PartitionSpec partitionSpec, PartitionData partitionData) { if (partitionSpec.isUnpartitioned()) { return unpartitionedTableDeleteManager; @@ -447,7 +505,7 @@ private TupleDomain prunePredicate( } Set partitionColumns = partitionKeys.keySet().stream() - .map(fieldId -> getColumnHandle(tableSchema.findField(fieldId), typeManager)) + .map(fieldId -> getColumnHandle(tableSchema.findField(fieldId))) .collect(toImmutableSet()); Supplier> partitionValues = memoize(() -> getPartitionValues(partitionColumns, partitionKeys)); if (!partitionMatchesPredicate(partitionColumns, partitionValues, unenforcedPredicate)) { @@ -461,16 +519,16 @@ 
private TupleDomain prunePredicate( .filter((handle, domain) -> !domain.contains(fileStatisticsDomain.getDomain(handle, domain.getType()))); } - private Set requiredColumnsForDeletes(Schema schema, List deletes) + public Set requiredColumnsForDeletes(Schema schema, List deletes) { ImmutableSet.Builder requiredColumns = ImmutableSet.builder(); for (DeleteFile deleteFile : deletes) { if (deleteFile.content() == POSITION_DELETES) { - requiredColumns.add(getColumnHandle(ROW_POSITION, typeManager)); + requiredColumns.add(getColumnHandle(ROW_POSITION)); } else if (deleteFile.content() == EQUALITY_DELETES) { deleteFile.equalityFieldIds().stream() - .map(id -> getColumnHandle(schema.findField(id), typeManager)) + .map(id -> getColumnHandle(schema.findField(id))) .forEach(requiredColumns::add); } } @@ -478,6 +536,11 @@ else if (deleteFile.content() == EQUALITY_DELETES) { return requiredColumns.build(); } + public IcebergColumnHandle getColumnHandle(NestedField column) + { + return IcebergUtil.getColumnHandle(column, typeManager); + } + private ConnectorPageSource openDeletes( ConnectorSession session, TrinoFileSystem fileSystem, @@ -499,7 +562,10 @@ private ConnectorPageSource openDeletes( tupleDomain, Optional.empty(), "", - ImmutableMap.of()) + ImmutableMap.of(), + ImmutableList.of(), + OptionalLong.empty(), + Optional.empty()) .pageSource(); } @@ -517,7 +583,10 @@ private ReaderPageSourceWithRowPositions createDataPageSource( TupleDomain predicate, Optional nameMapping, String partition, - Map> partitionKeys) + Map> partitionKeys, + List deleteFiles, + OptionalLong dataSequenceNumber, + Optional writeChangeMode) { return switch (fileFormat) { case ORC -> createOrcPageSource( @@ -541,8 +610,12 @@ private ReaderPageSourceWithRowPositions createDataPageSource( typeManager, nameMapping, partition, - partitionKeys); + partitionKeys, + deleteFiles, + dataSequenceNumber, + writeChangeMode); case PARQUET -> createParquetPageSource( + session, inputFile, start, length, @@ -564,7 
+637,10 @@ private ReaderPageSourceWithRowPositions createDataPageSource( fileFormatDataSourceStats, nameMapping, partition, - partitionKeys); + partitionKeys, + deleteFiles, + dataSequenceNumber, + writeChangeMode); case AVRO -> createAvroPageSource( inputFile, start, @@ -574,7 +650,10 @@ private ReaderPageSourceWithRowPositions createDataPageSource( fileSchema, nameMapping, partition, - dataColumns); + dataColumns, + deleteFiles, + dataSequenceNumber, + writeChangeMode); }; } @@ -626,7 +705,10 @@ private static ReaderPageSourceWithRowPositions createOrcPageSource( TypeManager typeManager, Optional nameMapping, String partition, - Map> partitionKeys) + Map> partitionKeys, + List deleteFiles, + OptionalLong dataSequenceNumber, + Optional writeChangeMode) { OrcDataSource orcDataSource = null; try { @@ -685,7 +767,13 @@ else if (column.isFileModifiedTimeColumn()) { } else if (column.isMergeRowIdColumn()) { appendRowNumberColumn = true; - transforms.transform(MergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData))); + checkState(writeChangeMode.isPresent()); + if (writeChangeMode.get().equals(MOR)) { + transforms.transform(MorMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData))); + } + else { + transforms.transform(CowMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData), dataSequenceNumber.getAsLong(), deleteFiles)); + } } else if (column.isRowPositionColumn()) { appendRowNumberColumn = true; @@ -896,7 +984,8 @@ public OrcColumn get(String fieldName) } } - private static ReaderPageSourceWithRowPositions createParquetPageSource( + public static ReaderPageSourceWithRowPositions createParquetPageSource( + ConnectorSession session, TrinoInputFile inputFile, long start, long length, @@ -909,7 +998,10 @@ private static ReaderPageSourceWithRowPositions createParquetPageSource( 
FileFormatDataSourceStats fileFormatDataSourceStats, Optional nameMapping, String partition, - Map> partitionKeys) + Map> partitionKeys, + List deleteFiles, + OptionalLong dataSequenceNumber, + Optional writeChangeMode) { AggregatedMemoryContext memoryContext = newSimpleAggregatedMemoryContext(); @@ -960,7 +1052,13 @@ else if (column.isFileModifiedTimeColumn()) { } else if (column.isMergeRowIdColumn()) { appendRowNumberColumn = true; - transforms.transform(MergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData))); + checkState(writeChangeMode.isPresent()); + if (writeChangeMode.get().equals(MOR)) { + transforms.transform(MorMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData))); + } + else { + transforms.transform(CowMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData), dataSequenceNumber.getAsLong(), deleteFiles)); + } } else if (column.isRowPositionColumn()) { appendRowNumberColumn = true; @@ -1112,7 +1210,10 @@ private static ReaderPageSourceWithRowPositions createAvroPageSource( Schema fileSchema, Optional nameMapping, String partition, - List columns) + List columns, + List deleteFiles, + OptionalLong dataSequenceNumber, + Optional writeChangeMode) { InputFile file = new ForwardingInputFile(inputFile); OptionalLong fileModifiedTime = OptionalLong.empty(); @@ -1156,7 +1257,13 @@ else if (column.isFileModifiedTimeColumn()) { } else if (column.isMergeRowIdColumn()) { appendRowNumberColumn = true; - transforms.transform(MergeRowIdTransform.create(utf8Slice(file.location()), partitionSpecId, utf8Slice(partitionData))); + checkState(writeChangeMode.isPresent()); + if (writeChangeMode.get().equals(MOR)) { + transforms.transform(MorMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData))); + } + else { + 
transforms.transform(CowMergeRowIdTransform.create(utf8Slice(inputFile.location().toString()), partitionSpecId, utf8Slice(partitionData), dataSequenceNumber.getAsLong(), deleteFiles)); + } } else if (column.isRowPositionColumn()) { appendRowNumberColumn = true; @@ -1495,12 +1602,12 @@ public int hashCode() } } - private record MergeRowIdTransform(VariableWidthBlock filePath, IntArrayBlock partitionSpecId, VariableWidthBlock partitionData) + private record MorMergeRowIdTransform(VariableWidthBlock filePath, IntArrayBlock partitionSpecId, VariableWidthBlock partitionData) implements Function { private static Function create(Slice filePath, int partitionSpecId, Slice partitionData) { - return new MergeRowIdTransform( + return new MorMergeRowIdTransform( new VariableWidthBlock(1, filePath, new int[] {0, filePath.length()}, Optional.empty()), new IntArrayBlock(1, Optional.empty(), new int[] {partitionSpecId}), new VariableWidthBlock(1, partitionData, new int[] {0, partitionData.length()}, Optional.empty())); @@ -1520,6 +1627,162 @@ public Block apply(SourcePage page) } } + private record CowMergeRowIdTransform(VariableWidthBlock filePath, IntArrayBlock partitionSpecId, VariableWidthBlock partitionData, + LongArrayBlock dataFileDataSequenceNumber, ArrayBlock deleteFilesContent, ArrayBlock deleteFilesPath, + ArrayBlock deleteFilesFormat, ArrayBlock deleteFilesRecordCount, ArrayBlock deleteFilesFileSizeInBytes, + ArrayBlock deleteFilesEqualityFieldIds, ArrayBlock deleteFilesRowPositionLowerBound, ArrayBlock deleteFilesRowPositionUpperBound, + ArrayBlock deleteFilesDataSequenceNumber) + implements Function + { + private static Function create(Slice filePath, int partitionSpecId, Slice partitionData, long dataFileDataSequenceNumber, List deleteFiles) + { + Function, VariableWidthBlock> strList2Slice = (strList) -> { + // 1. 
Estimate total byte size to size the builder appropriately + int totalBytes = 0; + for (String s : strList) { + totalBytes += s.getBytes(UTF_8).length; + } + + // 2. Create a VariableWidthBlockBuilder: + // - null BlockBuilderStatus (no per-query memory accounting needed here) + // - strList.size() entries + // - totalBytes as approximate expected bytes + VariableWidthBlockBuilder builder = + new VariableWidthBlockBuilder(null, strList.size(), totalBytes); + + // 3. Write each string as a Slice entry + for (String s : strList) { + byte[] bytes = s.getBytes(UTF_8); + builder.writeEntry(bytes, 0, bytes.length); + } + + // 4. Build and cast to VariableWidthBlock + return (VariableWidthBlock) builder.build(); + }; + + int[] deleteFilesFileContent = deleteFiles.stream() + .mapToInt(deleteFile -> deleteFile.content().id()) + .toArray(); + + IntArrayBlock deleteFilesFileContentBlock = new IntArrayBlock(deleteFilesFileContent.length, Optional.empty(), deleteFilesFileContent); + VariableWidthBlock deleteFilesPathBlock = strList2Slice.apply(deleteFiles.stream().map(DeleteFile::path).collect(toImmutableList())); + VariableWidthBlock deleteFilesFormatBlock = strList2Slice.apply(deleteFiles.stream().map(deleteFile -> deleteFile.format().toString()).collect(toImmutableList())); + + long[] deleteFilesRecordCount = deleteFiles.stream() + .mapToLong(DeleteFile::recordCount) + .toArray(); + + LongArrayBlock deleteFilesRecordCountBlock = new LongArrayBlock(deleteFilesRecordCount.length, Optional.empty(), deleteFilesRecordCount); + + long[] deleteFilesFileSizeInBytes = deleteFiles.stream() + .mapToLong(DeleteFile::fileSizeInBytes) + .toArray(); + + LongArrayBlock deleteFilesFileSizeInBytesBlock = new LongArrayBlock(deleteFilesFileSizeInBytes.length, Optional.empty(), deleteFilesFileSizeInBytes); + + ArrayBlock deleteFilesEqualityFieldIdsBlock = buildNestedIntArrayBlock(deleteFiles.stream().map(DeleteFile::equalityFieldIds).collect(toImmutableList())); + + LongArrayBlock 
deleteFilesRowPositionLowerBoundBlock = toLongArrayBlock(deleteFiles.stream().map(DeleteFile::rowPositionLowerBound).collect(toImmutableList())); + + LongArrayBlock deleteFilesRowPositionUpperBoundBlock = toLongArrayBlock(deleteFiles.stream().map(DeleteFile::rowPositionUpperBound).collect(toImmutableList())); + + long[] deleteFilesDataSequenceNumber = deleteFiles.stream() + .mapToLong(DeleteFile::dataSequenceNumber) + .toArray(); + + LongArrayBlock deleteFilesDataSequenceNumberBlock = new LongArrayBlock(deleteFilesDataSequenceNumber.length, Optional.empty(), deleteFilesDataSequenceNumber); + + return new CowMergeRowIdTransform( + new VariableWidthBlock(1, filePath, new int[] {0, filePath.length()}, Optional.empty()), + new IntArrayBlock(1, Optional.empty(), new int[] {partitionSpecId}), + new VariableWidthBlock(1, partitionData, new int[] {0, partitionData.length()}, Optional.empty()), + new LongArrayBlock(1, Optional.empty(), new long[] {dataFileDataSequenceNumber}), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesFileContentBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesPathBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesFormatBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesRecordCountBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesFileSizeInBytesBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesEqualityFieldIdsBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesRowPositionLowerBoundBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, deleteFiles.size()}, deleteFilesRowPositionUpperBoundBlock), + ArrayBlock.fromElementBlock(1, Optional.empty(), new int[] {0, 
deleteFiles.size()}, deleteFilesDataSequenceNumberBlock)); + } + + private static ArrayBlock buildNestedIntArrayBlock(List> data) + { + Type elementType = IntegerType.INTEGER; // the inner-most element type + ArrayType innerArrayType = new ArrayType(elementType); // List → Array + ArrayType outerArrayType = new ArrayType(innerArrayType); // List> → Array> + + BlockBuilder outerBuilder = outerArrayType.createBlockBuilder(null, data.size()); + + for (List innerList : data) { + BlockBuilder innerBuilder = innerArrayType.createBlockBuilder(null, innerList.size()); + for (Integer value : innerList) { + elementType.writeLong(innerBuilder, value.longValue()); + } + Block innerBlock = innerBuilder.build(); + + innerArrayType.writeObject(outerBuilder, innerBlock); + } + + Block genericBlock = outerBuilder.build(); + return (ArrayBlock) genericBlock; + } + + private static LongArrayBlock toLongArrayBlock(List> data) + { + int positionCount = data.size(); + + // 1) allocate storage + long[] values = new long[positionCount]; + boolean[] valueIsNull = new boolean[positionCount]; + + // 2) fill in values + null‐mask + for (int i = 0; i < positionCount; i++) { + Optional opt = data.get(i); + if (opt.isPresent()) { + values[i] = opt.get(); + valueIsNull[i] = false; + } + else { + // values[i] may be left at 0, but + // valueIsNull[i] must be true + valueIsNull[i] = true; + } + } + + // 3) build the block + return new LongArrayBlock( + positionCount, + Optional.of(valueIsNull), + values); + } + + @Override + public Block apply(SourcePage page) + { + Block rowPosition = page.getBlock(page.getChannelCount() - 1); + Block[] fields = new Block[] { + RunLengthEncodedBlock.create(filePath, rowPosition.getPositionCount()), + rowPosition, + RunLengthEncodedBlock.create(partitionSpecId, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(partitionData, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(dataFileDataSequenceNumber, rowPosition.getPositionCount()), + 
RunLengthEncodedBlock.create(deleteFilesContent, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesPath, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesFormat, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesRecordCount, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesFileSizeInBytes, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesEqualityFieldIds, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesRowPositionLowerBound, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesRowPositionUpperBound, rowPosition.getPositionCount()), + RunLengthEncodedBlock.create(deleteFilesDataSequenceNumber, rowPosition.getPositionCount())}; + + return RowBlock.fromFieldBlocks(rowPosition.getPositionCount(), fields); + } + } + private record GetRowPositionFromSource() implements Function { diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableHandle.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableHandle.java index 710d71eadd74..58cd7867b94c 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableHandle.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergTableHandle.java @@ -22,6 +22,7 @@ import io.airlift.units.DataSize; import io.trino.spi.connector.ConnectorTableHandle; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.predicate.TupleDomain; import java.util.Locale; @@ -73,6 +74,8 @@ public class IcebergTableHandle // ANALYZE only. 
Coordinator-only private final Optional forAnalyze; + private final Optional updateKind; + @JsonCreator @DoNotCall // For JSON deserialization only public static IcebergTableHandle fromJsonForDeserializationOnly( @@ -89,7 +92,8 @@ public static IcebergTableHandle fromJsonForDeserializationOnly( @JsonProperty("projectedColumns") Set projectedColumns, @JsonProperty("nameMappingJson") Optional nameMappingJson, @JsonProperty("tableLocation") String tableLocation, - @JsonProperty("storageProperties") Map storageProperties) + @JsonProperty("storageProperties") Map storageProperties, + @JsonProperty("updateKind") Optional updateKind) { return new IcebergTableHandle( schemaName, @@ -110,7 +114,8 @@ public static IcebergTableHandle fromJsonForDeserializationOnly( false, Optional.empty(), ImmutableSet.of(), - Optional.empty()); + Optional.empty(), + updateKind); } public IcebergTableHandle( @@ -132,7 +137,8 @@ public IcebergTableHandle( boolean recordScannedFiles, Optional maxScannedFileSize, Set constraintColumns, - Optional forAnalyze) + Optional forAnalyze, + Optional updateKind) { this.schemaName = requireNonNull(schemaName, "schemaName is null"); this.tableName = requireNonNull(tableName, "tableName is null"); @@ -153,6 +159,7 @@ public IcebergTableHandle( this.maxScannedFileSize = requireNonNull(maxScannedFileSize, "maxScannedFileSize is null"); this.constraintColumns = ImmutableSet.copyOf(requireNonNull(constraintColumns, "constraintColumns is null")); this.forAnalyze = requireNonNull(forAnalyze, "forAnalyze is null"); + this.updateKind = requireNonNull(updateKind, "updateKind is null"); } @JsonProperty @@ -273,6 +280,12 @@ public Optional getForAnalyze() return forAnalyze; } + @JsonProperty + public Optional getUpdateKind() + { + return updateKind; + } + public SchemaTableName getSchemaTableName() { return new SchemaTableName(schemaName, tableName); @@ -304,7 +317,8 @@ public IcebergTableHandle withProjectedColumns(Set projecte recordScannedFiles, maxScannedFileSize, 
constraintColumns, - forAnalyze); + forAnalyze, + updateKind); } public IcebergTableHandle forAnalyze() @@ -328,7 +342,8 @@ public IcebergTableHandle forAnalyze() recordScannedFiles, maxScannedFileSize, constraintColumns, - Optional.of(true)); + Optional.of(true), + updateKind); } public IcebergTableHandle forOptimize(boolean recordScannedFiles, DataSize maxScannedFileSize) @@ -352,7 +367,8 @@ public IcebergTableHandle forOptimize(boolean recordScannedFiles, DataSize maxSc recordScannedFiles, Optional.of(maxScannedFileSize), constraintColumns, - forAnalyze); + forAnalyze, + updateKind); } public IcebergTableHandle withTablePartitioning(Optional requiredTablePartitioning) @@ -376,7 +392,8 @@ public IcebergTableHandle withTablePartitioning(Optional SUPPORTED_PROPERTIES = ImmutableSet.builder() .add(FILE_FORMAT_PROPERTY) @@ -85,6 +88,9 @@ public class IcebergTableProperties .add(DATA_LOCATION_PROPERTY) .add(EXTRA_PROPERTIES_PROPERTY) .add(PARQUET_BLOOM_FILTER_COLUMNS_PROPERTY) + .add(WRITE_DELETE_MODE) + .add(WRITE_UPDATE_MODE) + .add(WRITE_MERGE_MODE) .build(); // These properties are used by Trino or Iceberg internally and cannot be set directly by users through extra_properties @@ -216,6 +222,24 @@ public IcebergTableProperties( "File system location URI for the table's data files", null, false)) + .add(enumProperty( + WRITE_DELETE_MODE, + "Mode used for table delete command: copy-on-write or merge-on-read (v2 only)", + WriteChangeMode.class, + icebergConfig.getWriteDeleteMode(), + false)) + .add(enumProperty( + WRITE_UPDATE_MODE, + "Mode used for table update command: copy-on-write or merge-on-read (v2 only)", + WriteChangeMode.class, + icebergConfig.getWriteUpdateMode(), + false)) + .add(enumProperty( + WRITE_MERGE_MODE, + "Mode used for table merge command: copy-on-write or merge-on-read (v2 only)", + WriteChangeMode.class, + icebergConfig.getWriteMergeMode(), + false)) .build(); checkState(SUPPORTED_PROPERTIES.containsAll(tableProperties.stream() @@ -329,6 
+353,21 @@ public static Optional getDataLocation(Map tableProperti return Optional.ofNullable((String) tableProperties.get(DATA_LOCATION_PROPERTY)); } + public static Optional getWriteDeleteMode(Map tableProperties) + { + return Optional.ofNullable((WriteChangeMode) tableProperties.get(WRITE_DELETE_MODE)); + } + + public static Optional getWriteUpdateMode(Map tableProperties) + { + return Optional.ofNullable((WriteChangeMode) tableProperties.get(WRITE_UPDATE_MODE)); + } + + public static Optional getWriteMergeMode(Map tableProperties) + { + return Optional.ofNullable((WriteChangeMode) tableProperties.get(WRITE_MERGE_MODE)); + } + public static Optional> getExtraProperties(Map tableProperties) { return Optional.ofNullable((Map) tableProperties.get(EXTRA_PROPERTIES_PROPERTY)); diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java index 0b309f98d275..e8a19e5e256d 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergUtil.java @@ -41,6 +41,7 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.ConnectorTableMetadata; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.function.InvocationConvention; import io.trino.spi.predicate.Domain; import io.trino.spi.predicate.NullableValue; @@ -141,6 +142,9 @@ import static io.trino.plugin.iceberg.IcebergTableProperties.PROTECTED_ICEBERG_NATIVE_PROPERTIES; import static io.trino.plugin.iceberg.IcebergTableProperties.SORTED_BY_PROPERTY; import static io.trino.plugin.iceberg.IcebergTableProperties.SUPPORTED_PROPERTIES; +import static io.trino.plugin.iceberg.IcebergTableProperties.WRITE_DELETE_MODE; +import static io.trino.plugin.iceberg.IcebergTableProperties.WRITE_MERGE_MODE; +import static 
io.trino.plugin.iceberg.IcebergTableProperties.WRITE_UPDATE_MODE; import static io.trino.plugin.iceberg.IcebergTableProperties.getPartitioning; import static io.trino.plugin.iceberg.IcebergTableProperties.getSortOrder; import static io.trino.plugin.iceberg.IcebergTableProperties.validateCompression; @@ -152,6 +156,7 @@ import static io.trino.plugin.iceberg.TypeConverter.toIcebergType; import static io.trino.plugin.iceberg.TypeConverter.toIcebergTypeForNewColumn; import static io.trino.plugin.iceberg.TypeConverter.toTrinoType; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static io.trino.plugin.iceberg.util.Timestamps.timestampTzFromMicros; import static io.trino.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS; @@ -187,7 +192,9 @@ import static org.apache.iceberg.TableProperties.COMMIT_NUM_RETRIES; import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT; import static org.apache.iceberg.TableProperties.DEFAULT_FILE_FORMAT_DEFAULT; +import static org.apache.iceberg.TableProperties.DELETE_MODE; import static org.apache.iceberg.TableProperties.FORMAT_VERSION; +import static org.apache.iceberg.TableProperties.MERGE_MODE; import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED; import static org.apache.iceberg.TableProperties.OBJECT_STORE_ENABLED_DEFAULT; import static org.apache.iceberg.TableProperties.ORC_BLOOM_FILTER_COLUMNS; @@ -195,6 +202,7 @@ import static org.apache.iceberg.TableProperties.ORC_COMPRESSION; import static org.apache.iceberg.TableProperties.PARQUET_BLOOM_FILTER_COLUMN_ENABLED_PREFIX; import static org.apache.iceberg.TableProperties.PARQUET_COMPRESSION; +import static org.apache.iceberg.TableProperties.UPDATE_MODE; import static org.apache.iceberg.TableProperties.WRITE_DATA_LOCATION; import static org.apache.iceberg.TableProperties.WRITE_LOCATION_PROVIDER_IMPL; import static org.apache.iceberg.TableUtil.formatVersion; @@ -369,6 +377,12 @@ 
public static Map getIcebergTableProperties(BaseTable icebergTab properties.put(OBJECT_STORE_LAYOUT_ENABLED_PROPERTY, true); } + for (String tableChangeMode : new String[] {UPDATE_MODE, DELETE_MODE, MERGE_MODE}) { + if (icebergTable.properties().containsKey(tableChangeMode)) { + properties.put(toTrinoIcebergPropertyName(tableChangeMode), WriteChangeMode.fromIcebergString(icebergTable.properties().get(tableChangeMode))); + } + } + Optional dataLocation = Optional.ofNullable(icebergTable.properties().get(WRITE_DATA_LOCATION)); dataLocation.ifPresent(location -> properties.put(DATA_LOCATION_PROPERTY, location)); @@ -902,7 +916,9 @@ public static Map createTableProperties(ConnectorTableMetadata t ImmutableMap.Builder propertiesBuilder = ImmutableMap.builder(); IcebergFileFormat fileFormat = IcebergTableProperties.getFileFormat(tableMetadata.getProperties()); propertiesBuilder.put(DEFAULT_FILE_FORMAT, fileFormat.toIceberg().toString()); - propertiesBuilder.put(FORMAT_VERSION, Integer.toString(IcebergTableProperties.getFormatVersion(tableMetadata.getProperties()))); + + int formatVersion = IcebergTableProperties.getFormatVersion(tableMetadata.getProperties()); + propertiesBuilder.put(FORMAT_VERSION, Integer.toString(formatVersion)); IcebergTableProperties.getMaxCommitRetry(tableMetadata.getProperties()) .ifPresent(value -> propertiesBuilder.put(COMMIT_NUM_RETRIES, Integer.toString(value))); @@ -955,6 +971,16 @@ public static Map createTableProperties(ConnectorTableMetadata t propertiesBuilder.put(TABLE_COMMENT, tableMetadata.getComment().get()); } + Optional writeDeleteMode = IcebergTableProperties.getWriteDeleteMode(tableMetadata.getProperties()); + Optional writeUpdateMode = IcebergTableProperties.getWriteUpdateMode(tableMetadata.getProperties()); + Optional writeMergeMode = IcebergTableProperties.getWriteMergeMode(tableMetadata.getProperties()); + + if (formatVersion >= 2) { + propertiesBuilder.put(DELETE_MODE, writeDeleteMode.orElse(MOR).toIcebergString()); + 
propertiesBuilder.put(UPDATE_MODE, writeUpdateMode.orElse(MOR).toIcebergString()); + propertiesBuilder.put(MERGE_MODE, writeMergeMode.orElse(MOR).toIcebergString()); + } + Map baseProperties = propertiesBuilder.buildOrThrow(); Map extraProperties = IcebergTableProperties.getExtraProperties(tableMetadata.getProperties()).orElseGet(ImmutableMap::of); @@ -1251,4 +1277,38 @@ public static ManifestReader> readerForManifest(Manifes case DELETES -> ManifestFiles.readDeleteManifest(manifest, fileIO, specsById); }; } + + public static WriteChangeMode getWriteChangeMode(Map storageProperties, UpdateKind updateKind) + { + return WriteChangeMode.fromIcebergString(storageProperties.getOrDefault(getChangeName(updateKind), MOR.toIcebergString())); + } + + public static String getChangeName(UpdateKind updateKind) + { + return switch (updateKind) { + case DELETE -> DELETE_MODE; + case UPDATE -> UPDATE_MODE; + case MERGE -> MERGE_MODE; + }; + } + + public static String toIcebergPropertyName(String icebergPropertyName) + { + return switch (icebergPropertyName) { + case WRITE_DELETE_MODE -> DELETE_MODE; + case WRITE_UPDATE_MODE -> UPDATE_MODE; + case WRITE_MERGE_MODE -> MERGE_MODE; + default -> throw new IllegalStateException("Unexpected icebergPropertyName: " + icebergPropertyName); + }; + } + + public static String toTrinoIcebergPropertyName(String icebergPropertyName) + { + return switch (icebergPropertyName) { + case DELETE_MODE -> WRITE_DELETE_MODE; + case UPDATE_MODE -> WRITE_UPDATE_MODE; + case MERGE_MODE -> WRITE_MERGE_MODE; + default -> throw new IllegalStateException("Unexpected icebergPropertyName: " + icebergPropertyName); + }; + } } diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/WriteChangeMode.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/WriteChangeMode.java new file mode 100644 index 000000000000..5c4beee3155c --- /dev/null +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/WriteChangeMode.java @@ -0,0 +1,46 @@ 
+/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.iceberg; + +public enum WriteChangeMode +{ + MOR("merge-on-read"), + COW("copy-on-write"); + + private final String icebergValue; + + WriteChangeMode(String icebergValue) + { + this.icebergValue = icebergValue; + } + + public String toIcebergString() + { + return icebergValue; + } + + public WriteChangeMode alternate() + { + return this == MOR ? COW : MOR; + } + + public static WriteChangeMode fromIcebergString(String value) + { + return switch (value) { + case "merge-on-read" -> MOR; + case "copy-on-write" -> COW; + default -> throw new IllegalArgumentException("Unexpected value: " + value); + }; + } +} diff --git a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/PositionDeleteWriter.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/PositionDeleteWriter.java index 1f6bc8f71543..9dc8bf7037d3 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/PositionDeleteWriter.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/delete/PositionDeleteWriter.java @@ -100,7 +100,8 @@ public Collection write(ImmutableLongBitmapDataProvider rowsToDelete) partition.map(PartitionData::toJson), FileContent.POSITION_DELETES, Optional.of(dataFilePath), - writer.getFileMetrics().splitOffsets()); + writer.getFileMetrics().splitOffsets(), + false); return List.of(wrappedBuffer(jsonCodec.toJsonBytes(task))); } diff --git 
a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/functions/tablechanges/TableChangesFunctionProcessor.java b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/functions/tablechanges/TableChangesFunctionProcessor.java index cbed46f2a570..e19325626263 100644 --- a/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/functions/tablechanges/TableChangesFunctionProcessor.java +++ b/plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/functions/tablechanges/TableChangesFunctionProcessor.java @@ -132,7 +132,8 @@ else if (column.getId() == DATA_CHANGE_ORDINAL_ID) { split.fileFormat(), split.fileIoProperties(), 0, - functionHandle.nameMappingJson().map(NameMappingParser::fromJson)); + functionHandle.nameMappingJson().map(NameMappingParser::fromJson), + Optional.empty()); this.delegateColumnMap = delegateColumnMap; this.changeTypeIndex = changeTypeIndex; diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java index 173dcc0626a8..76fe54315543 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorSmokeTest.java @@ -115,7 +115,10 @@ public void testShowCreateTable() "WITH \\(\n" + " format = '" + format.name() + "',\n" + " format_version = 2,\n" + - format(" location = '.*/" + schemaName + "/region.*'\n") + + format(" location = '.*/" + schemaName + "/region.*',\n") + + " write_delete_mode = 'MOR',\n" + + " write_merge_mode = 'MOR',\n" + + " write_update_mode = 'MOR'\n" + "\\)"); } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java index 29a3a0f40d6c..5555edf9d23e 100644 --- 
a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java @@ -187,13 +187,15 @@ public abstract class BaseIcebergConnectorTest private static final Pattern WITH_CLAUSE_EXTRACTOR = Pattern.compile(".*(WITH\\s*\\([^)]*\\))\\s*$", Pattern.DOTALL); protected final IcebergFileFormat format; + protected final WriteChangeMode writeChangeMode; protected TrinoFileSystem fileSystem; protected TimeUnit storageTimePrecision; - protected BaseIcebergConnectorTest(IcebergFileFormat format) + protected BaseIcebergConnectorTest(IcebergFileFormat format, WriteChangeMode writeChangeMode) { this.format = requireNonNull(format, "format is null"); + this.writeChangeMode = requireNonNull(writeChangeMode, "format is null"); } @Override @@ -213,6 +215,9 @@ protected IcebergQueryRunner.Builder createQueryRunnerBuilder() .put("iceberg.allowed-extra-properties", "extra.property.one,extra.property.two,extra.property.three,sorted_by") // Allows testing the sorting writer flushing to the file system with smaller tables .put("iceberg.writer-sort-buffer-size", "1MB") + .put("iceberg.write-delete-mode", writeChangeMode.toString()) + .put("iceberg.write-update-mode", writeChangeMode.toString()) + .put("iceberg.write-merge-mode", writeChangeMode.toString()) .buildOrThrow()) .setInitialTables(REQUIRED_TPCH_TABLES); } @@ -383,7 +388,10 @@ public void testShowCreateTable() "WITH (\n" + " format = '" + format.name() + "',\n" + " format_version = 2,\n" + - " location = '\\E.*/tpch/orders-.*\\Q'\n" + + " location = '\\E.*/tpch/orders-.*\\Q',\n" + + format(" write_delete_mode = '%s',\n", writeChangeMode.toString()) + + format(" write_merge_mode = '%s',\n", writeChangeMode.toString()) + + format(" write_update_mode = '%s'\n", writeChangeMode.toString()) + ")\\E"); } @@ -1269,7 +1277,10 @@ public void testCreatePartitionedTableAs() "WITH (" + "format_version = 2," + "location = '" + 
tempDirPath + "', " + - "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(\"order key\",9)']" + + "partitioning = ARRAY['ORDER_STATUS', 'Ship_Priority', 'Bucket(\"order key\",9)']," + + "write_delete_mode = '" + writeChangeMode + "'," + + "write_merge_mode = '" + writeChangeMode + "'," + + "write_update_mode = '" + writeChangeMode + "'" + ") " + "AS " + "SELECT orderkey AS \"order key\", shippriority AS ship_priority, orderstatus AS order_status " + @@ -1286,13 +1297,19 @@ public void testCreatePartitionedTableAs() " format = '%s',\n" + " format_version = 2,\n" + " location = '%s',\n" + - " partitioning = ARRAY['order_status','ship_priority','bucket(\"order key\", 9)']\n" + + " partitioning = ARRAY['order_status','ship_priority','bucket(\"order key\", 9)'],\n" + + " write_delete_mode = '%s',\n" + + " write_merge_mode = '%s',\n" + + " write_update_mode = '%s'\n" + ")", getSession().getCatalog().orElseThrow(), getSession().getSchema().orElseThrow(), "test_create_partitioned_table_as", format, - tempDirPath)); + tempDirPath, + writeChangeMode, + writeChangeMode, + writeChangeMode)); assertQuery("SELECT * from test_create_partitioned_table_as", "SELECT orderkey, shippriority, orderstatus FROM orders"); @@ -1524,25 +1541,25 @@ public void testSortOrderChange() Session withSmallRowGroups = withSmallRowGroups(getSession()); try (TestTable table = newTrinoTable( "test_sort_order_change", - "WITH (sorted_by = ARRAY['comment']) AS SELECT * FROM nation WITH NO DATA")) { - assertUpdate(withSmallRowGroups, "INSERT INTO " + table.getName() + " SELECT * FROM nation", 25); + "WITH (sorted_by = ARRAY['custkey']) AS SELECT * FROM customer WITH NO DATA")) { + assertUpdate(withSmallRowGroups, "INSERT INTO " + table.getName() + " SELECT * FROM customer", 1500); Set sortedByComment = new HashSet<>(); computeActual("SELECT file_path from \"" + table.getName() + "$files\"").getOnlyColumnAsSet() .forEach(fileName -> sortedByComment.add((String) fileName)); assertUpdate("ALTER 
TABLE " + table.getName() + " SET PROPERTIES sorted_by = ARRAY['name']"); - assertUpdate(withSmallRowGroups, "INSERT INTO " + table.getName() + " SELECT * FROM nation", 25); + assertUpdate(withSmallRowGroups, "INSERT INTO " + table.getName() + " SELECT * from customer", 1500); for (Object filePath : computeActual("SELECT file_path from \"" + table.getName() + "$files\"").getOnlyColumnAsSet()) { String path = (String) filePath; if (sortedByComment.contains(path)) { - assertThat(isFileSorted(path, "comment")).isTrue(); + assertThat(isFileSorted(path, "custkey")).isTrue(); } else { assertThat(isFileSorted(path, "name")).isTrue(); } } - assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM nation UNION ALL SELECT * FROM nation"); + assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM customer UNION ALL SELECT * FROM customer"); } } @@ -1554,12 +1571,12 @@ public void testSortingDisabled() .build(); try (TestTable table = newTrinoTable( "test_sorting_disabled", - "WITH (sorted_by = ARRAY['comment']) AS SELECT * FROM nation WITH NO DATA")) { - assertUpdate(withSortingDisabled, "INSERT INTO " + table.getName() + " SELECT * FROM nation", 25); + "WITH (sorted_by = ARRAY['custkey']) AS SELECT * FROM customer WITH NO DATA")) { + assertUpdate(withSortingDisabled, "INSERT INTO " + table.getName() + " SELECT * FROM customer", 1500); for (Object filePath : computeActual("SELECT file_path from \"" + table.getName() + "$files\"").getOnlyColumnAsSet()) { assertThat(isFileSorted((String) filePath, "comment")).isFalse(); } - assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM nation"); + assertQuery("SELECT * FROM " + table.getName(), "SELECT * FROM customer"); } } @@ -1648,7 +1665,10 @@ public void testTableComments() "WITH (\n" + format(" format = '%s',\n", format) + " format_version = 2,\n" + - format(" location = '%s'\n", tempDirPath) + + format(" location = '%s',\n", tempDirPath) + + format(" write_delete_mode = '%s',\n",writeChangeMode.toString()) + 
+ format(" write_merge_mode = '%s',\n",writeChangeMode.toString()) + + format(" write_update_mode = '%s'\n",writeChangeMode.toString()) + ")"; String createTableWithoutComment = "" + "CREATE TABLE iceberg.tpch.test_table_comments (\n" + @@ -1657,7 +1677,10 @@ public void testTableComments() "WITH (\n" + " format = '" + format + "',\n" + " format_version = 2,\n" + - " location = '" + tempDirPath + "'\n" + + " location = '" + tempDirPath + "',\n" + + format(" write_delete_mode = '%s',\n",writeChangeMode.toString()) + + format(" write_merge_mode = '%s',\n",writeChangeMode.toString()) + + format(" write_update_mode = '%s'\n",writeChangeMode.toString()) + ")"; String createTableSql = format(createTableTemplate, "test table comment", format); assertUpdate(createTableSql); @@ -1957,17 +1980,23 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat) // LIKE source INCLUDING PROPERTIES copies all the properties of the source table, including the `location`. // For this reason the source and the copied table will share the same directory. // This test does not drop intentionally the created tables to avoid affecting the source table or the information_schema. 
- assertUpdate(format("CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = '%s', location = '%s', partitioning = ARRAY['aDate'])", format, tempDirPath)); + assertUpdate(format("CREATE TABLE test_create_table_like_original (col1 INTEGER, aDate DATE) WITH(format = '%s', location = '%s', partitioning = ARRAY['aDate'], write_delete_mode= '%s', write_merge_mode= '%s', write_update_mode= '%s' )", format, tempDirPath, writeChangeMode, writeChangeMode, writeChangeMode)); assertThat(getTablePropertiesString("test_create_table_like_original")).isEqualTo(format( """ WITH ( format = '%s', format_version = 2, location = '%s', - partitioning = ARRAY['adate'] + partitioning = ARRAY['adate'], + write_delete_mode = '%s', + write_merge_mode = '%s', + write_update_mode = '%s' )""", format, - tempDirPath)); + tempDirPath, + writeChangeMode, + writeChangeMode, + writeChangeMode)); assertUpdate("CREATE TABLE test_create_table_like_copy0 (LIKE test_create_table_like_original, col2 INTEGER)"); assertUpdate("INSERT INTO test_create_table_like_copy0 (col1, aDate, col2) VALUES (1, CAST('1950-06-28' AS DATE), 3)", 1); @@ -1979,10 +2008,16 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat) WITH ( format = '%s', format_version = 2, - location = '%s' + location = '%s', + write_delete_mode = '%s', + write_merge_mode = '%s', + write_update_mode = '%s' )""", format, - getTableLocation("test_create_table_like_copy1"))); + getTableLocation("test_create_table_like_copy1"), + writeChangeMode, + writeChangeMode, + writeChangeMode)); assertUpdate("CREATE TABLE test_create_table_like_copy2 (LIKE test_create_table_like_original EXCLUDING PROPERTIES)"); assertThat(getTablePropertiesString("test_create_table_like_copy2")).isEqualTo(format( @@ -1990,10 +2025,16 @@ private void testCreateTableLikeForFormat(IcebergFileFormat otherFormat) WITH ( format = '%s', format_version = 2, - location = '%s' + location = '%s', + write_delete_mode = '%s', + 
write_merge_mode = '%s', + write_update_mode = '%s' )""", format, - getTableLocation("test_create_table_like_copy2"))); + getTableLocation("test_create_table_like_copy2"), + writeChangeMode, + writeChangeMode, + writeChangeMode)); assertUpdate("DROP TABLE test_create_table_like_copy2"); assertQueryFails("CREATE TABLE test_create_table_like_copy3 (LIKE test_create_table_like_original INCLUDING PROPERTIES)", @@ -4267,7 +4308,7 @@ private void assertFilterPushdown( Metadata metadata = getQueryRunner().getPlannerContext().getMetadata(); newTransaction().execute(getSession(), session -> { - TableHandle table = metadata.getTableHandle(session, tableName) + TableHandle table = metadata.getTableHandle(session, tableName, Optional.empty()) .orElseThrow(() -> new TableNotFoundException(tableName.asSchemaTableName())); Map columns = metadata.getColumnHandles(session, table); @@ -5756,7 +5797,7 @@ private List getActiveFiles(String tableName) .collect(toImmutableList()); } - private List getIcebergEntries(String tableName) + protected List getIcebergEntries(String tableName) { return computeActual(format("SELECT status, data_file.file_path, sequence_number, file_sequence_number FROM \"%s$entries\"", tableName)) .getMaterializedRows() @@ -5765,7 +5806,7 @@ private List getIcebergEntries(String tableName) .collect(toImmutableList()); } - private record IcebergEntry(int status, String filePath, Long sequenceNumber, Long fileSequenceNumber) {} + protected record IcebergEntry(int status, String filePath, Long sequenceNumber, Long fileSequenceNumber) {} protected String getTableLocation(String tableName) { @@ -7915,6 +7956,15 @@ public void testMergeWithDifferentPartitioning() "CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) WITH (partitioning = ARRAY['customer'])"); } + @Test + public void testMergeWithDifferentPartitioningFailed() + { + testMergeWithDifferentPartitioning( + "target_flat_source_partitioned_by_customer", + "CREATE TABLE %s (customer VARCHAR, 
purchases INT, address VARCHAR)", + "CREATE TABLE %s (purchases INT, address VARCHAR, customer VARCHAR) WITH (partitioning = ARRAY['customer'])"); + } + private void testMergeWithDifferentPartitioning(String testDescription, String createTargetTableSql, String createSourceTableSql) { String targetTable = format("%s_target_%s", testDescription, randomNameSuffix()); @@ -9328,7 +9378,7 @@ private Session withSingleWriterPerTask(Session session) .build(); } - private Session prepareCleanUpSession() + protected Session prepareCleanUpSession() { return Session.builder(getSession()) .setCatalogSessionProperty("iceberg", "expire_snapshots_min_retention", "0s") diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java index d840314fe501..dcd46ac6306a 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergMaterializedViewTest.java @@ -218,7 +218,10 @@ public void testShowCreate() " orc_bloom_filter_columns = ARRAY['_date'],\n" + " orc_bloom_filter_fpp = 1E-1,\n" + " partitioning = ARRAY['_date'],\n" + - " storage_schema = '" + schema + "'\n" + + " storage_schema = '" + schema + "',\n" + + " write_delete_mode = 'MOR',\n" + + " write_merge_mode = 'MOR',\n" + + " write_update_mode = 'MOR'\n" + ") AS\n" + "SELECT\n" + " _bigint\n" + @@ -532,7 +535,10 @@ public void testSqlFeatures() " format_version = 2,\n" + " location = '" + getSchemaDirectory() + "/materialized_view_window-\\E[0-9a-f]+\\Q',\n" + " partitioning = ARRAY['_date'],\n" + - " storage_schema = '" + schema + "'\n" + + " storage_schema = '" + schema + "',\n" + + " write_delete_mode = 'MOR',\n" + + " write_merge_mode = 'MOR',\n" + + " write_update_mode = 'MOR'\n" + ") AS\n" + "SELECT\n" + " _date\n" + diff --git 
a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java index 293a923d3b77..535b328dad48 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/IcebergTestUtils.java @@ -53,6 +53,7 @@ import io.trino.spi.connector.ConnectorSession; import io.trino.spi.connector.SchemaTableName; import io.trino.spi.connector.SourcePage; +import io.trino.spi.type.BigintType; import io.trino.spi.type.TestingTypeManager; import io.trino.spi.type.Type; import io.trino.testing.QueryRunner; @@ -109,6 +110,13 @@ public static Session withSmallRowGroups(Session session) .build(); } + public static Session withLowMaxWriterCount(Session session) + { + return Session.builder(session) + .setSystemProperty("task_max_writer_count", "1") + .build(); + } + public static boolean checkOrcFileSorting(TrinoFileSystem fileSystem, Location path, String sortColumnName) { return checkOrcFileSorting(() -> { @@ -162,6 +170,7 @@ private static Type getType(OrcType.OrcTypeKind orcTypeKind) { return switch (orcTypeKind) { case OrcType.OrcTypeKind.STRING, OrcType.OrcTypeKind.VARCHAR -> VARCHAR; + case OrcType.OrcTypeKind.LONG -> BigintType.BIGINT; default -> throw new IllegalArgumentException("Unsupported orc type: " + orcTypeKind); }; } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestFileBasedConflictDetection.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestFileBasedConflictDetection.java index e94d5d44b452..48e5d636fb02 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestFileBasedConflictDetection.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestFileBasedConflictDetection.java @@ -211,10 +211,10 @@ void testConflictDetectionOnEvolvedTable() {"partitionValues":[40]} """; CommitTaskData commitTaskData1 = 
new CommitTaskData("test_location/data/new.parquet", IcebergFileFormat.PARQUET, 0, new MetricsWrapper(new Metrics()), PartitionSpecParser.toJson(currentPartitionSpec), - Optional.of(partitionDataJson), DATA, Optional.empty(), Optional.empty()); + Optional.of(partitionDataJson), DATA, Optional.empty(), Optional.empty(), false); // Remove file from version with previous partition specification CommitTaskData commitTaskData2 = new CommitTaskData("test_location/data/old.parquet", IcebergFileFormat.PARQUET, 0, new MetricsWrapper(new Metrics()), PartitionSpecParser.toJson(previousPartitionSpec), - Optional.of(partitionDataJson), POSITION_DELETES, Optional.empty(), Optional.empty()); + Optional.of(partitionDataJson), POSITION_DELETES, Optional.empty(), Optional.empty(), false); TupleDomain icebergColumnHandleTupleDomain = extractTupleDomainsFromCommitTasks(getIcebergTableHandle(currentPartitionSpec), icebergTable, List.of(commitTaskData1, commitTaskData2), null); assertThat(icebergColumnHandleTupleDomain.getDomains().orElseThrow()).isEmpty(); @@ -233,7 +233,8 @@ private static List getCommitTaskDataForUpdate(PartitionSpec par partitionDataJson, DATA, Optional.empty(), - Optional.empty()); + Optional.empty(), + false); CommitTaskData commitTaskData2 = new CommitTaskData( "test_location/data/old.parquet", IcebergFileFormat.PARQUET, @@ -243,7 +244,8 @@ private static List getCommitTaskDataForUpdate(PartitionSpec par partitionDataJson, POSITION_DELETES, Optional.empty(), - Optional.empty()); + Optional.empty(), + false); return List.of(commitTaskData1, commitTaskData2); } @@ -270,7 +272,8 @@ private static IcebergTableHandle getIcebergTableHandle(PartitionSpec partitionS false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); } private static Table createIcebergTable(PartitionSpec partitionSpec) diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java 
b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java index d85a8fe78043..3b29ef654078 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergAvroConnectorTest.java @@ -16,6 +16,7 @@ import org.junit.jupiter.api.Test; import static io.trino.plugin.iceberg.IcebergFileFormat.AVRO; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static org.junit.jupiter.api.Assumptions.abort; public class TestIcebergAvroConnectorTest @@ -23,7 +24,7 @@ public class TestIcebergAvroConnectorTest { public TestIcebergAvroConnectorTest() { - super(AVRO); + super(AVRO, MOR); } @Override diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java index d7410a93f069..1b4d71026fad 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergConfig.java @@ -35,6 +35,8 @@ import static io.trino.plugin.iceberg.CatalogType.HIVE_METASTORE; import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; +import static io.trino.plugin.iceberg.WriteChangeMode.COW; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static java.util.concurrent.TimeUnit.DAYS; import static java.util.concurrent.TimeUnit.HOURS; import static java.util.concurrent.TimeUnit.SECONDS; @@ -82,7 +84,10 @@ public void testDefaults() .setObjectStoreLayoutEnabled(false) .setMetadataParallelism(8) .setBucketExecutionEnabled(true) - .setFileBasedConflictDetectionEnabled(true)); + .setFileBasedConflictDetectionEnabled(true) + .setWriteDeleteMode(MOR) + .setWriteUpdateMode(MOR) + .setWriteMergeMode(MOR)); } @Test @@ -126,6 +131,9 @@ public void 
testExplicitPropertyMappings() .put("iceberg.metadata.parallelism", "10") .put("iceberg.bucket-execution", "false") .put("iceberg.file-based-conflict-detection", "false") + .put("iceberg.write-delete-mode", "COW") + .put("iceberg.write-update-mode", "COW") + .put("iceberg.write-merge-mode", "COW") .buildOrThrow(); IcebergConfig expected = new IcebergConfig() @@ -166,7 +174,10 @@ public void testExplicitPropertyMappings() .setObjectStoreLayoutEnabled(true) .setMetadataParallelism(10) .setBucketExecutionEnabled(false) - .setFileBasedConflictDetectionEnabled(false); + .setFileBasedConflictDetectionEnabled(false) + .setWriteDeleteMode(COW) + .setWriteUpdateMode(COW) + .setWriteMergeMode(COW); assertFullMapping(properties, expected); } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergCopyOnWriteConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergCopyOnWriteConnectorTest.java new file mode 100644 index 000000000000..ee463598f9d0 --- /dev/null +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergCopyOnWriteConnectorTest.java @@ -0,0 +1,674 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package io.trino.plugin.iceberg; + +import com.google.common.collect.ImmutableMap; +import io.trino.Session; +import io.trino.filesystem.Location; +import io.trino.testing.MaterializedResult; +import io.trino.testing.sql.TestTable; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static io.trino.plugin.iceberg.IcebergFileFormat.PARQUET; +import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; +import static io.trino.plugin.iceberg.IcebergTestUtils.withLowMaxWriterCount; +import static io.trino.plugin.iceberg.IcebergTestUtils.withSmallRowGroups; +import static io.trino.plugin.iceberg.WriteChangeMode.COW; +import static io.trino.testing.TestingConnectorBehavior.SUPPORTS_DELETE; +import static io.trino.testing.TestingConnectorBehavior.SUPPORTS_MERGE; +import static io.trino.testing.TestingNames.randomNameSuffix; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; + +public class TestIcebergCopyOnWriteConnectorTest + extends BaseIcebergConnectorTest +{ + @Override + protected IcebergQueryRunner.Builder createQueryRunnerBuilder() + { + return IcebergQueryRunner.builder() + .setIcebergProperties(ImmutableMap.builder() + .put("iceberg.file-format", format.name()) + // Allows testing the sorting writer flushing to the file system with smaller tables + .put("iceberg.allowed-extra-properties", "extra.property.one,extra.property.two,extra.property.three,sorted_by") + .put("iceberg.writer-sort-buffer-size", "1MB") + .put("iceberg.write-delete-mode", "cow") + .put("iceberg.write-update-mode", "cow") + .put("iceberg.write-merge-mode", "cow") + .buildOrThrow()) + .setInitialTables(REQUIRED_TPCH_TABLES); + } + + public TestIcebergCopyOnWriteConnectorTest() + { + super(PARQUET, COW); + 
} + + @Override + protected boolean supportsIcebergFileStatistics(String typeName) + { + return true; + } + + @Override + protected boolean supportsRowGroupStatistics(String typeName) + { + return + !(typeName.equalsIgnoreCase("varbinary") || + typeName.equalsIgnoreCase("time") || + typeName.equalsIgnoreCase("time(6)") || + typeName.equalsIgnoreCase("timestamp(3) with time zone") || + typeName.equalsIgnoreCase("timestamp(6) with time zone")); + } + + @Override + protected boolean supportsPhysicalPushdown() + { + return true; + } + + @Test + public void testMergeSimpleUpdate() + { + skipTestUnless(hasBehavior(SUPPORTS_MERGE)); + + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String targetTable = "merge_simple_target_" + randomNameSuffix(); + String sourceTable = "merge_simple_source_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) " + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch')", targetTable), 1); + + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR) " + "with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches')", sourceTable), 1); + + assertUpdate(format("MERGE INTO %s t USING %s s ON (t.customer = s.customer)", targetTable, sourceTable) + + " WHEN MATCHED AND s.address = 'Centreville' THEN DELETE" + + " WHEN MATCHED THEN UPDATE SET purchases = s.purchases + t.purchases, address = s.address" + + " WHEN NOT MATCHED THEN INSERT (customer, purchases, address) VALUES(s.customer, s.purchases, s.address)", 1); + + assertQuery("SELECT * FROM " + targetTable, "VALUES ('Aaron', 11, 'Arches')"); + + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + targetTable); + } + } + + @Test 
+ public void testMergeSimpleDelete() + { + skipTestUnless(hasBehavior(SUPPORTS_MERGE)); + + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String targetTable = "merge_simple_target_" + randomNameSuffix(); + String sourceTable = "merge_simple_source_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch')", targetTable), 1); + + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches')", sourceTable), 1); + + assertUpdate(format("MERGE INTO %s t USING %s s ON (t.customer = s.customer)", targetTable, sourceTable) + + " WHEN MATCHED AND s.address = 'Arches' THEN DELETE" + + " WHEN MATCHED THEN UPDATE SET purchases = s.purchases + t.purchases, address = s.address" + + " WHEN NOT MATCHED THEN INSERT (customer, purchases, address) VALUES(s.customer, s.purchases, s.address)", 1); + + assertQuery("SELECT count(*) FROM " + targetTable, "SELECT 0"); + + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + targetTable); + } + } + + @Test + public void testMergeSimpleInsert() + { + skipTestUnless(hasBehavior(SUPPORTS_MERGE)); + + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String targetTable = "merge_simple_target_" + randomNameSuffix(); + String sourceTable = "merge_simple_source_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 
'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable), 4); + + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable), 4); + + assertUpdate(format("MERGE INTO %s t USING %s s ON (t.customer = s.customer)", targetTable, sourceTable) + + " WHEN NOT MATCHED THEN INSERT (customer, purchases, address) VALUES(s.customer, s.purchases, s.address)", 1); + + assertQuery("SELECT * FROM " + targetTable, "VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon'), ('Ed', 7, 'Etherville')"); + + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + targetTable); + } + } + + @Test + public void testSimpleDelete() + { + skipTestUnless(hasBehavior(SUPPORTS_DELETE)); + + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String targetTable = "delete_simple_target_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'),('Dog', 6, 'Dogma'), ('Carol', 3, 'Cambridge')", targetTable), 3); + + assertUpdate("DELETE FROM " + targetTable + " WHERE purchases <= 4", 1); + + assertQuery("SELECT * FROM " + targetTable, "VALUES ('Aaron', 5, 'Antioch'),('Dog', 6, 'Dogma')"); + + assertUpdate("DROP TABLE " + targetTable); + } + } + + @Test + public void testMergeSimpleUpdateInsert() + { + skipTestUnless(hasBehavior(SUPPORTS_MERGE)); + + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String targetTable = 
"merge_simple_target_" + randomNameSuffix(); + String sourceTable = "merge_simple_source_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable), 4); + + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable), 4); + + assertUpdate(format("MERGE INTO %s t USING %s s ON (t.customer = s.customer)", targetTable, sourceTable) + + " WHEN MATCHED THEN UPDATE SET purchases = s.purchases + t.purchases, address = s.address" + + " WHEN NOT MATCHED THEN INSERT (customer, purchases, address) VALUES(s.customer, s.purchases, s.address)", 4); + + assertQuery("SELECT * FROM " + targetTable, "VALUES ('Aaron', 11, 'Arches'), ('Ed', 7, 'Etherville'), ('Bill', 7, 'Buena'), ('Carol', 12, 'Centreville'), ('Dave', 22, 'Darbyshire')"); + + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + targetTable); + } + } + +// @Test +// public void testMergeSimpleSelect() +// { +// skipTestUnless(hasBehavior(SUPPORTS_MERGE)); +// +// for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { +// String targetTable = "merge_simple_target_" + randomNameSuffix(); +// String sourceTable = "merge_simple_source_" + randomNameSuffix(); +// createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", targetTable, Optional.of("customer")); +// +// assertUpdate(format("INSERT INTO %s 
(customer, purchases, address) VALUES ('Aaron', 5, 'Antioch'), ('Bill', 7, 'Buena'), ('Carol', 3, 'Cambridge'), ('Dave', 11, 'Devon')", targetTable), 4); +// +// createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchases INT, address VARCHAR)" + "with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); +// +// assertUpdate(format("INSERT INTO %s (customer, purchases, address) VALUES ('Aaron', 6, 'Arches'), ('Ed', 7, 'Etherville'), ('Carol', 9, 'Centreville'), ('Dave', 11, 'Darbyshire')", sourceTable), 4); +// +// assertUpdate(format("MERGE INTO %s t USING %s s ON (t.customer = s.customer)", targetTable, sourceTable) + +// " WHEN MATCHED AND s.address = 'Centreville' THEN DELETE" + +// " WHEN MATCHED THEN UPDATE SET purchases = s.purchases + t.purchases, address = s.address" + +// " WHEN NOT MATCHED THEN INSERT (customer, purchases, address) VALUES(s.customer, s.purchases, s.address)", 4); +// +// assertQuery("SELECT * FROM " + targetTable, "VALUES ('Aaron', 11, 'Arches'), ('Ed', 7, 'Etherville'), ('Bill', 7, 'Buena'), ('Dave', 22, 'Darbyshire')"); +// +// assertUpdate("DROP TABLE " + sourceTable); +// assertUpdate("DROP TABLE " + targetTable); +// } +// } + + @Test + public void testCopyOnWriteUpdate() + { + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String tableName = "test_update" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + " with (format = '" + fileFormat + "')" + " AS SELECT * FROM nation", 25); + + assertUpdate("UPDATE " + tableName + " SET nationkey = 100 + nationkey WHERE regionkey = 2", 5); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches("SELECT IF(regionkey=2, nationkey + 100, nationkey) nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + + // UPDATE after UPDATE + 
assertUpdate("UPDATE " + tableName + " SET nationkey = nationkey * 2 WHERE regionkey IN (2,3)", 10); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches("SELECT CASE regionkey WHEN 2 THEN 2*(nationkey+100) WHEN 3 THEN 2*nationkey ELSE nationkey END nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + // Nondeterministic number of files added, just check there are no delete files + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + } + } + + @Test + public void testCopyOnWriteUpdateMixed() + { + for (WriteChangeMode mode : WriteChangeMode.values()) { + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String tableName = "test_update" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + " with (format = '" + fileFormat + "')" + " AS SELECT * FROM nation", 25); + + mode = mode.alternate(); + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_update_mode = '" + mode + "'"); + assertUpdate("UPDATE " + tableName + " SET nationkey = 100 + nationkey WHERE regionkey = 2", 5); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches("SELECT IF(regionkey=2, nationkey + 100, nationkey) nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + + // UPDATE after UPDATE + mode = mode.alternate(); + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_update_mode = '" + mode + "'"); + assertUpdate("UPDATE " + tableName + " SET nationkey = nationkey * 2 WHERE regionkey IN (2,3)", 10); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches("SELECT CASE regionkey WHEN 2 THEN 2*(nationkey+100) WHEN 3 THEN 2*nationkey ELSE nationkey END nationkey, name, regionkey, comment FROM tpch.tiny.nation"); + } + } + } + + @Test + public void testCopyOnWriteMerge() + { + for (IcebergFileFormat fileFormat : 
IcebergFileFormat.values()) { + String targetTable = "merge_various_target_" + randomNameSuffix(); + String sourceTable = "merge_various_source_" + randomNameSuffix(); + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR)" + " with (format = '" + fileFormat + "')", targetTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchase) VALUES ('Dave', 'dates'), ('Lou', 'limes'), ('Carol', 'candles')", targetTable), 3); + + createTableForWrites("CREATE TABLE %s (customer VARCHAR, purchase VARCHAR)" + " with (format = '" + fileFormat + "')", sourceTable, Optional.empty()); + + assertUpdate(format("INSERT INTO %s (customer, purchase) VALUES ('Craig', 'candles'), ('Len', 'limes'), ('Joe', 'jellybeans')", sourceTable), 3); + + assertUpdate(format("MERGE INTO %s t USING %s s ON (t.purchase = s.purchase)", targetTable, sourceTable) + + " WHEN MATCHED AND s.purchase = 'limes' THEN DELETE" + + " WHEN MATCHED THEN UPDATE SET customer = CONCAT(t.customer, '_', s.customer)" + + " WHEN NOT MATCHED THEN INSERT (customer, purchase) VALUES(s.customer, s.purchase)", 3); + + assertQuery("SELECT * FROM " + targetTable, "VALUES ('Dave', 'dates'), ('Carol_Craig', 'candles'), ('Joe', 'jellybeans')"); + + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + targetTable + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(targetTable), + "VALUES '0'"); + + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + targetTable); + } + } + + @Override + protected Optional filterTypeCoercionOnCreateTableAsSelectProvider(TypeCoercionTestSetup setup) + { + return Optional.of(setup); + } + + @Test + public void testCopyOnWriteDeleteMixed() + { + // delete successive parts of the table + for (WriteChangeMode mode : WriteChangeMode.values()) { + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String tableName = "test_delete_" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + " with 
(format = '" + fileFormat + "')" + " AS SELECT * FROM orders", 15000); + + mode = mode.alternate(); + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_delete_mode = '" + mode + "'"); + assertUpdate("DELETE FROM " + tableName + " WHERE custkey <= 100", "SELECT count(*) FROM orders WHERE custkey <= 100"); + assertQuery("SELECT * FROM " + tableName, "SELECT * FROM orders WHERE custkey > 100"); + + mode = mode.alternate(); + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_delete_mode = '" + mode + "'"); + assertUpdate("DELETE FROM " + tableName + " WHERE custkey <= 300", "SELECT count(*) FROM orders WHERE custkey > 100 AND custkey <= 300"); + assertQuery("SELECT * FROM " + tableName, "SELECT * FROM orders WHERE custkey > 300"); + + mode = mode.alternate(); + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_delete_mode = '" + mode + "'"); + assertUpdate("DELETE FROM " + tableName + " WHERE custkey <= 500", "SELECT count(*) FROM orders WHERE custkey > 300 AND custkey <= 500"); + assertQuery("SELECT * FROM " + tableName, "SELECT * FROM orders WHERE custkey > 500"); + } + } + } + + @Override + @Test + public void testOptimizeTableAfterDeleteWithFormatVersion2() + { + String tableName = "test_optimize_" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM nation", 25); + + List initialFiles = getActiveFiles(tableName); + + assertUpdate("DELETE FROM " + tableName + " WHERE nationkey = 7", 1); + + // Verify that delete files do not exist + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + + // Verify that data files are added and removed + assertQuery( + "SELECT summary['added-data-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '1'"); + + // For optimize we need to set task_min_writer_count to 1, otherwise it will create 
more than one file. + computeActual(withSingleWriterPerTask(getSession()), "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); + + List updatedFiles = getActiveFiles(tableName); + assertThat(updatedFiles) + .hasSize(1) + .isNotEqualTo(initialFiles); + + assertThat(query("SELECT * FROM " + tableName)) + .matches("SELECT * FROM nation WHERE nationkey != 7"); + + assertUpdate("DROP TABLE " + tableName); + } + + private List getActiveFiles(String tableName) + { + return computeActual(format("SELECT file_path FROM \"%s$files\"", tableName)).getOnlyColumn() + .map(String.class::cast) + .collect(toImmutableList()); + } + + private Session withSingleWriterPerTask(Session session) + { + return Session.builder(session) + .setSystemProperty("task_min_writer_count", "1") + .build(); + } + + private Session with(Session session) + { + return Session.builder(session) + .setSystemProperty("task_min_writer_count", "1") + .build(); + } + + private long getCurrentSnapshotId(String tableName) + { + return (long) computeScalar("SELECT snapshot_id FROM \"" + tableName + "$snapshots\" ORDER BY committed_at DESC FETCH FIRST 1 ROW WITH TIES"); + } + + @Override + @Test + public void testOptimizeCleansUpDeleteFiles() + throws IOException + { + String tableName = "test_optimize_" + randomNameSuffix(); + Session sessionWithShortRetentionUnlocked = prepareCleanUpSession(); + assertUpdate("CREATE TABLE " + tableName + " WITH (partitioning = ARRAY['regionkey']) AS SELECT * FROM nation", 25); + + List allDataFilesInitially = getAllDataFilesFromTableDirectory(tableName); + assertThat(allDataFilesInitially).hasSize(5); + + assertUpdate("DELETE FROM " + tableName + " WHERE nationkey = 7", 1); + + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + + List allDataFilesAfterDelete = getAllDataFilesFromTableDirectory(tableName); + assertThat(allDataFilesAfterDelete).hasSize(6); + + // For 
optimize we need to set task_min_writer_count to 1, otherwise it will create more than one file. + computeActual(withSingleWriterPerTask(getSession()), "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE WHERE regionkey = 3"); + computeActual(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); + computeActual(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); + + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + List allDataFilesAfterOptimizeWithWhere = getAllDataFilesFromTableDirectory(tableName); + assertThat(allDataFilesAfterOptimizeWithWhere) + .hasSize(5) + .doesNotContain(allDataFilesInitially.stream().filter(file -> file.contains("regionkey=3")) + .toArray(String[]::new)) + .contains(allDataFilesInitially.stream().filter(file -> !file.contains("regionkey=3")) + .toArray(String[]::new)); + + assertThat(query("SELECT * FROM " + tableName)) + .matches("SELECT * FROM nation WHERE nationkey != 7"); + + // For optimize we need to set task_min_writer_count to 1, otherwise it will create more than one file. 
+ computeActual(withSingleWriterPerTask(getSession()), "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); + computeActual(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE EXPIRE_SNAPSHOTS (retention_threshold => '0s')"); + computeActual(sessionWithShortRetentionUnlocked, "ALTER TABLE " + tableName + " EXECUTE REMOVE_ORPHAN_FILES (retention_threshold => '0s')"); + + assertQuery( + "SELECT summary['total-delete-files'] FROM \"" + tableName + "$snapshots\" WHERE snapshot_id = " + getCurrentSnapshotId(tableName), + "VALUES '0'"); + List allDataFilesAfterFullOptimize = getAllDataFilesFromTableDirectory(tableName); + assertThat(allDataFilesAfterFullOptimize) + .hasSize(5) + // All files skipped from OPTIMIZE as they have no deletes and there's only one file per partition + .contains(allDataFilesAfterOptimizeWithWhere.toArray(new String[0])); + + assertThat(query("SELECT * FROM " + tableName)) + .matches("SELECT * FROM nation WHERE nationkey != 7"); + + assertUpdate("DROP TABLE " + tableName); + } + + @Override + @Test + public void testOptimizeFilesDoNotInheritSequenceNumber() + throws IOException + { + String tableName = "test_optimize_" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + tableName + " AS SELECT * FROM nation", 25); + + assertUpdate("DELETE FROM " + tableName + " WHERE nationkey = 7", 1); + + // For optimize we need to set task_min_writer_count to 1, otherwise it will create more than one file. 
+ computeActual(withSingleWriterPerTask(getSession()), "ALTER TABLE " + tableName + " EXECUTE OPTIMIZE"); + + List activeEntries = getIcebergEntries(tableName); + assertThat(activeEntries).hasSize(2); + + assertThat(activeEntries.stream().filter(entry -> entry.status() == 2)) + .hasSize(1) + .allMatch(entry -> entry.sequenceNumber().equals(entry.fileSequenceNumber())); + + assertThat(query("SELECT * FROM " + tableName)) + .matches("SELECT * FROM nation WHERE nationkey != 7"); + + assertUpdate("DROP TABLE " + tableName); + } + + @Test + public void testRowGroupResetDictionary() + { + try (TestTable table = new TestTable( + getQueryRunner()::execute, + "test_row_group_reset_dictionary", + "(plain_col varchar, dict_col int)")) { + String tableName = table.getName(); + String values = IntStream.range(0, 100) + .mapToObj(i -> "('ABCDEFGHIJ" + i + "' , " + (i < 20 ? "1" : "null") + ")") + .collect(Collectors.joining(", ")); + assertUpdate(withSmallRowGroups(getSession()), "INSERT INTO " + tableName + " VALUES " + values, 100); + + MaterializedResult result = getDistributedQueryRunner().execute(String.format("SELECT * FROM %s", tableName)); + assertThat(result.getRowCount()).isEqualTo(100); + } + } + + @Test + public void testMixedTableChange() + { + // This test iterates through every file format and every combination of write modes (COW vs. MOR) + // to ensure the final table state is correct regardless of the underlying mechanism used for each DML operation. + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + for (WriteChangeMode updateMode : WriteChangeMode.values()) { + for (WriteChangeMode deleteMode : WriteChangeMode.values()) { + for (WriteChangeMode mergeMode : WriteChangeMode.values()) { + String tableName = "test_mixed_table_" + randomNameSuffix(); + String sourceName = "test_mixed_source_" + randomNameSuffix(); + + try { + // 1. 
SETUP: Create a target table with 25 rows and a source for the MERGE + assertUpdate("CREATE TABLE " + tableName + + " WITH (format = '" + fileFormat + "')" + + " AS SELECT nationkey AS id, name AS data, regionkey FROM nation", 25); + assertUpdate("CREATE TABLE " + sourceName + + " WITH (format = '" + fileFormat + "')" + + " AS SELECT nationkey AS id, name AS data, regionkey FROM nation WHERE regionkey = 1", 5); + + // 2. UPDATE: Use the mode for the current iteration (cow or merge-on-read) + assertUpdate(format("ALTER TABLE %s SET PROPERTIES write_update_mode = '%s'", tableName, updateMode.name())); + assertUpdate("UPDATE " + tableName + " SET data = 'UPDATED' WHERE id = 1", 1); + // Simple verification that the update was successful + assertQuery("SELECT data FROM " + tableName + " WHERE id = 1", "VALUES 'UPDATED'"); + + // 3. DELETE: Use the mode for the current iteration + assertUpdate(format("ALTER TABLE %s SET PROPERTIES write_delete_mode = '%s'", tableName, deleteMode.name())); + assertUpdate("DELETE FROM " + tableName + " WHERE id = 10", 1); + // Simple verification that the delete was successful + assertQuery("SELECT count(*) FROM " + tableName, "SELECT 24"); + + // 4. MERGE: Use the mode for the current iteration + // Add a new row to the source to test the 'NOT MATCHED' clause + assertUpdate("INSERT INTO " + sourceName + " (id, data, regionkey) VALUES (100, 'NEW_ROW', 42)", 1); + assertUpdate(format("ALTER TABLE %s SET PROPERTIES write_merge_mode = '%s'", tableName, mergeMode.name())); + String mergeSql = format( + "MERGE INTO %s t USING %s s ON (t.id = s.id) " + + "WHEN MATCHED AND s.id = 1 THEN UPDATE SET data = 'MERGED' " + + "WHEN MATCHED AND s.id = 2 THEN DELETE " + + "WHEN NOT MATCHED THEN INSERT (id, data, regionkey) VALUES (s.id, s.data, s.regionkey)", + tableName, sourceName); + assertUpdate(mergeSql, 3); + + // 5. FINAL VERIFICATION: Check the final state of the data. + // This block of assertions is the same for all mode combinations. 
+ // Initial 25 -> DELETE -> 24 -> MERGE (1 delete, 1 insert) -> 24 rows final + assertQuery("SELECT count(*) FROM " + tableName, "SELECT 24"); + assertQuery("SELECT data FROM " + tableName + " WHERE id = 1", "VALUES 'MERGED'"); // Row was updated by MERGE + assertQuery("SELECT count(*) FROM " + tableName + " WHERE id = 2", "SELECT 0"); // Row was deleted by MERGE + assertQuery("SELECT data FROM " + tableName + " WHERE id = 100", "VALUES 'NEW_ROW'"); // Row was inserted by MERGE + assertQuery("SELECT count(*) FROM " + tableName + " WHERE id = 10", "SELECT 0"); // Row from earlier DELETE is still gone + } + finally { + assertUpdate("DROP TABLE IF EXISTS " + tableName); + assertUpdate("DROP TABLE IF EXISTS " + sourceName); + } + } + } + } + } + } + + @Test + public void testMixedTableChange2() + { + for (WriteChangeMode mode : WriteChangeMode.values()) { + for (IcebergFileFormat fileFormat : IcebergFileFormat.values()) { + String tableName = "test_mixed_" + randomNameSuffix(); + + // Create and populate the target table + assertUpdate("CREATE TABLE " + tableName + " (id integer, value varchar) with (format = '" + fileFormat + "')"); + assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')", 4); + + // Update with mode + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_update_mode = '" + mode + "'"); + assertUpdate("UPDATE " + tableName + " SET value = 'updated_' || value WHERE id = 2", 1); + + // Delete with alternate mode + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_delete_mode = '" + mode.alternate() + "'"); + assertUpdate("DELETE FROM " + tableName + " WHERE id = 3", 1); + + // Create and populate the source table for merge + String sourceTable = "source_" + randomNameSuffix(); + assertUpdate("CREATE TABLE " + sourceTable + " (id integer, value varchar) with (format = '" + fileFormat + "')"); + assertUpdate("INSERT INTO " + sourceTable + " VALUES (2, 'merged_two'), (5, 'five')", 
2); + + // Merge with mode + assertUpdate("ALTER TABLE " + tableName + " SET PROPERTIES write_merge_mode = '" + mode + "'"); + assertUpdate( + "MERGE INTO " + tableName + " t USING " + sourceTable + " s ON t.id = s.id " + + "WHEN MATCHED THEN UPDATE SET value = s.value " + + "WHEN NOT MATCHED THEN INSERT (id, value) VALUES (s.id, s.value)", + 2); + + // Verify the final state + assertQuery( + "SELECT * FROM " + tableName + " ORDER BY id", + "VALUES (1, 'one'), (2, 'merged_two'), (4, 'four'), (5, 'five')"); + + // Clean up + assertUpdate("DROP TABLE " + sourceTable); + assertUpdate("DROP TABLE " + tableName); + } + } + } + + @Override + protected Optional filterSetColumnTypesDataProvider(SetColumnTypeSetup setup) + { + switch ("%s -> %s".formatted(setup.sourceColumnType(), setup.newColumnType())) { + case "row(x integer) -> row(y integer)": + // TODO https://github.com/trinodb/trino/issues/15822 The connector returns incorrect NULL when a field in row type doesn't exist in Parquet files + return Optional.of(setup.withNewValueLiteral("NULL")); + } + return super.filterSetColumnTypesDataProvider(setup); + } + + @Override + @Test + public void testUpdateWithSortOrder() + { + Session withSmallRowGroupsAndLowMaxWriterCount = withLowMaxWriterCount(withSmallRowGroups(getSession())); + + try (TestTable table = newTrinoTable( + "test_sorted_update", + "WITH (sorted_by = ARRAY['comment']) AS TABLE tpch.tiny.customer WITH NO DATA")) { + assertUpdate( + withSmallRowGroupsAndLowMaxWriterCount, + "INSERT INTO " + table.getName() + " TABLE tpch.tiny.customer", + "VALUES 1500"); + assertUpdate(withSmallRowGroupsAndLowMaxWriterCount, "UPDATE " + table.getName() + " SET comment = substring(comment, 2)", 1500); + assertQuery( + "SELECT custkey, name, address, nationkey, phone, acctbal, mktsegment, comment FROM " + table.getName(), + "SELECT custkey, name, address, nationkey, phone, acctbal, mktsegment, substring(comment, 2) FROM customer"); + for (Object filePath : 
computeActual("SELECT file_path from \"" + table.getName() + "$files\" WHERE content != 1").getOnlyColumnAsSet()) { + assertThat(isFileSorted((String) filePath, "comment")).isTrue(); + } + } + } + + @Override + protected boolean isFileSorted(String path, String sortColumnName) + { + return checkParquetFileSorting( + fileSystem.newInputFile(Location.of(path)), + sortColumnName); + } +} diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMinioOrcConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMinioOrcConnectorTest.java index 7f6a39290266..7754cba8c33e 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMinioOrcConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergMinioOrcConnectorTest.java @@ -31,6 +31,7 @@ import static com.google.common.io.Resources.getResource; import static io.trino.plugin.iceberg.IcebergFileFormat.ORC; import static io.trino.plugin.iceberg.IcebergTestUtils.checkOrcFileSorting; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static io.trino.testing.TestingNames.randomNameSuffix; import static io.trino.testing.containers.Minio.MINIO_ACCESS_KEY; import static io.trino.testing.containers.Minio.MINIO_REGION; @@ -50,7 +51,7 @@ public class TestIcebergMinioOrcConnectorTest public TestIcebergMinioOrcConnectorTest() { - super(ORC); + super(ORC, MOR); } @Override diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java index ddd063c54a53..77dd85e5cbdd 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergNodeLocalDynamicSplitPruning.java @@ -175,7 +175,8 @@ public void 
testDynamicSplitPruningOnUnpartitionedTable() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)), + Optional.of(false), + Optional.empty()), transaction); TupleDomain splitPruningPredicate = TupleDomain.withColumnDomains( @@ -235,7 +236,8 @@ public void testDynamicSplitPruningOnUnpartitionedTable() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)), + Optional.of(false), + Optional.empty()), transaction); try (ConnectorPageSource emptyPageSource = createTestingPageSource(transaction, icebergConfig, split, tableHandle, ImmutableList.of(keyColumnHandle, dataColumnHandle), getDynamicFilter(splitPruningPredicate))) { @@ -345,7 +347,8 @@ public void testDynamicSplitPruningWithExplicitPartitionFilter() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)), + Optional.of(false), + Optional.empty()), transaction); // Simulate situations where the dynamic filter (e.g.: while performing a JOIN with another table) reduces considerably @@ -506,7 +509,8 @@ public void testDynamicSplitPruningWithExplicitPartitionFilterPartitionEvolution false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)), + Optional.of(false), + Optional.empty()), transaction); // Simulate situations where the dynamic filter (e.g.: while performing a JOIN with another table) reduces considerably diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java index 29ed4e0eaa22..11381c12e2ad 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergParquetConnectorTest.java @@ -35,6 +35,7 @@ import static io.trino.plugin.iceberg.IcebergTestUtils.checkParquetFileSorting; import static io.trino.plugin.iceberg.IcebergTestUtils.getParquetFileMetadata; import static 
io.trino.plugin.iceberg.IcebergTestUtils.withSmallRowGroups; +import static io.trino.plugin.iceberg.WriteChangeMode.MOR; import static io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder; import static java.time.ZoneOffset.UTC; import static org.assertj.core.api.Assertions.assertThat; @@ -44,7 +45,7 @@ public class TestIcebergParquetConnectorTest { public TestIcebergParquetConnectorTest() { - super(PARQUET); + super(PARQUET, MOR); } @Override diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java index 9dcf3f603736..80e14798ec5b 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergSplitSource.java @@ -475,6 +475,7 @@ private static IcebergTableHandle createTableHandle(SchemaTableName schemaTableN false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); } } diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/rest/TestIcebergRestCatalogNestedNamespaceConnectorSmokeTest.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/rest/TestIcebergRestCatalogNestedNamespaceConnectorSmokeTest.java index 66592c98762c..1b320b3da350 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/rest/TestIcebergRestCatalogNestedNamespaceConnectorSmokeTest.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/catalog/rest/TestIcebergRestCatalogNestedNamespaceConnectorSmokeTest.java @@ -178,8 +178,11 @@ public void testShowCreateTable() "WITH \\(\n" + " format = '" + format.name() + "',\n" + " format_version = 2,\n" + - format(" location = '.*/" + schemaName + "/region.*'\n") + - "\\)"); + format(" location = '.*/" + schemaName + "/region.*',\n" + + " write_delete_mode = 'MOR',\n" + + " 
write_merge_mode = 'MOR',\n" + + " write_update_mode = 'MOR'\n" + + "\\)")); } @Test diff --git a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java index 674c0b58ea7e..4b3ea7c0a639 100644 --- a/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java +++ b/plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/optimizer/TestConnectorPushdownRulesWithIceberg.java @@ -175,7 +175,8 @@ public void testProjectionPushdown() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); TableHandle table = new TableHandle(catalogHandle, icebergTable, new HiveTransactionHandle(false)); IcebergColumnHandle fullColumn = partialColumn.getBaseColumn(); @@ -259,7 +260,8 @@ public void testPredicatePushdown() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); TableHandle table = new TableHandle(catalogHandle, icebergTable, new HiveTransactionHandle(false)); IcebergColumnHandle column = IcebergColumnHandle.optional(primitiveColumnIdentity(1, "a")).columnType(INTEGER).build(); @@ -310,7 +312,8 @@ public void testColumnPruningProjectionPushdown() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); TableHandle table = new TableHandle(catalogHandle, icebergTable, new HiveTransactionHandle(false)); IcebergColumnHandle columnA = IcebergColumnHandle.optional(primitiveColumnIdentity(0, "a")).columnType(INTEGER).build(); @@ -371,7 +374,8 @@ public void testPushdownWithDuplicateExpressions() false, Optional.empty(), ImmutableSet.of(), - Optional.of(false)); + Optional.of(false), + Optional.empty()); TableHandle table = new TableHandle(catalogHandle, icebergTable, new HiveTransactionHandle(false)); 
IcebergColumnHandle bigintColumn = IcebergColumnHandle.optional(primitiveColumnIdentity(1, "just_bigint")).columnType(BIGINT).build(); diff --git a/plugin/trino-lakehouse/src/main/java/io/trino/plugin/lakehouse/LakehouseMetadata.java b/plugin/trino-lakehouse/src/main/java/io/trino/plugin/lakehouse/LakehouseMetadata.java index fd581d527d51..1ca54c5433d7 100644 --- a/plugin/trino-lakehouse/src/main/java/io/trino/plugin/lakehouse/LakehouseMetadata.java +++ b/plugin/trino-lakehouse/src/main/java/io/trino/plugin/lakehouse/LakehouseMetadata.java @@ -82,6 +82,7 @@ import io.trino.spi.connector.SystemTable; import io.trino.spi.connector.TableColumnsMetadata; import io.trino.spi.connector.TopNApplicationResult; +import io.trino.spi.connector.UpdateKind; import io.trino.spi.connector.WriterScalingOptions; import io.trino.spi.expression.ConnectorExpression; import io.trino.spi.expression.Constant; @@ -150,9 +151,15 @@ public List listSchemaNames(ConnectorSession session) @Override public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional startVersion, Optional endVersion) + { + return getTableHandle(session, tableName, startVersion, endVersion, Optional.empty()); + } + + @Override + public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTableName tableName, Optional startVersion, Optional endVersion, Optional updateKind) { if (isIcebergTableName(tableName.getTableName()) && isMaterializedViewStorage(tableName.getTableName())) { - return icebergMetadata.getTableHandle(session, tableName, startVersion, endVersion); + return icebergMetadata.getTableHandle(session, tableName, startVersion, endVersion, updateKind); } Table table = hiveMetadata.getMetastore() @@ -162,7 +169,7 @@ public ConnectorTableHandle getTableHandle(ConnectorSession session, SchemaTable return null; } if (isIcebergTable(table)) { - return icebergMetadata.getTableHandle(session, tableName, startVersion, endVersion); + return 
icebergMetadata.getTableHandle(session, tableName, startVersion, endVersion, updateKind); } if (isDeltaLakeTable(table)) { return deltaMetadata.getTableHandle(session, tableName, startVersion, endVersion); diff --git a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/BaseLakehouseConnectorSmokeTest.java b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/BaseLakehouseConnectorSmokeTest.java index d5faf3eb11a1..c191df535300 100644 --- a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/BaseLakehouseConnectorSmokeTest.java +++ b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/BaseLakehouseConnectorSmokeTest.java @@ -133,7 +133,10 @@ public void testCreateIcebergTable() format = 'ORC', format_version = 2, location = \\E's3://test-bucket-.*/tpch/create_iceberg-.*'\\Q, - type = 'ICEBERG' + type = 'ICEBERG', + write_delete_mode = 'MOR', + write_merge_mode = 'MOR', + write_update_mode = 'MOR' )\\E"""); assertUpdate("DROP TABLE create_iceberg"); diff --git a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseConnectorTest.java b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseConnectorTest.java index 0e4bc7787ff4..38ceae734c11 100644 --- a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseConnectorTest.java +++ b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseConnectorTest.java @@ -362,7 +362,10 @@ public void testShowCreateTable() format = 'PARQUET', format_version = 2, location = \\E's3://test-bucket-.*/tpch/orders-.*'\\Q, - type = 'ICEBERG' + type = 'ICEBERG', + write_delete_mode = 'MOR', + write_merge_mode = 'MOR', + write_update_mode = 'MOR' )\\E"""); } } diff --git a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseFileConnectorSmokeTest.java b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseFileConnectorSmokeTest.java index 45de4bf98792..0cf163a7f19b 
100644 --- a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseFileConnectorSmokeTest.java +++ b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseFileConnectorSmokeTest.java @@ -100,7 +100,10 @@ public void testShowCreateTable() format = 'PARQUET', format_version = 2, location = 's3://test-bucket/tpch/region-\\E.*\\Q', - type = 'ICEBERG' + type = 'ICEBERG', + write_delete_mode = 'MOR', + write_merge_mode = 'MOR', + write_update_mode = 'MOR' )\\E"""); } } diff --git a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseIcebergConnectorSmokeTest.java b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseIcebergConnectorSmokeTest.java index 42bfaa102b22..0c801fe0cb4d 100644 --- a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseIcebergConnectorSmokeTest.java +++ b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseIcebergConnectorSmokeTest.java @@ -41,7 +41,10 @@ public void testShowCreateTable() format = 'PARQUET', format_version = 2, location = \\E's3://test-bucket-.*/tpch/region-.*'\\Q, - type = 'ICEBERG' + type = 'ICEBERG', + write_delete_mode = 'MOR', + write_merge_mode = 'MOR', + write_update_mode = 'MOR' )\\E"""); } } diff --git a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseMotoConnectorSmokeTest.java b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseMotoConnectorSmokeTest.java index 02754305adeb..cdd79265a7ae 100644 --- a/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseMotoConnectorSmokeTest.java +++ b/plugin/trino-lakehouse/src/test/java/io/trino/plugin/lakehouse/TestLakehouseMotoConnectorSmokeTest.java @@ -78,7 +78,10 @@ public void testShowCreateTable() format = 'PARQUET', format_version = 2, location = 's3://test-bucket/tpch/region-\\E.*\\Q', - type = 'ICEBERG' + type = 'ICEBERG', + write_delete_mode = 'MOR', + 
write_merge_mode = 'MOR', + write_update_mode = 'MOR' )\\E"""); } } diff --git a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRedirectionToIceberg.java b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRedirectionToIceberg.java index 3d7c7e55b009..a5022f1f11b6 100644 --- a/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRedirectionToIceberg.java +++ b/testing/trino-product-tests/src/main/java/io/trino/tests/product/hive/TestHiveRedirectionToIceberg.java @@ -298,7 +298,10 @@ public void testShowCreateTable() " format = 'PARQUET',\n" + " format_version = 2,\n" + format(" location = 'hdfs://hadoop-master:9000/user/hive/warehouse/%s-\\E.*\\Q',\n", tableName) + - " partitioning = ARRAY['regionkey']\n" + // 'partitioning' comes from Iceberg + " partitioning = ARRAY['regionkey'],\n" + // 'partitioning' comes from Iceberg + " write_delete_mode = 'MOR',\n" + + " write_merge_mode = 'MOR',\n" + + " write_update_mode = 'MOR'\n" + ")\\E"); onTrino().executeQuery("DROP TABLE " + icebergTableName);