From 96bf02e6734f11f858308512e43d8f660e3763f2 Mon Sep 17 00:00:00 2001 From: Andrew Xie Date: Wed, 11 Jun 2025 15:59:52 -0700 Subject: [PATCH] [Iceberg] Fix querying data_sequence_number with equality deletes --- .../IcebergEqualityDeleteAsJoin.java | 7 +++-- .../iceberg/IcebergDistributedTestBase.java | 30 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/optimizer/IcebergEqualityDeleteAsJoin.java b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/optimizer/IcebergEqualityDeleteAsJoin.java index 18635bb4ce806..8a2bbba1c2423 100644 --- a/presto-iceberg/src/main/java/com/facebook/presto/iceberg/optimizer/IcebergEqualityDeleteAsJoin.java +++ b/presto-iceberg/src/main/java/com/facebook/presto/iceberg/optimizer/IcebergEqualityDeleteAsJoin.java @@ -266,9 +266,10 @@ public PlanNode visitTableScan(TableScanNode node, RewriteContext context) new SpecialFormExpression(SpecialFormExpression.Form.IS_NULL, BooleanType.BOOLEAN, new SpecialFormExpression(SpecialFormExpression.Form.COALESCE, BigintType.BIGINT, deleteVersionColumns))); + boolean hasExplicitDataSequenceNumberCol = node.getAssignments().containsValue(DATA_SEQUENCE_NUMBER_COLUMN_HANDLE); Assignments.Builder assignmentsBuilder = Assignments.builder(); filter.getOutputVariables().stream() - .filter(variableReferenceExpression -> !variableReferenceExpression.getName().startsWith(DATA_SEQUENCE_NUMBER_COLUMN_HANDLE.getName())) + .filter(variableReferenceExpression -> hasExplicitDataSequenceNumberCol || !variableReferenceExpression.getName().startsWith(DATA_SEQUENCE_NUMBER_COLUMN_HANDLE.getName())) .forEach(variableReferenceExpression -> assignmentsBuilder.put(variableReferenceExpression, variableReferenceExpression)); return new ProjectNode(Optional.empty(), idAllocator.getNextId(), filter, assignmentsBuilder.build(), ProjectNode.Locality.LOCAL); } @@ -368,12 +369,12 @@ private TableScanNode createNewRoot(TableScanNode node, IcebergTableHandle icebe VariableReferenceExpression dataSequenceNumberVariableReference = toVariableReference(DATA_SEQUENCE_NUMBER_COLUMN_HANDLE); ImmutableMap.Builder assignmentsBuilder = ImmutableMap.builder() - .put(dataSequenceNumberVariableReference, DATA_SEQUENCE_NUMBER_COLUMN_HANDLE) .putAll(unselectedAssignments) .putAll(node.getAssignments()); ImmutableList.Builder outputsBuilder = ImmutableList.builder(); outputsBuilder.addAll(node.getOutputVariables()); - if (!node.getAssignments().containsKey(dataSequenceNumberVariableReference)) { + if (!node.getAssignments().containsValue(DATA_SEQUENCE_NUMBER_COLUMN_HANDLE)) { + assignmentsBuilder.put(dataSequenceNumberVariableReference, DATA_SEQUENCE_NUMBER_COLUMN_HANDLE); outputsBuilder.add(dataSequenceNumberVariableReference); } outputsBuilder.addAll(unselectedAssignments.keySet()); diff --git a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java index ed91212089c3a..5e30f9bdaab72 100644 --- a/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java +++ b/presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java @@ -1423,6 +1423,36 @@ public void testEqualityDeletesWithHiddenPartitionsEvolution(String fileFormat, assertQuery(session, "SELECT * FROM " + tableName, "VALUES (1, '1001', NULL, NULL), (3, '1003', NULL, NULL), (6, '1004', 1, NULL), (6, '1006', 2, 'th002')"); } + @Test(dataProvider = "equalityDeleteOptions") + public void testEqualityDeletesWithDataSequenceNumber(String fileFormat, boolean joinRewriteEnabled) + throws Exception + { + Session session = deleteAsJoinEnabled(joinRewriteEnabled); + String tableName = "test_v2_row_delete_" + randomTableSuffix(); + String tableName2 = "test_v2_row_delete_2_" + randomTableSuffix(); + assertUpdate("CREATE TABLE " + tableName + "(id int, data varchar) WITH (\"write.format.default\" = '" + fileFormat + "')"); + assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'a')", 1); + + assertUpdate("CREATE TABLE " + tableName2 + "(id int, data varchar) WITH (\"write.format.default\" = '" + fileFormat + "')"); + assertUpdate("INSERT INTO " + tableName2 + " VALUES (1, 'a')", 1); + + Table icebergTable = updateTable(tableName); + writeEqualityDeleteToNationTable(icebergTable, ImmutableMap.of("id", 1)); + + Table icebergTable2 = updateTable(tableName2); + writeEqualityDeleteToNationTable(icebergTable2, ImmutableMap.of("id", 1)); + + assertUpdate("INSERT INTO " + tableName + " VALUES (1, 'b'), (2, 'a'), (3, 'a')", 3); + assertUpdate("INSERT INTO " + tableName2 + " VALUES (1, 'b'), (2, 'a'), (3, 'a')", 3); + + assertQuery(session, "SELECT * FROM " + tableName, "VALUES (1, 'b'), (2, 'a'), (3, 'a')"); + + assertQuery(session, "SELECT \"$data_sequence_number\", * FROM " + tableName, "VALUES (3, 1, 'b'), (3, 2, 'a'), (3, 3, 'a')"); + + assertQuery(session, "SELECT a.\"$data_sequence_number\", b.\"$data_sequence_number\" from " + tableName + " as a, " + tableName2 + " as b where a.id = b.id", + "VALUES (3, 3), (3, 3), (3, 3)"); + } + @Test public void testPartShowStatsWithFilters() {