Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@
import io.trino.sql.planner.optimizations.OptimizerStats;
import io.trino.sql.planner.optimizations.PlanOptimizer;
import io.trino.sql.planner.optimizations.PredicatePushDown;
import io.trino.sql.planner.optimizations.ReplicateSemiJoinInDelete;
import io.trino.sql.planner.optimizations.ReplicateJoinAndSemiJoinInDelete;
import io.trino.sql.planner.optimizations.StatsRecordingPlanOptimizer;
import io.trino.sql.planner.optimizations.TransformQuantifiedComparisonApplyToCorrelatedJoin;
import io.trino.sql.planner.optimizations.UnaliasSymbolReferences;
Expand Down Expand Up @@ -763,6 +763,8 @@ public PlanOptimizers(
ImmutableSet.of(
new ApplyPreferredTableWriterPartitioning(),
new ApplyPreferredTableExecutePartitioning())),
// Make sure to run ReplicateJoinAndSemiJoinInDelete before ReorderJoins and AddExchanges
new ReplicateJoinAndSemiJoinInDelete(),
Comment on lines 766 to 767
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand why ReplicateSemiJoinInDelete/ReplicateJoinAndSemiJoinInDelete needs to be run before ReorderJoins.
I think ReplicateSemiJoinInDelete/ReplicateJoinAndSemiJoinInDelete would undo distirbution type picked by ReorderJoins. What am i missing?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since ReorderJoins can flip the JoinNode - we would like to avoid it for the PlanNode between Delete and TableScan. So we run them before applying these optimizer.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a good way to solve this issue. Optimizer rules need to be able to run regardless of which order they are applied (i.e., correctness of the plan should not depend on the order of rule application -- only query performance should be affected).

How is this PR different from what we did for #8286 ?

// Because ReorderJoins runs only once,
// PredicatePushDown, columnPruningOptimizer and RemoveRedundantIdentityProjections
// need to run beforehand in order to produce an optimal join order
Expand Down Expand Up @@ -811,7 +813,6 @@ public PlanOptimizers(
new OptimizeDuplicateInsensitiveJoins(metadata))));

if (!forceSingleNode) {
builder.add(new ReplicateSemiJoinInDelete()); // Must run before AddExchanges
builder.add(new IterativeOptimizer(
plannerContext,
ruleStats,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import io.trino.sql.planner.PlanNodeIdAllocator;
import io.trino.sql.planner.SymbolAllocator;
import io.trino.sql.planner.TypeProvider;
import io.trino.sql.planner.plan.AggregationNode;
import io.trino.sql.planner.plan.AssignUniqueId;
import io.trino.sql.planner.plan.DeleteNode;
import io.trino.sql.planner.plan.ExchangeNode;
Expand Down Expand Up @@ -318,6 +319,9 @@ private TableHandle findTableScanHandleForDeleteOrUpdate(PlanNode node)
if (node instanceof MarkDistinctNode) {
return findTableScanHandleForDeleteOrUpdate(((MarkDistinctNode) node).getSource());
}
if (node instanceof AggregationNode) {
return findTableScanHandleForDeleteOrUpdate(((AggregationNode) node).getSource());
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}
throw new IllegalArgumentException("Invalid descendant for DeleteNode or UpdateNode: " + node.getClass().getName());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,25 @@
*/
package io.trino.sql.planner.optimizations;

import com.google.common.collect.ImmutableList;
import io.trino.Session;
import io.trino.cost.TableStatsProvider;
import io.trino.execution.warnings.WarningCollector;
import io.trino.sql.planner.PlanNodeIdAllocator;
import io.trino.sql.planner.SymbolAllocator;
import io.trino.sql.planner.TypeProvider;
import io.trino.sql.planner.plan.DeleteNode;
import io.trino.sql.planner.plan.JoinNode;
import io.trino.sql.planner.plan.JoinNode.Type;
import io.trino.sql.planner.plan.PlanNode;
import io.trino.sql.planner.plan.SemiJoinNode;
import io.trino.sql.planner.plan.SimplePlanRewriter;

import static io.trino.sql.planner.plan.SemiJoinNode.DistributionType.REPLICATED;
import static io.trino.sql.planner.plan.JoinNode.Type.INNER;
import static io.trino.sql.planner.plan.JoinNode.Type.LEFT;
import static java.util.Objects.requireNonNull;

public class ReplicateSemiJoinInDelete
public class ReplicateJoinAndSemiJoinInDelete
implements PlanOptimizer
{
@Override
Expand All @@ -42,26 +46,31 @@ private static class Rewriter
{
private boolean isDeleteQuery;

@Override
public PlanNode visitJoin(JoinNode node, RewriteContext<Void> context)
{
PlanNode leftSourceRewritten = context.rewrite(node.getLeft(), context.get());

// This should be applied to Joins directly between TableScan and DeleteNode, not to all Joins in the plan
JoinNode rewrittenNode = (JoinNode) node.replaceChildren(ImmutableList.of(leftSourceRewritten, node.getRight()));
Type joinType = rewrittenNode.getType();
if (isDeleteQuery && (joinType == INNER || joinType == LEFT)) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess this should be:

if (isDeleteQuery) {
  verify(joinType == INNER || joinType == LEFT

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what about UPDATE?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UPDATE can be applied in a follow up PR.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess this should be:

if (isDeleteQuery) {
  verify(joinType == INNER || joinType == LEFT

has this been resolved?
or, am i being wrong here?

return rewrittenNode.withDistributionType(JoinNode.DistributionType.REPLICATED);
}

return rewrittenNode;
}

@Override
public PlanNode visitSemiJoin(SemiJoinNode node, RewriteContext<Void> context)
{
PlanNode sourceRewritten = context.rewrite(node.getSource(), context.get());
PlanNode filteringSourceRewritten = context.rewrite(node.getFilteringSource(), context.get());

SemiJoinNode rewrittenNode = new SemiJoinNode(
node.getId(),
sourceRewritten,
filteringSourceRewritten,
node.getSourceJoinSymbol(),
node.getFilteringSourceJoinSymbol(),
node.getSemiJoinOutput(),
node.getSourceHashSymbol(),
node.getFilteringSourceHashSymbol(),
node.getDistributionType(),
node.getDynamicFilterId());
SemiJoinNode rewrittenNode = (SemiJoinNode) node.replaceChildren(ImmutableList.of(sourceRewritten, filteringSourceRewritten));

if (isDeleteQuery) {
return rewrittenNode.withDistributionType(REPLICATED);
return rewrittenNode.withDistributionType(SemiJoinNode.DistributionType.REPLICATED);
}

return rewrittenNode;
Expand All @@ -74,12 +83,7 @@ public PlanNode visitDelete(DeleteNode node, RewriteContext<Void> context)
// so you can't do a distributed semi-join
isDeleteQuery = true;
PlanNode rewrittenSource = context.rewrite(node.getSource());
return new DeleteNode(
node.getId(),
rewrittenSource,
node.getTarget(),
node.getRowId(),
node.getOutputSymbols());
return node.replaceChildren(ImmutableList.of(rewrittenSource));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2870,6 +2870,7 @@ public void testDeleteWithComplexPredicate()
@Test
public void testDeleteWithSubquery()
{
// TODO (https://github.com/trinodb/trino/issues/13210) Migrate these tests to AbstractTestEngineOnlyQueries
skipTestUnless(hasBehavior(SUPPORTS_DELETE));

// TODO (https://github.com/trinodb/trino/issues/5901) Use longer table name once Oracle version is updated
Expand All @@ -2889,6 +2890,22 @@ public void testDeleteWithSubquery()
assertUpdate("DELETE FROM " + table.getName() + " WHERE EXISTS(SELECT 1 WHERE false)", 0);
assertUpdate("DELETE FROM " + table.getName() + " WHERE EXISTS(SELECT 1)", "SELECT count(*) - 1 FROM orders");
}

try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_correlated_exists_subquery", "AS SELECT * FROM nation")) {
// delete using correlated EXISTS subquery
assertUpdate(format("DELETE FROM %1$s WHERE EXISTS(SELECT regionkey FROM region WHERE regionkey = %1$s.regionkey AND name LIKE 'A%%')", table.getName()), 15);
assertQuery(
"SELECT * FROM " + table.getName(),
"SELECT * FROM nation WHERE regionkey IN (SELECT regionkey FROM region WHERE name NOT LIKE 'A%')");
}

try (TestTable table = new TestTable(getQueryRunner()::execute, "test_delete_correlated_exists_subquery", "AS SELECT * FROM nation")) {
// delete using correlated IN subquery
assertUpdate(format("DELETE FROM %1$s WHERE regionkey IN (SELECT regionkey FROM region WHERE regionkey = %1$s.regionkey AND name LIKE 'A%%')", table.getName()), 15);
assertQuery(
"SELECT * FROM " + table.getName(),
"SELECT * FROM nation WHERE regionkey IN (SELECT regionkey FROM region WHERE name NOT LIKE 'A%')");
}
}

@Test
Expand Down