diff --git a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java index 5c77f57648c32..6ddf04757fa05 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -174,6 +174,7 @@ public final class SystemSessionProperties public static final String ENABLE_INTERMEDIATE_AGGREGATIONS = "enable_intermediate_aggregations"; public static final String PUSH_AGGREGATION_THROUGH_JOIN = "push_aggregation_through_join"; public static final String PUSH_SEMI_JOIN_THROUGH_UNION = "push_semi_join_through_union"; + public static final String SIMPLIFY_COALESCE_OVER_JOIN_KEYS = "simplify_coalesce_over_join_keys"; public static final String PUSHDOWN_THROUGH_UNNEST = "pushdown_through_unnest"; public static final String PUSH_PARTIAL_AGGREGATION_THROUGH_JOIN = "push_partial_aggregation_through_join"; public static final String PARSE_DECIMAL_LITERALS_AS_DOUBLE = "parse_decimal_literals_as_double"; @@ -931,6 +932,11 @@ public SystemSessionProperties( "Allow pushing semi joins through union", featuresConfig.isPushSemiJoinThroughUnion(), false), + booleanProperty( + SIMPLIFY_COALESCE_OVER_JOIN_KEYS, + "Simplify redundant COALESCE expressions over equi-join keys", + featuresConfig.isSimplifyCoalesceOverJoinKeys(), + false), booleanProperty( PUSHDOWN_THROUGH_UNNEST, "Allow pushing projections and filters below unnest", @@ -2615,6 +2621,11 @@ public static boolean isPushSemiJoinThroughUnion(Session session) return session.getSystemProperty(PUSH_SEMI_JOIN_THROUGH_UNION, Boolean.class); } + public static boolean isSimplifyCoalesceOverJoinKeys(Session session) + { + return session.getSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, Boolean.class); + } + public static boolean isPushdownThroughUnnest(Session session) { return session.getSystemProperty(PUSHDOWN_THROUGH_UNNEST, Boolean.class); diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java index cd9d4e2141cf1..d02cf6000c871 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FeaturesConfig.java @@ -156,6 +156,7 @@ public class FeaturesConfig private double defaultWriterReplicationCoefficient = 3; private boolean pushAggregationThroughJoin = true; private boolean pushSemiJoinThroughUnion; + private boolean simplifyCoalesceOverJoinKeys; private boolean pushdownThroughUnnest; private double memoryRevokingTarget = 0.5; private double memoryRevokingThreshold = 0.9; @@ -1691,6 +1692,19 @@ public FeaturesConfig setPushSemiJoinThroughUnion(boolean pushSemiJoinThroughUni return this; } + public boolean isSimplifyCoalesceOverJoinKeys() + { + return simplifyCoalesceOverJoinKeys; + } + + @Config("optimizer.simplify-coalesce-over-join-keys") + @ConfigDescription("Simplify redundant COALESCE expressions over equi-join keys based on join type") + public FeaturesConfig setSimplifyCoalesceOverJoinKeys(boolean simplifyCoalesceOverJoinKeys) + { + this.simplifyCoalesceOverJoinKeys = simplifyCoalesceOverJoinKeys; + return this; + } + public boolean isPushdownThroughUnnest() { return pushdownThroughUnnest; diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java index a0b41de3574dd..52903e08719ba 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/PlanOptimizers.java @@ -148,6 +148,7 @@ import com.facebook.presto.sql.planner.iterative.rule.RuntimeReorderJoinSides; import com.facebook.presto.sql.planner.iterative.rule.ScaledWriterRule; import com.facebook.presto.sql.planner.iterative.rule.SimplifyCardinalityMap; +import com.facebook.presto.sql.planner.iterative.rule.SimplifyCoalesceOverJoinKeys; import com.facebook.presto.sql.planner.iterative.rule.SimplifyCountOverConstant; import com.facebook.presto.sql.planner.iterative.rule.SimplifyRowExpressions; import com.facebook.presto.sql.planner.iterative.rule.SimplifySortWithConstantInput; @@ -582,7 +583,8 @@ public PlanOptimizers( ruleStats, statsCalculator, estimatedExchangesCostCalculator, - ImmutableSet.of(new RemoveCrossJoinWithConstantInput(metadata.getFunctionAndTypeManager())))); + ImmutableSet.of(new RemoveCrossJoinWithConstantInput(metadata.getFunctionAndTypeManager()), + new SimplifyCoalesceOverJoinKeys()))); builder.add(new IterativeOptimizer( metadata, diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/planner/iterative/rule/SimplifyCoalesceOverJoinKeys.java b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/iterative/rule/SimplifyCoalesceOverJoinKeys.java new file mode 100644 index 0000000000000..d11bf1c531d32 --- /dev/null +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/planner/iterative/rule/SimplifyCoalesceOverJoinKeys.java @@ -0,0 +1,189 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner.iterative.rule; + +import com.facebook.presto.Session; +import com.facebook.presto.matching.Capture; +import com.facebook.presto.matching.Captures; +import com.facebook.presto.matching.Pattern; +import com.facebook.presto.spi.plan.Assignments; +import com.facebook.presto.spi.plan.EquiJoinClause; +import com.facebook.presto.spi.plan.JoinNode; +import com.facebook.presto.spi.plan.JoinType; +import com.facebook.presto.spi.plan.ProjectNode; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.planner.iterative.Rule; +import com.google.common.collect.ImmutableSet; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static com.facebook.presto.SystemSessionProperties.isSimplifyCoalesceOverJoinKeys; +import static com.facebook.presto.matching.Capture.newCapture; +import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.COALESCE; +import static com.facebook.presto.sql.planner.plan.Patterns.join; +import static com.facebook.presto.sql.planner.plan.Patterns.project; +import static com.facebook.presto.sql.planner.plan.Patterns.source; + +/** + * Simplifies redundant COALESCE expressions over equi-join keys based on join type. + *

+ * For equi-join l.x = r.y: + *

+ *

+ * This is important because tool-generated queries often produce patterns like + * {@code SELECT COALESCE(l.x, r.y) FROM l LEFT JOIN r ON l.x = r.y} which + * interferes with bucketed join planning. + */ +public class SimplifyCoalesceOverJoinKeys + implements Rule +{ + private static final Capture JOIN = newCapture(); + + private static final Pattern PATTERN = project() + .with(source().matching(join().capturedAs(JOIN))); + + @Override + public Pattern getPattern() + { + return PATTERN; + } + + @Override + public boolean isEnabled(Session session) + { + return isSimplifyCoalesceOverJoinKeys(session); + } + + @Override + public Result apply(ProjectNode project, Captures captures, Context context) + { + JoinNode joinNode = captures.get(JOIN); + JoinType joinType = joinNode.getType(); + + // FULL JOIN: both sides may be null, cannot simplify + if (joinType == JoinType.FULL) { + return Result.empty(); + } + + List criteria = joinNode.getCriteria(); + if (criteria.isEmpty()) { + return Result.empty(); + } + + Set leftVariables = ImmutableSet.copyOf(joinNode.getLeft().getOutputVariables()); + Set rightVariables = ImmutableSet.copyOf(joinNode.getRight().getOutputVariables()); + + // Build a map of join key pairs for quick lookup + // Maps (leftVar, rightVar) pairs from equi-join criteria + Map leftToRight = new HashMap<>(); + Map rightToLeft = new HashMap<>(); + for (EquiJoinClause clause : criteria) { + leftToRight.put(clause.getLeft(), clause.getRight()); + rightToLeft.put(clause.getRight(), clause.getLeft()); + } + + Assignments assignments = project.getAssignments(); + boolean anySimplified = false; + Assignments.Builder newAssignments = Assignments.builder(); + + for (Map.Entry entry : assignments.getMap().entrySet()) { + RowExpression expression = entry.getValue(); + RowExpression simplified = trySimplifyCoalesce(expression, joinType, leftVariables, rightVariables, leftToRight, rightToLeft); + if (simplified != expression) { + anySimplified = true; + } + newAssignments.put(entry.getKey(), simplified); + } + + if (!anySimplified) { + return Result.empty(); + } + + return Result.ofPlanNode(new ProjectNode( + project.getSourceLocation(), + context.getIdAllocator().getNextId(), + joinNode, + newAssignments.build(), + project.getLocality())); + } + + private static RowExpression trySimplifyCoalesce( + RowExpression expression, + JoinType joinType, + Set leftVariables, + Set rightVariables, + Map leftToRight, + Map rightToLeft) + { + if (!(expression instanceof SpecialFormExpression)) { + return expression; + } + + SpecialFormExpression specialForm = (SpecialFormExpression) expression; + if (specialForm.getForm() != COALESCE) { + return expression; + } + + List arguments = specialForm.getArguments(); + if (arguments.size() != 2) { + return expression; + } + + // Both arguments must be variable references + if (!(arguments.get(0) instanceof VariableReferenceExpression) || + !(arguments.get(1) instanceof VariableReferenceExpression)) { + return expression; + } + + VariableReferenceExpression first = (VariableReferenceExpression) arguments.get(0); + VariableReferenceExpression second = (VariableReferenceExpression) arguments.get(1); + + // Check if these two variables form an equi-join key pair + boolean isLeftRight = leftVariables.contains(first) && rightVariables.contains(second) && + leftToRight.containsKey(first) && leftToRight.get(first).equals(second); + boolean isRightLeft = rightVariables.contains(first) && leftVariables.contains(second) && + rightToLeft.containsKey(first) && rightToLeft.get(first).equals(second); + + if (!isLeftRight && !isRightLeft) { + return expression; + } + + VariableReferenceExpression leftKey = isLeftRight ? first : second; + VariableReferenceExpression rightKey = isLeftRight ? second : first; + + switch (joinType) { + case INNER: + // Both sides non-null on join keys; pick the first argument + return first; + case LEFT: + // Left key guaranteed non-null + return leftKey; + case RIGHT: + // Right key guaranteed non-null + return rightKey; + default: + return expression; + } + } +} diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java index cf66b20fca3e9..e7c0f09da91ac 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFeaturesConfig.java @@ -137,6 +137,7 @@ public void testDefaults() .setEnableIntermediateAggregations(false) .setPushAggregationThroughJoin(true) .setPushSemiJoinThroughUnion(false) + .setSimplifyCoalesceOverJoinKeys(false) .setPushdownThroughUnnest(false) .setForceSingleNodeOutput(true) .setPagesIndexEagerCompactionEnabled(false) @@ -359,6 +360,7 @@ public void testExplicitPropertyMappings() .put("optimizer.treat-low-confidence-zero-estimation-as-unknown", "true") .put("optimizer.push-aggregation-through-join", "false") .put("optimizer.push-semi-join-through-union", "true") + .put("optimizer.simplify-coalesce-over-join-keys", "true") .put("optimizer.pushdown-through-unnest", "true") .put("optimizer.aggregation-partition-merging", "top_down") .put("optimizer.local-exchange-parent-preference-strategy", "automatic") @@ -593,6 +595,7 @@ public void testExplicitPropertyMappings() .setAggregationPartitioningMergingStrategy(TOP_DOWN) .setPushAggregationThroughJoin(false) .setPushSemiJoinThroughUnion(true) + .setSimplifyCoalesceOverJoinKeys(true) .setPushdownThroughUnnest(true) .setSpillEnabled(true) .setJoinSpillingEnabled(false) diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/planner/iterative/rule/TestSimplifyCoalesceOverJoinKeys.java b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/iterative/rule/TestSimplifyCoalesceOverJoinKeys.java new file mode 100644 index 0000000000000..13d1bb3003cfb --- /dev/null +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/planner/iterative/rule/TestSimplifyCoalesceOverJoinKeys.java @@ -0,0 +1,402 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.sql.planner.iterative.rule; + +import com.facebook.presto.spi.plan.Assignments; +import com.facebook.presto.spi.plan.EquiJoinClause; +import com.facebook.presto.spi.plan.JoinType; +import com.facebook.presto.spi.relation.SpecialFormExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.planner.iterative.rule.test.BaseRuleTest; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import org.testng.annotations.Test; + +import java.util.Optional; + +import static com.facebook.presto.SystemSessionProperties.SIMPLIFY_COALESCE_OVER_JOIN_KEYS; +import static com.facebook.presto.common.type.BigintType.BIGINT; +import static com.facebook.presto.spi.relation.SpecialFormExpression.Form.COALESCE; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.equiJoinClause; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.expression; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.join; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.project; +import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values; + +public class TestSimplifyCoalesceOverJoinKeys + extends BaseRuleTest +{ + @Test + public void testLeftJoinCoalesceLeftRight() + { + // COALESCE(l.x, r.y) on LEFT JOIN l.x = r.y -> l.x + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("left_key")), + join(JoinType.LEFT, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testLeftJoinCoalesceRightLeft() + { + // COALESCE(r.y, l.x) on LEFT JOIN l.x = r.y -> l.x (left key is always non-null) + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, rightKey, leftKey)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("left_key")), + join(JoinType.LEFT, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testRightJoinCoalesceLeftRight() + { + // COALESCE(l.x, r.y) on RIGHT JOIN l.x = r.y -> r.y (right key is always non-null) + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.RIGHT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("right_key")), + join(JoinType.RIGHT, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testRightJoinCoalesceRightLeft() + { + // COALESCE(r.y, l.x) on RIGHT JOIN l.x = r.y -> r.y + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, rightKey, leftKey)) + .build(), + p.join(JoinType.RIGHT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("right_key")), + join(JoinType.RIGHT, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testInnerJoinCoalesceLeftRight() + { + // COALESCE(l.x, r.y) on INNER JOIN l.x = r.y -> l.x (first arg, since both non-null) + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.INNER, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("left_key")), + join(JoinType.INNER, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testInnerJoinCoalesceRightLeft() + { + // COALESCE(r.y, l.x) on INNER JOIN l.x = r.y -> r.y (first arg, since both non-null) + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, rightKey, leftKey)) + .build(), + p.join(JoinType.INNER, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("right_key")), + join(JoinType.INNER, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key"), + values("right_key")))); + } + + @Test + public void testDoesNotFireOnFullJoin() + { + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.FULL, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .doesNotFire(); + } + + @Test + public void testDoesNotFireOnCrossJoin() + { + // Cross join has no equi-join criteria + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.INNER, + p.values(leftKey), + p.values(rightKey))); + }) + .doesNotFire(); + } + + @Test + public void testDoesNotFireOnNonJoinKeyCoalesce() + { + // COALESCE(l.val, r.val) where val columns are NOT join keys + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression leftVal = p.variable("left_val", BIGINT); + VariableReferenceExpression rightVal = p.variable("right_val", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftVal, rightVal)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey, leftVal), + p.values(rightKey, rightVal), + new EquiJoinClause(leftKey, rightKey))); + }) + .doesNotFire(); + } + + @Test + public void testDoesNotFireWhenDisabled() + { + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "false") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .doesNotFire(); + } + + @Test + public void testDoesNotFireOnNonCoalesceProject() + { + // Project with identity assignments (no COALESCE) + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + return p.project( + Assignments.builder() + .put(leftKey, leftKey) + .put(rightKey, rightKey) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .doesNotFire(); + } + + @Test + public void testMultipleJoinKeys() + { + // Multiple join keys, COALESCE on both + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey1 = p.variable("left_key1", BIGINT); + VariableReferenceExpression rightKey1 = p.variable("right_key1", BIGINT); + VariableReferenceExpression leftKey2 = p.variable("left_key2", BIGINT); + VariableReferenceExpression rightKey2 = p.variable("right_key2", BIGINT); + VariableReferenceExpression output1 = p.variable("output1", BIGINT); + VariableReferenceExpression output2 = p.variable("output2", BIGINT); + return p.project( + Assignments.builder() + .put(output1, new SpecialFormExpression(COALESCE, BIGINT, leftKey1, rightKey1)) + .put(output2, new SpecialFormExpression(COALESCE, BIGINT, leftKey2, rightKey2)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey1, leftKey2), + p.values(rightKey1, rightKey2), + new EquiJoinClause(leftKey1, rightKey1), + new EquiJoinClause(leftKey2, rightKey2))); + }) + .matches( + project(ImmutableMap.of("output1", expression("left_key1"), "output2", expression("left_key2")), + join(JoinType.LEFT, + ImmutableList.of(equiJoinClause("left_key1", "right_key1"), equiJoinClause("left_key2", "right_key2")), + Optional.empty(), + values("left_key1", "left_key2"), + values("right_key1", "right_key2")))); + } + + @Test + public void testMixedCoalesceAndIdentity() + { + // One assignment is COALESCE over join keys, another is identity + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression leftVal = p.variable("left_val", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey)) + .put(leftVal, leftVal) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey, leftVal), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .matches( + project(ImmutableMap.of("output", expression("left_key"), "left_val", expression("left_val")), + join(JoinType.LEFT, ImmutableList.of(equiJoinClause("left_key", "right_key")), Optional.empty(), + values("left_key", "left_val"), + values("right_key")))); + } + + @Test + public void testDoesNotFireOnThreeArgCoalesce() + { + // COALESCE with 3 arguments -- should not simplify + tester().assertThat(new SimplifyCoalesceOverJoinKeys()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .on(p -> + { + VariableReferenceExpression leftKey = p.variable("left_key", BIGINT); + VariableReferenceExpression rightKey = p.variable("right_key", BIGINT); + VariableReferenceExpression extra = p.variable("extra", BIGINT); + VariableReferenceExpression output = p.variable("output", BIGINT); + return p.project( + Assignments.builder() + .put(output, new SpecialFormExpression(COALESCE, BIGINT, leftKey, rightKey, extra)) + .build(), + p.join(JoinType.LEFT, + p.values(leftKey, extra), + p.values(rightKey), + new EquiJoinClause(leftKey, rightKey))); + }) + .doesNotFire(); + } +} diff --git a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java index fc5cfde65b792..d7ae901378265 100644 --- a/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java +++ b/presto-tests/src/main/java/com/facebook/presto/tests/AbstractTestQueries.java @@ -90,6 +90,7 @@ import static com.facebook.presto.SystemSessionProperties.REWRITE_EXPRESSION_WITH_CONSTANT_EXPRESSION; import static com.facebook.presto.SystemSessionProperties.REWRITE_LEFT_JOIN_NULL_FILTER_TO_SEMI_JOIN; import static com.facebook.presto.SystemSessionProperties.REWRITE_MIN_MAX_BY_TO_TOP_N; +import static com.facebook.presto.SystemSessionProperties.SIMPLIFY_COALESCE_OVER_JOIN_KEYS; import static com.facebook.presto.SystemSessionProperties.SIMPLIFY_PLAN_WITH_EMPTY_INPUT; import static com.facebook.presto.SystemSessionProperties.USE_DEFAULTS_FOR_CORRELATED_AGGREGATION_PUSHDOWN_THROUGH_OUTER_JOINS; import static com.facebook.presto.common.type.BigintType.BIGINT; @@ -8229,4 +8230,50 @@ protected String getDateExpression(String storageFormat, String columnExpression // DWRF does not support date type. return storageFormat.equals("DWRF") ? "cast(" + columnExpression + " as DATE)" : columnExpression; } + + @Test + public void testSimplifyCoalesceOverJoinKeys() + { + Session enabledSession = Session.builder(getSession()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "true") + .build(); + Session disabledSession = Session.builder(getSession()) + .setSystemProperty(SIMPLIFY_COALESCE_OVER_JOIN_KEYS, "false") + .build(); + + // LEFT JOIN: COALESCE(l.x, r.y) should be simplified to l.x + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(n.nationkey, r.regionkey) FROM nation n LEFT JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // LEFT JOIN: COALESCE(r.y, l.x) should also simplify to l.x + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(r.regionkey, n.nationkey) FROM nation n LEFT JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // RIGHT JOIN: COALESCE(l.x, r.y) should simplify to r.y + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(n.nationkey, r.regionkey) FROM nation n RIGHT JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // INNER JOIN: COALESCE(l.x, r.y) should simplify to l.x (first arg) + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(n.nationkey, r.regionkey) FROM nation n INNER JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // FULL JOIN: COALESCE should NOT be simplified — verify results still match + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(n.nationkey, r.regionkey) FROM nation n FULL JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // Multiple columns with COALESCE on join key plus other columns + assertQueryWithSameQueryRunner(enabledSession, + "SELECT COALESCE(n.nationkey, r.regionkey), n.name FROM nation n LEFT JOIN region r ON n.nationkey = r.regionkey", + disabledSession); + + // JOIN USING produces COALESCE automatically + assertQueryWithSameQueryRunner(enabledSession, + "SELECT regionkey FROM nation LEFT JOIN region USING (regionkey)", + disabledSession); + } }