Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ public final class SystemSessionProperties
public static final String ADD_EXCHANGE_BELOW_PARTIAL_AGGREGATION_OVER_GROUP_ID = "add_exchange_below_partial_aggregation_over_group_id";
public static final String QUERY_CLIENT_TIMEOUT = "query_client_timeout";
public static final String REWRITE_MIN_MAX_BY_TO_TOP_N = "rewrite_min_max_by_to_top_n";
public static final String ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD = "add_distinct_below_semi_join_build";

// TODO: Native execution related session properties that are temporarily put here. They will be relocated in the future.
public static final String NATIVE_AGGREGATION_SPILL_ALL = "native_aggregation_spill_all";
Expand Down Expand Up @@ -1906,7 +1907,11 @@ public SystemSessionProperties(
queryManagerConfig.getClientTimeout(),
false,
value -> Duration.valueOf((String) value),
Duration::toString));
Duration::toString),
booleanProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD,
"Add distinct aggregation below semi join build",
featuresConfig.isAddDistinctBelowSemiJoinBuild(),
false));
}

public static boolean isSpoolingOutputBufferEnabled(Session session)
Expand Down Expand Up @@ -3238,6 +3243,11 @@ public static boolean isEnabledAddExchangeBelowGroupId(Session session)
return session.getSystemProperty(ADD_EXCHANGE_BELOW_PARTIAL_AGGREGATION_OVER_GROUP_ID, Boolean.class);
}

/**
* Reads the {@code add_distinct_below_semi_join_build} system session property
* (key held in {@link #ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD}) from the given session.
*
* @param session session whose system properties are consulted
* @return the boolean value of the property for this session
*/
public static boolean isAddDistinctBelowSemiJoinBuildEnabled(Session session)
{
return session.getSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, Boolean.class);
}

public static boolean isCanonicalizedJsonExtract(Session session)
{
return session.getSystemProperty(CANONICALIZED_JSON_EXTRACT, Boolean.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ public class FeaturesConfig
private boolean nativeExecutionTypeRewriteEnabled;
private String expressionOptimizerName = DEFAULT_EXPRESSION_OPTIMIZER_NAME;
private boolean addExchangeBelowPartialAggregationOverGroupId;
private boolean addDistinctBelowSemiJoinBuild;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be on by default?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to run it for some time before turning it on as default.


public enum PartitioningPrecisionStrategy
{
Expand Down Expand Up @@ -3014,4 +3015,17 @@ public boolean getAddExchangeBelowPartialAggregationOverGroupId()
{
return addExchangeBelowPartialAggregationOverGroupId;
}

/**
* Stores the flag bound to the {@code optimizer.add-distinct-below-semi-join-build}
* configuration key.
*
* @param addDistinctBelowSemiJoinBuild new value of the flag
* @return this config object, so that setter calls can be chained
*/
@Config("optimizer.add-distinct-below-semi-join-build")
@ConfigDescription("Add a distinct aggregation below build side of semi join")
public FeaturesConfig setAddDistinctBelowSemiJoinBuild(boolean addDistinctBelowSemiJoinBuild)
{
this.addDistinctBelowSemiJoinBuild = addDistinctBelowSemiJoinBuild;
return this;
}

/**
* Returns the flag last stored by {@code setAddDistinctBelowSemiJoinBuild}.
*
* @return the current value of the {@code addDistinctBelowSemiJoinBuild} field
*/
public boolean isAddDistinctBelowSemiJoinBuild()
{
return addDistinctBelowSemiJoinBuild;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.facebook.presto.sql.planner.iterative.IterativeOptimizer;
import com.facebook.presto.sql.planner.iterative.Rule;
import com.facebook.presto.sql.planner.iterative.properties.LogicalPropertiesProviderImpl;
import com.facebook.presto.sql.planner.iterative.rule.AddDistinctForSemiJoinBuild;
import com.facebook.presto.sql.planner.iterative.rule.AddExchangesBelowPartialAggregationOverGroupIdRuleSet;
import com.facebook.presto.sql.planner.iterative.rule.AddIntermediateAggregations;
import com.facebook.presto.sql.planner.iterative.rule.AddNotNullFiltersToJoinNode;
Expand Down Expand Up @@ -593,6 +594,12 @@ public PlanOptimizers(
statsCalculator,
estimatedExchangesCostCalculator,
ImmutableSet.of(new LeftJoinNullFilterToSemiJoin(metadata.getFunctionAndTypeManager()))),
new IterativeOptimizer(
metadata,
ruleStats,
statsCalculator,
estimatedExchangesCostCalculator,
ImmutableSet.of(new AddDistinctForSemiJoinBuild())),
new KeyBasedSampler(metadata),
new IterativeOptimizer(
metadata,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql.planner.iterative.rule;

import com.facebook.presto.Session;
import com.facebook.presto.matching.Captures;
import com.facebook.presto.matching.Pattern;
import com.facebook.presto.spi.plan.AggregationNode;
import com.facebook.presto.spi.plan.FilterNode;
import com.facebook.presto.spi.plan.PlanNode;
import com.facebook.presto.spi.plan.ProjectNode;
import com.facebook.presto.spi.plan.SemiJoinNode;
import com.facebook.presto.spi.relation.RowExpression;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.planner.iterative.Rule;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.Optional;

import static com.facebook.presto.SystemSessionProperties.isAddDistinctBelowSemiJoinBuildEnabled;
import static com.facebook.presto.spi.plan.AggregationNode.isDistinct;
import static com.facebook.presto.spi.plan.AggregationNode.singleGroupingSet;
import static com.facebook.presto.sql.planner.plan.Patterns.semiJoin;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add small comment explaining the rule

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/**
 * Iterative-optimizer rule that places a distinct aggregation underneath the
 * filtering (build) side of a semi join.
 *
 * <p>The rule matches every {@link SemiJoinNode} and only runs when the
 * {@code add_distinct_below_semi_join_build} session property is enabled.
 * It rewrites
 * <pre>
 * - SemiJoin
 *     - source
 *     - filtering source
 * </pre>
 * into
 * <pre>
 * - SemiJoin
 *     - source
 *     - Aggregation (SINGLE step, grouped on the filtering-source join variable)
 *         - filtering source
 * </pre>
 * The rule does not fire when {@link #isOutputDistinct} already recognises the
 * filtering-source join variable as the sole key of a distinct aggregation,
 * possibly seen through filters and pass-through projections.
 */
public class AddDistinctForSemiJoinBuild
        implements Rule<SemiJoinNode>
{
    @Override
    public Pattern<SemiJoinNode> getPattern()
    {
        return semiJoin();
    }

    @Override
    public boolean isEnabled(Session session)
    {
        return isAddDistinctBelowSemiJoinBuildEnabled(session);
    }

    /**
     * Wraps the filtering source of {@code node} in a distinct aggregation on the
     * filtering-source join variable, unless that variable is already distinct.
     *
     * @return an empty result when no rewrite is needed, otherwise the semi join
     *         with its second child replaced by the new aggregation
     */
    @Override
    public Result apply(SemiJoinNode node, Captures captures, Context context)
    {
        PlanNode buildSide = context.getLookup().resolve(node.getFilteringSource());
        VariableReferenceExpression buildKey = node.getFilteringSourceJoinVariable();

        // Skip the rewrite when the build key is already known to be distinct.
        if (isOutputDistinct(buildSide, buildKey, context)) {
            return Result.empty();
        }

        AggregationNode.GroupingSetDescriptor groupByBuildKey = singleGroupingSet(ImmutableList.of(buildKey));
        AggregationNode deduplicatedBuildSide = new AggregationNode(
                node.getSourceLocation(),
                context.getIdAllocator().getNextId(),
                buildSide,
                ImmutableMap.of(),
                groupByBuildKey,
                ImmutableList.of(),
                AggregationNode.Step.SINGLE,
                Optional.empty(),
                Optional.empty(),
                Optional.empty());

        // Children are passed in the order (source, filtering source).
        return Result.ofPlanNode(node.replaceChildren(ImmutableList.of(node.getSource(), deduplicatedBuildSide)));
    }

    /**
     * Tells whether {@code output} is recognised as distinct in the output of {@code node}.
     *
     * <p>Walks down from {@code node}: filters are skipped, projections are followed
     * only when they map the tracked variable to another plain variable, and the walk
     * succeeds when it reaches a distinct aggregation whose single grouping key is the
     * tracked variable. Any other node type ends the walk with {@code false}.
     *
     * @param node plan node (already resolved) whose output is inspected
     * @param output variable expected to be distinct
     * @param context rule context, used to resolve child nodes
     */
    boolean isOutputDistinct(PlanNode node, VariableReferenceExpression output, Context context)
    {
        PlanNode current = node;
        VariableReferenceExpression tracked = output;

        while (true) {
            if (current instanceof AggregationNode) {
                AggregationNode aggregation = (AggregationNode) current;
                if (!isDistinct(aggregation)) {
                    return false;
                }
                return aggregation.getGroupingKeys().size() == 1
                        && aggregation.getGroupingKeys().contains(tracked);
            }

            if (current instanceof ProjectNode) {
                ProjectNode projection = (ProjectNode) current;
                RowExpression definition = projection.getAssignments().get(tracked);
                // Only a plain variable rename is followed; any other expression stops the walk.
                if (!(definition instanceof VariableReferenceExpression)) {
                    return false;
                }
                tracked = (VariableReferenceExpression) definition;
                current = context.getLookup().resolve(projection.getSource());
                continue;
            }

            if (current instanceof FilterNode) {
                // A filter is looked through; the same variable is tracked below it.
                current = context.getLookup().resolve(((FilterNode) current).getSource());
                continue;
            }

            return false;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ public void testDefaults()
.setExpressionOptimizerName("default")
.setExcludeInvalidWorkerSessionProperties(false)
.setAddExchangeBelowPartialAggregationOverGroupId(false)
.setAddDistinctBelowSemiJoinBuild(false)
.setInnerJoinPushdownEnabled(false)
.setInEqualityJoinPushdownEnabled(false)
.setRewriteMinMaxByToTopNEnabled(false)
Expand Down Expand Up @@ -467,6 +468,7 @@ public void testExplicitPropertyMappings()
.put("enhanced-cte-scheduling-enabled", "false")
.put("expression-optimizer-name", "custom")
.put("exclude-invalid-worker-session-properties", "true")
.put("optimizer.add-distinct-below-semi-join-build", "true")
.put("optimizer.add-exchange-below-partial-aggregation-over-group-id", "true")
.build();

Expand Down Expand Up @@ -670,6 +672,7 @@ public void testExplicitPropertyMappings()
.setExpressionOptimizerName("custom")
.setExcludeInvalidWorkerSessionProperties(true)
.setAddExchangeBelowPartialAggregationOverGroupId(true)
.setAddDistinctBelowSemiJoinBuild(true)
.setInEqualityJoinPushdownEnabled(true)
.setRewriteMinMaxByToTopNEnabled(true)
.setInnerJoinPushdownEnabled(true)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.sql.planner.iterative.rule;

import com.facebook.presto.spi.plan.AggregationNode;
import com.facebook.presto.spi.relation.VariableReferenceExpression;
import com.facebook.presto.sql.planner.iterative.rule.test.BaseRuleTest;
import com.google.common.collect.ImmutableMap;
import org.testng.annotations.Test;

import java.util.Optional;

import static com.facebook.presto.SystemSessionProperties.ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.aggregation;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.semiJoin;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.singleGroupingSet;
import static com.facebook.presto.sql.planner.assertions.PlanMatchPattern.values;
import static com.facebook.presto.sql.planner.iterative.rule.test.PlanBuilder.assignment;

/**
 * Rule-level tests for {@link AddDistinctForSemiJoinBuild}: two cases where the
 * rule must add a distinct aggregation below the filtering source of a semi join,
 * and two cases where it must not fire.
 */
public class TestAddDistinctForSemiJoinBuild
        extends BaseRuleTest
{
    // Variable names shared by the plans built and the patterns matched below.
    private static final String SOURCE_KEY = "sourceJoinVariable";
    private static final String FILTERING_KEY = "filteringSourceJoinVariable";
    private static final String SEMI_JOIN_OUTPUT = "semiJoinOutput";
    private static final String EXTRA_COLUMN = "col1";
    private static final String ENABLED = "true";

    /**
     * A plain values node as filtering source gets a distinct aggregation added on top of it.
     */
    @Test
    public void testTrigger()
    {
        tester().assertThat(new AddDistinctForSemiJoinBuild())
                .setSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, ENABLED)
                .on(planBuilder -> {
                    VariableReferenceExpression probeKey = planBuilder.variable(SOURCE_KEY);
                    VariableReferenceExpression buildKey = planBuilder.variable(FILTERING_KEY);
                    VariableReferenceExpression matchFlag = planBuilder.variable(SEMI_JOIN_OUTPUT);
                    return planBuilder.semiJoin(
                            probeKey,
                            buildKey,
                            matchFlag,
                            Optional.empty(),
                            Optional.empty(),
                            planBuilder.values(probeKey),
                            planBuilder.values(buildKey));
                })
                .matches(
                        semiJoin(
                                SOURCE_KEY,
                                FILTERING_KEY,
                                SEMI_JOIN_OUTPUT,
                                values(SOURCE_KEY),
                                aggregation(
                                        singleGroupingSet(FILTERING_KEY),
                                        ImmutableMap.of(),
                                        ImmutableMap.of(),
                                        Optional.empty(),
                                        AggregationNode.Step.SINGLE,
                                        values(FILTERING_KEY))));
    }

    /**
     * An existing aggregation grouped on two keys does not qualify as distinct on the
     * join key, so a new single-key aggregation is stacked above it.
     */
    @Test
    public void testTriggerOverNonQualifiedDistinctAggregation()
    {
        tester().assertThat(new AddDistinctForSemiJoinBuild())
                .setSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, ENABLED)
                .on(planBuilder -> {
                    VariableReferenceExpression probeKey = planBuilder.variable(SOURCE_KEY);
                    VariableReferenceExpression buildKey = planBuilder.variable(FILTERING_KEY);
                    VariableReferenceExpression matchFlag = planBuilder.variable(SEMI_JOIN_OUTPUT);
                    VariableReferenceExpression extraColumn = planBuilder.variable(EXTRA_COLUMN);
                    return planBuilder.semiJoin(
                            probeKey,
                            buildKey,
                            matchFlag,
                            Optional.empty(),
                            Optional.empty(),
                            planBuilder.values(probeKey),
                            planBuilder.aggregation(aggregationBuilder -> aggregationBuilder
                                    .singleGroupingSet(buildKey, extraColumn)
                                    .step(AggregationNode.Step.SINGLE)
                                    .source(planBuilder.values(buildKey, extraColumn))));
                })
                .matches(
                        semiJoin(
                                SOURCE_KEY,
                                FILTERING_KEY,
                                SEMI_JOIN_OUTPUT,
                                values(SOURCE_KEY),
                                aggregation(
                                        singleGroupingSet(FILTERING_KEY),
                                        ImmutableMap.of(),
                                        ImmutableMap.of(),
                                        Optional.empty(),
                                        AggregationNode.Step.SINGLE,
                                        aggregation(
                                                singleGroupingSet(FILTERING_KEY, EXTRA_COLUMN),
                                                ImmutableMap.of(),
                                                ImmutableMap.of(),
                                                Optional.empty(),
                                                AggregationNode.Step.SINGLE,
                                                values(FILTERING_KEY, EXTRA_COLUMN)))));
    }

    /**
     * A filtering source that is already a distinct aggregation on the join key is left alone.
     */
    @Test
    public void testNotTriggerOverDistinct()
    {
        tester().assertThat(new AddDistinctForSemiJoinBuild())
                .setSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, ENABLED)
                .on(planBuilder -> {
                    VariableReferenceExpression probeKey = planBuilder.variable(SOURCE_KEY);
                    VariableReferenceExpression buildKey = planBuilder.variable(FILTERING_KEY);
                    VariableReferenceExpression matchFlag = planBuilder.variable(SEMI_JOIN_OUTPUT);
                    return planBuilder.semiJoin(
                            probeKey,
                            buildKey,
                            matchFlag,
                            Optional.empty(),
                            Optional.empty(),
                            planBuilder.values(probeKey),
                            planBuilder.aggregation(aggregationBuilder -> aggregationBuilder
                                    .singleGroupingSet(buildKey)
                                    .step(AggregationNode.Step.SINGLE)
                                    .source(planBuilder.values(buildKey))));
                })
                .doesNotFire();
    }

    /**
     * A projection that merely renames the key of a distinct aggregation is seen through,
     * so the rule does not fire.
     */
    @Test
    public void testNotTriggerOverDistinctUnderProject()
    {
        tester().assertThat(new AddDistinctForSemiJoinBuild())
                .setSystemProperty(ADD_DISTINCT_BELOW_SEMI_JOIN_BUILD, ENABLED)
                .on(planBuilder -> {
                    VariableReferenceExpression probeKey = planBuilder.variable(SOURCE_KEY);
                    VariableReferenceExpression buildKey = planBuilder.variable(FILTERING_KEY);
                    VariableReferenceExpression matchFlag = planBuilder.variable(SEMI_JOIN_OUTPUT);
                    VariableReferenceExpression extraColumn = planBuilder.variable(EXTRA_COLUMN);
                    return planBuilder.semiJoin(
                            probeKey,
                            buildKey,
                            matchFlag,
                            Optional.empty(),
                            Optional.empty(),
                            planBuilder.values(probeKey),
                            planBuilder.project(
                                    assignment(buildKey, planBuilder.rowExpression(EXTRA_COLUMN)),
                                    planBuilder.aggregation(aggregationBuilder -> aggregationBuilder
                                            .singleGroupingSet(extraColumn)
                                            .step(AggregationNode.Step.SINGLE)
                                            .source(planBuilder.values(extraColumn)))));
                })
                .doesNotFire();
    }
}
Loading
Loading