-
Notifications
You must be signed in to change notification settings - Fork 5.5k
fix: Improve sort merge join rule to do merge join when one sided is sorted #26361
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
|
|
||
| import com.facebook.presto.Session; | ||
| import com.facebook.presto.spi.plan.JoinType; | ||
| import com.facebook.presto.sql.analyzer.FeaturesConfig; | ||
| import com.facebook.presto.sql.planner.assertions.PlanMatchPattern; | ||
| import com.facebook.presto.testing.QueryRunner; | ||
| import com.facebook.presto.tests.AbstractTestQueryFramework; | ||
|
|
@@ -56,6 +57,12 @@ protected QueryRunner createQueryRunner() | |
| Optional.empty()); | ||
| } | ||
|
|
||
| @Override | ||
| protected FeaturesConfig createFeaturesConfig() | ||
| { | ||
| return new FeaturesConfig().setNativeExecutionEnabled(true); | ||
| } | ||
|
|
||
| @Test | ||
| public void testJoinType() | ||
| { | ||
|
|
@@ -83,19 +90,19 @@ public void testJoinType() | |
| assertPlan( | ||
| mergeJoinEnabled(), | ||
| "select * from test_join_customer_join_type left join test_join_order_join_type on test_join_customer_join_type.custkey = test_join_order_join_type.custkey", | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), LEFT, false)); | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), LEFT, true)); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Now not only inner joins are supported
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only INNER join is supported -> is this true for Prestissimo only?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I mean |
||
|
|
||
| // Right join | ||
| assertPlan( | ||
| mergeJoinEnabled(), | ||
| "select * from test_join_customer_join_type right join test_join_order_join_type on test_join_customer_join_type.custkey = test_join_order_join_type.custkey", | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), RIGHT, false)); | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), RIGHT, true)); | ||
|
|
||
| // Outer join | ||
| assertPlan( | ||
| mergeJoinEnabled(), | ||
| "select * from test_join_customer_join_type full join test_join_order_join_type on test_join_customer_join_type.custkey = test_join_order_join_type.custkey", | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), FULL, false)); | ||
| joinPlan("test_join_customer_join_type", "test_join_order_join_type", ImmutableList.of("custkey"), ImmutableList.of("custkey"), FULL, true)); | ||
| } | ||
| finally { | ||
| queryRunner.execute("DROP TABLE IF EXISTS test_join_customer_join_type"); | ||
|
|
||
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -39,7 +39,6 @@ | |||
| import java.util.OptionalInt; | ||||
|
|
||||
| import static com.facebook.presto.SystemSessionProperties.GROUPED_EXECUTION; | ||||
| import static com.facebook.presto.SystemSessionProperties.isGroupedExecutionEnabled; | ||||
| import static com.facebook.presto.SystemSessionProperties.preferSortMergeJoin; | ||||
| import static com.facebook.presto.spi.StandardErrorCode.INVALID_PLAN_ERROR; | ||||
| import static com.facebook.presto.spi.connector.ConnectorCapabilities.SUPPORTS_PAGE_SINK_COMMIT; | ||||
|
|
@@ -58,13 +57,15 @@ class GroupedExecutionTagger | |||
| private final Metadata metadata; | ||||
| private final NodePartitioningManager nodePartitioningManager; | ||||
| private final boolean groupedExecutionEnabled; | ||||
| private final boolean isPrestoOnSpark; | ||||
|
|
||||
| public GroupedExecutionTagger(Session session, Metadata metadata, NodePartitioningManager nodePartitioningManager) | ||||
| public GroupedExecutionTagger(Session session, Metadata metadata, NodePartitioningManager nodePartitioningManager, boolean groupedExecutionEnabled, boolean isPrestoOnSpark) | ||||
| { | ||||
| this.session = requireNonNull(session, "session is null"); | ||||
| this.metadata = requireNonNull(metadata, "metadata is null"); | ||||
| this.nodePartitioningManager = requireNonNull(nodePartitioningManager, "nodePartitioningManager is null"); | ||||
| this.groupedExecutionEnabled = isGroupedExecutionEnabled(session); | ||||
| this.groupedExecutionEnabled = groupedExecutionEnabled; | ||||
| this.isPrestoOnSpark = isPrestoOnSpark; | ||||
| } | ||||
|
|
||||
| @Override | ||||
|
|
@@ -166,6 +167,15 @@ public GroupedExecutionTagger.GroupedExecutionProperties visitMergeJoin(MergeJoi | |||
| // TODO: This will break the other use case for merge join operating on sorted tables, which requires grouped execution for correctness. | ||||
| return GroupedExecutionTagger.GroupedExecutionProperties.notCapable(); | ||||
| } | ||||
|
|
||||
| if (isPrestoOnSpark) { | ||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When presto on spark is the environment, if one side is grouped execution capable, even if the other side is not, the data will still be partitioned by buckets, and have a bucket by bucket execution in presto on spark
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Grouped execution is not needed in Presto on Spark. In fact we explicitly check it is not enabled for Presto on Spark: Line 53 in 3d99a78
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems like change to the GroupedExecutionTagger should not be needed. @feilong-liu could you please check?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @arhimondr Thanks for review. The check here is not to enable grouped execution, it's to make sure that the merge join's input will be per bucket so that each task for presto on spark gets one bucket of input, so that within each task the data is sorted (here we skip sort if the input is bucket and sorted in each bucket). |
||||
| GroupedExecutionTagger.GroupedExecutionProperties mergeJoinLeft = node.getLeft().accept(new GroupedExecutionTagger(session, metadata, nodePartitioningManager, true, true), null); | ||||
| GroupedExecutionTagger.GroupedExecutionProperties mergeJoinRight = node.getRight().accept(new GroupedExecutionTagger(session, metadata, nodePartitioningManager, true, true), null); | ||||
| if (mergeJoinLeft.currentNodeCapable || mergeJoinRight.currentNodeCapable) { | ||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If one side is capable of grouped execution, it means that the input is partitioned by buckets, even if the other side is not capable of grouped execution, the input will be co-located, i.e. also partitioned by buckets, hence doing effective bucket by bucket execution in presto on spark |
||||
| return GroupedExecutionTagger.GroupedExecutionProperties.notCapable(); | ||||
| } | ||||
| } | ||||
|
|
||||
| throw new PrestoException( | ||||
| INVALID_PLAN_ERROR, | ||||
| format("When grouped execution can't be enabled, merge join plan is not valid." + | ||||
|
|
||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Set the test environment to be native execution