-
Notifications
You must be signed in to change notification settings - Fork 1.9k
fix: EnforceSorting should not remove a needed coalesce #14637
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
670eff3
0661ed7
89556c2
dce44c5
7ec42b4
06961b1
29d799c
b265c4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -47,16 +47,16 @@ use crate::enforce_sorting::sort_pushdown::{ | |||||
| assign_initial_requirements, pushdown_sorts, SortPushDown, | ||||||
| }; | ||||||
| use crate::utils::{ | ||||||
| add_sort_above, add_sort_above_with_check, is_coalesce_partitions, is_limit, | ||||||
| is_repartition, is_sort, is_sort_preserving_merge, is_union, is_window, | ||||||
| add_sort_above, add_sort_above_with_check, is_aggregate, is_coalesce_partitions, | ||||||
| is_limit, is_repartition, is_sort, is_sort_preserving_merge, is_union, is_window, | ||||||
| }; | ||||||
| use crate::PhysicalOptimizerRule; | ||||||
|
|
||||||
| use datafusion_common::config::ConfigOptions; | ||||||
| use datafusion_common::plan_err; | ||||||
| use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; | ||||||
| use datafusion_common::Result; | ||||||
| use datafusion_physical_expr::{Distribution, Partitioning}; | ||||||
| use datafusion_physical_expr::Distribution; | ||||||
| use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; | ||||||
| use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; | ||||||
| use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; | ||||||
|
|
@@ -138,29 +138,65 @@ fn update_sort_ctx_children_data( | |||||
| /// [`CoalescePartitionsExec`] descendant(s) for every child of a plan. The data | ||||||
| /// attribute stores whether the plan is a `CoalescePartitionsExec` or is | ||||||
| /// connected to a `CoalescePartitionsExec` via its children. | ||||||
| /// | ||||||
| /// The tracker halts at each [`SortExec`] (where the SPM will act to replace the coalesce). | ||||||
| /// | ||||||
| /// This requires that a bottom-up traversal was previously performed, so that the | ||||||
| /// children have already been updated. | ||||||
| pub type PlanWithCorrespondingCoalescePartitions = PlanContext<bool>; | ||||||
|
Comment on lines
+141
to
146
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did a refactor. See this commit.
|
||||||
|
|
||||||
| /// Determines if the coalesce may be safely removed. | ||||||
| fn is_coalesce_to_remove( | ||||||
| node: &Arc<dyn ExecutionPlan>, | ||||||
| parent: &Arc<dyn ExecutionPlan>, | ||||||
| ) -> bool { | ||||||
| node.as_any().downcast_ref::<CoalescePartitionsExec>() | ||||||
| .map(|_coalesce| { | ||||||
| // TODO(wiedld): find a more generalized approach that does not rely on | ||||||
| // pattern matching the structure of the DAG | ||||||
| // Note that the `Partitioning::satisfy()` (parent vs. coalesce.child) cannot be used for cases of: | ||||||
| // * Repartition -> Coalesce -> Repartition | ||||||
| // * Coalesce -> AggregateExec(input=hash-partitioned) | ||||||
|
|
||||||
| let parent_req_single_partition = matches!(parent.required_input_distribution()[0], Distribution::SinglePartition) | ||||||
| // handle aggregates with input=hashPartitioning with a single output partition | ||||||
| || (is_aggregate(parent) && parent.properties().output_partitioning().partition_count() <= 1); | ||||||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This single line is the fix for our found issue.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @alamb -- lmk if you want the refactor to go in first, in a separate PR, before the reproducer + fix PR.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry I don't understand what this is asking -- if it is still relevant can you perhaps clarify what refactor you are referring to? |
||||||
|
|
||||||
| // node above does not require single distribution | ||||||
| !parent_req_single_partition | ||||||
| // or the parent immediately repartitions (Repartition -> Coalesce -> Repartition) | ||||||
| || is_repartition(parent) | ||||||
| // any adjacent Coalesce->Sort can be replaced | ||||||
| || is_sort(parent) | ||||||
| }).unwrap_or(false) | ||||||
| } | ||||||
|
|
||||||
| fn update_coalesce_ctx_children( | ||||||
| coalesce_context: &mut PlanWithCorrespondingCoalescePartitions, | ||||||
| ) { | ||||||
| let children = &coalesce_context.children; | ||||||
| coalesce_context.data = if children.is_empty() { | ||||||
| // Plan has no children, it cannot be a `CoalescePartitionsExec`. | ||||||
| false | ||||||
| } else if is_coalesce_partitions(&coalesce_context.plan) { | ||||||
| // Initiate a connection: | ||||||
| true | ||||||
| } else { | ||||||
| children.iter().enumerate().any(|(idx, node)| { | ||||||
| // Only consider operators that don't require a single partition, | ||||||
| // and connected to some `CoalescePartitionsExec`: | ||||||
| node.data | ||||||
| && !matches!( | ||||||
| coalesce_context.plan.required_input_distribution()[idx], | ||||||
| Distribution::SinglePartition | ||||||
| ) | ||||||
| }) | ||||||
| }; | ||||||
| // perform lookahead(1) during bottom up traversal | ||||||
| // since we are checking distribution requirements after the coalesce occurs | ||||||
| let parent = &coalesce_context.plan; | ||||||
|
|
||||||
| for child_context in coalesce_context.children.iter_mut() { | ||||||
| // determine if the child, or one of its descendants, is a coalesce to be removed | ||||||
| child_context.data = if child_context.children.is_empty() { | ||||||
| // Plan has no children, it cannot be a `CoalescePartitionsExec`. | ||||||
| false | ||||||
| } else if is_coalesce_to_remove(&child_context.plan, parent) { | ||||||
| // Initiate a connection: | ||||||
| true | ||||||
| } else if is_sort(&child_context.plan) { | ||||||
| // halt coalesce removals at the sort | ||||||
| false | ||||||
| } else { | ||||||
| // propagate | ||||||
| child_context | ||||||
| .children | ||||||
| .iter() | ||||||
| .any(|grandchild| grandchild.data) | ||||||
| }; | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| /// Performs optimizations based upon a series of subrules. | ||||||
|
|
@@ -316,25 +352,43 @@ fn replace_with_partial_sort( | |||||
| /// are transformed into | ||||||
| /// ```text | ||||||
| /// "SortPreservingMergeExec: \[a@0 ASC\]", | ||||||
| /// " ...nodes..." | ||||||
| /// " SortExec: expr=\[a@0 ASC\]", | ||||||
| /// " SortExec: expr=\[a@0 ASC\]", | ||||||
| /// " ...nodes..." | ||||||
| /// " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", | ||||||
| /// ``` | ||||||
| /// by following connections from [`CoalescePartitionsExec`]s to [`SortExec`]s. | ||||||
| /// By performing sorting in parallel, we can increase performance in some scenarios. | ||||||
| /// | ||||||
| /// This requires that there are no nodes between the [`SortExec`] and [`CoalescePartitionsExec`] | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Think I need better words. 😆 The context is made to find linked Sort->Coalesce cascades. This linkage is then used to say "if we find a sort, remove the linked coalesces from the subplan". Specifically, this code. If the link is broken, a.k.a. if So the link only exists as long as "no nodes" break the link. Example of an unlinked Coalesce->Sort, since the aggregate requires the coalesce for single partitioned input:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What would be a better way to say/explain this? 🙏🏼 Maybe I should add docs to the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I got it. Now I think current doc is very good. 😆 |
||||||
| /// which require single partitioning. Do not parallelize when the following scenario occurs: | ||||||
| /// ```text | ||||||
| /// "SortExec: expr=\[a@0 ASC\]", | ||||||
| /// " ...nodes requiring single partitioning..." | ||||||
| /// " CoalescePartitionsExec", | ||||||
| /// " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", | ||||||
| /// ``` | ||||||
| pub fn parallelize_sorts( | ||||||
| mut requirements: PlanWithCorrespondingCoalescePartitions, | ||||||
| ) -> Result<Transformed<PlanWithCorrespondingCoalescePartitions>> { | ||||||
| requirements = requirements.update_plan_from_children()?; | ||||||
| update_coalesce_ctx_children(&mut requirements); | ||||||
| let coalesce_can_be_removed = requirements.children.iter().any(|child| child.data); | ||||||
|
|
||||||
| let should_parallelize_sort = (is_sort(&requirements.plan) | ||||||
| || is_sort_preserving_merge(&requirements.plan)) | ||||||
| && requirements.plan.output_partitioning().partition_count() <= 1 | ||||||
| && coalesce_can_be_removed; | ||||||
|
|
||||||
| // Repartition -> Coalesce -> Repartition | ||||||
| let unneeded_coalesce = is_repartition(&requirements.plan) && coalesce_can_be_removed; | ||||||
|
|
||||||
| if requirements.children.is_empty() || !requirements.children[0].data { | ||||||
| // We only take an action when the plan is either a `SortExec`, a | ||||||
| // `SortPreservingMergeExec` or a `CoalescePartitionsExec`, and they | ||||||
| // all have a single child. Therefore, if the first child has no | ||||||
| // connection, we can return immediately. | ||||||
| Ok(Transformed::no(requirements)) | ||||||
| } else if (is_sort(&requirements.plan) | ||||||
| || is_sort_preserving_merge(&requirements.plan)) | ||||||
| && requirements.plan.output_partitioning().partition_count() <= 1 | ||||||
| { | ||||||
| } else if should_parallelize_sort { | ||||||
| // Take the initial sort expressions and requirements | ||||||
| let (sort_exprs, fetch) = get_sort_exprs(&requirements.plan)?; | ||||||
| let sort_reqs = LexRequirement::from(sort_exprs.clone()); | ||||||
|
|
@@ -349,8 +403,11 @@ pub fn parallelize_sorts( | |||||
| // We also need to remove the self node since `remove_corresponding_coalesce_in_sub_plan` | ||||||
| // deals with the children and their children and so on. | ||||||
| requirements = requirements.children.swap_remove(0); | ||||||
| // sync the requirements.plan.children with the mutated requirements.children | ||||||
| requirements = requirements.update_plan_from_children()?; | ||||||
|
|
||||||
| requirements = add_sort_above_with_check(requirements, sort_reqs, fetch); | ||||||
| requirements = requirements.update_plan_from_children()?; | ||||||
|
|
||||||
| let spm = | ||||||
| SortPreservingMergeExec::new(sort_exprs, Arc::clone(&requirements.plan)); | ||||||
|
|
@@ -361,20 +418,11 @@ pub fn parallelize_sorts( | |||||
| vec![requirements], | ||||||
| ), | ||||||
| )) | ||||||
| } else if is_coalesce_partitions(&requirements.plan) { | ||||||
| // There is an unnecessary `CoalescePartitionsExec` in the plan. | ||||||
| // This will handle the recursive `CoalescePartitionsExec` plans. | ||||||
| } else if unneeded_coalesce { | ||||||
| requirements = remove_bottleneck_in_subplan(requirements)?; | ||||||
| // For the removal of self node which is also a `CoalescePartitionsExec`. | ||||||
| requirements = requirements.children.swap_remove(0); | ||||||
| requirements = requirements.update_plan_from_children()?; | ||||||
|
|
||||||
| Ok(Transformed::yes( | ||||||
| PlanWithCorrespondingCoalescePartitions::new( | ||||||
| Arc::new(CoalescePartitionsExec::new(Arc::clone(&requirements.plan))), | ||||||
| false, | ||||||
| vec![requirements], | ||||||
| ), | ||||||
| )) | ||||||
|
Comment on lines
-364
to
-377
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With the context refactor, the |
||||||
| Ok(Transformed::yes(requirements)) | ||||||
| } else { | ||||||
| Ok(Transformed::yes(requirements)) | ||||||
| } | ||||||
|
|
@@ -614,19 +662,7 @@ fn remove_bottleneck_in_subplan( | |||||
| }) | ||||||
| .collect::<Result<_>>()?; | ||||||
| } | ||||||
| let mut new_reqs = requirements.update_plan_from_children()?; | ||||||
| if let Some(repartition) = new_reqs.plan.as_any().downcast_ref::<RepartitionExec>() { | ||||||
| let input_partitioning = repartition.input().output_partitioning(); | ||||||
| // We can remove this repartitioning operator if it is now a no-op: | ||||||
| let mut can_remove = input_partitioning.eq(repartition.partitioning()); | ||||||
| // We can also remove it if we ended up with an ineffective RR: | ||||||
| if let Partitioning::RoundRobinBatch(n_out) = repartition.partitioning() { | ||||||
| can_remove |= *n_out == input_partitioning.partition_count(); | ||||||
| } | ||||||
| if can_remove { | ||||||
| new_reqs = new_reqs.children.swap_remove(0) | ||||||
| } | ||||||
| } | ||||||
|
Comment on lines
-617
to
-629
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is about identifying (and removing) Repartition->Coalesce->Repartition, to make it only a singular repartition. Since the removal decisions were already being made when the context is built, I consolidated this removal decision to the same place ( |
||||||
| let new_reqs = requirements.update_plan_from_children()?; | ||||||
| Ok(new_reqs) | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
Uh oh!
There was an error while loading. Please reload this page.