-
Notifications
You must be signed in to change notification settings - Fork 1.9k
perf: Introduce sort prefix computation for early TopK exit optimization on partially sorted input (10x speedup on top10 bench) #15563
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e3ea6d6
a861842
57028a3
9e89688
4f14bef
b1b1e18
2fe1480
31310d8
1f94f80
3e2e7ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -966,6 +966,8 @@ pub struct SortExec { | |
| preserve_partitioning: bool, | ||
| /// Fetch highest/lowest n results | ||
| fetch: Option<usize>, | ||
| /// Normalized common sort prefix between the input and the sort expressions (only used with fetch) | ||
| common_sort_prefix: LexOrdering, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, can we just keep the number of common-prefix expressions taken from the existing `expr: LexOrdering` field (the one documented as `/// Sort expressions`)?
I think that would be simpler and would avoid duplication.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not sure we can do that simply, as the |
||
| /// Cache holding plan properties like equivalences, output partitioning etc. | ||
| cache: PlanProperties, | ||
| } | ||
|
|
@@ -975,13 +977,15 @@ impl SortExec { | |
| /// sorted output partition. | ||
| pub fn new(expr: LexOrdering, input: Arc<dyn ExecutionPlan>) -> Self { | ||
| let preserve_partitioning = false; | ||
| let cache = Self::compute_properties(&input, expr.clone(), preserve_partitioning); | ||
| let (cache, sort_prefix) = | ||
| Self::compute_properties(&input, expr.clone(), preserve_partitioning); | ||
| Self { | ||
| expr, | ||
| input, | ||
| metrics_set: ExecutionPlanMetricsSet::new(), | ||
| preserve_partitioning, | ||
| fetch: None, | ||
| common_sort_prefix: sort_prefix, | ||
| cache, | ||
| } | ||
| } | ||
|
|
@@ -1033,6 +1037,7 @@ impl SortExec { | |
| expr: self.expr.clone(), | ||
| metrics_set: self.metrics_set.clone(), | ||
| preserve_partitioning: self.preserve_partitioning, | ||
| common_sort_prefix: self.common_sort_prefix.clone(), | ||
| fetch, | ||
| cache, | ||
| } | ||
|
|
@@ -1066,19 +1071,21 @@ impl SortExec { | |
| } | ||
|
|
||
| /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. | ||
| /// It also returns the common sort prefix between the input and the sort expressions. | ||
| fn compute_properties( | ||
| input: &Arc<dyn ExecutionPlan>, | ||
| sort_exprs: LexOrdering, | ||
| preserve_partitioning: bool, | ||
| ) -> PlanProperties { | ||
| ) -> (PlanProperties, LexOrdering) { | ||
| // Determine execution mode: | ||
| let requirement = LexRequirement::from(sort_exprs); | ||
| let sort_satisfied = input | ||
|
|
||
| let (sort_prefix, sort_satisfied) = input | ||
| .equivalence_properties() | ||
| .ordering_satisfy_requirement(&requirement); | ||
| .extract_common_sort_prefix(&requirement); | ||
|
|
||
| // The emission type depends on whether the input is already sorted: | ||
| // - If already sorted, we can emit results in the same way as the input | ||
| // - If already fully sorted, we can emit results in the same way as the input | ||
| // - If not sorted, we must wait until all data is processed to emit results (Final) | ||
| let emission_type = if sort_satisfied { | ||
| input.pipeline_behavior() | ||
|
|
@@ -1114,11 +1121,14 @@ impl SortExec { | |
| let output_partitioning = | ||
| Self::output_partitioning_helper(input, preserve_partitioning); | ||
|
|
||
| PlanProperties::new( | ||
| eq_properties, | ||
| output_partitioning, | ||
| emission_type, | ||
| boundedness, | ||
| ( | ||
| PlanProperties::new( | ||
| eq_properties, | ||
| output_partitioning, | ||
| emission_type, | ||
| boundedness, | ||
| ), | ||
| LexOrdering::from(sort_prefix), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Very nice addition logic in this |
||
| ) | ||
| } | ||
| } | ||
|
|
@@ -1130,7 +1140,12 @@ impl DisplayAs for SortExec { | |
| let preserve_partitioning = self.preserve_partitioning; | ||
| match self.fetch { | ||
| Some(fetch) => { | ||
| write!(f, "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr) | ||
| write!(f, "SortExec: TopK(fetch={fetch}), expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr)?; | ||
| if !self.common_sort_prefix.is_empty() { | ||
| write!(f, ", sort_prefix=[{}]", self.common_sort_prefix) | ||
| } else { | ||
| Ok(()) | ||
| } | ||
| } | ||
| None => write!(f, "SortExec: expr=[{}], preserve_partitioning=[{preserve_partitioning}]", self.expr), | ||
| } | ||
|
|
@@ -1203,10 +1218,12 @@ impl ExecutionPlan for SortExec { | |
|
|
||
| trace!("End SortExec's input.execute for partition: {}", partition); | ||
|
|
||
| let requirement = &LexRequirement::from(self.expr.clone()); | ||
|
|
||
| let sort_satisfied = self | ||
| .input | ||
| .equivalence_properties() | ||
| .ordering_satisfy_requirement(&LexRequirement::from(self.expr.clone())); | ||
| .ordering_satisfy_requirement(requirement); | ||
|
|
||
| match (sort_satisfied, self.fetch.as_ref()) { | ||
| (true, Some(fetch)) => Ok(Box::pin(LimitStream::new( | ||
|
|
@@ -1220,6 +1237,7 @@ impl ExecutionPlan for SortExec { | |
| let mut topk = TopK::try_new( | ||
| partition, | ||
| input.schema(), | ||
| self.common_sort_prefix.clone(), | ||
| self.expr.clone(), | ||
| *fetch, | ||
| context.session_config().batch_size(), | ||
|
|
@@ -1232,6 +1250,9 @@ impl ExecutionPlan for SortExec { | |
| while let Some(batch) = input.next().await { | ||
| let batch = batch?; | ||
| topk.insert_batch(batch)?; | ||
| if topk.finished { | ||
| break; | ||
| } | ||
| } | ||
| topk.emit() | ||
| }) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very nice and clear addition to the explain
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we just keep the common prefix count, it will simplify the displays too
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Careful, the `expr` is unnormalized, while the `sort_prefix` is normalized. I agree this is a bit confusing, but `normalized_common_sort_prefix` seems a bit too verbose. Any suggestions?