diff --git a/datafusion-examples/examples/relation_planner/table_sample.rs b/datafusion-examples/examples/relation_planner/table_sample.rs index 42342e5f1a641..e56c9a03f4f57 100644 --- a/datafusion-examples/examples/relation_planner/table_sample.rs +++ b/datafusion-examples/examples/relation_planner/table_sample.rs @@ -108,7 +108,7 @@ use datafusion::{ }, physical_expr::EquivalenceProperties, physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, StatisticsContext, metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput}, }, physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner}, @@ -722,8 +722,12 @@ impl ExecutionPlan for SampleExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let mut stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let mut stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); let ratio = self.upper_bound - self.lower_bound; // Scale statistics by sampling ratio (inexact due to randomness) diff --git a/datafusion/core/src/datasource/file_format/csv.rs b/datafusion/core/src/datasource/file_format/csv.rs index a068b4f5c0413..e36044602acba 100644 --- a/datafusion/core/src/datasource/file_format/csv.rs +++ b/datafusion/core/src/datasource/file_format/csv.rs @@ -45,7 +45,7 @@ mod tests { use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::write::BatchSerializer; use datafusion_expr::{col, lit}; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use arrow::array::{ Array, BooleanArray, Float64Array, Int32Array, RecordBatch, StringArray, @@ -215,9 +215,12 @@ mod tests { assert_eq!(tt_batches, 50 /* 100/2 
*/); // test metadata - assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.num_rows, + Precision::Absent + ); + assert_eq!( + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent ); diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 5b3e22705620e..7d19b7f0f48d2 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -36,7 +36,7 @@ mod tests { BatchDeserializer, DecoderDeserializer, DeserializerOutput, }; use datafusion_datasource::file_format::FileFormat; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use arrow::compute::concat_batches; use arrow::datatypes::{DataType, Field}; @@ -117,9 +117,12 @@ mod tests { assert_eq!(tt_batches, 6 /* 12/2 */); // test metadata - assert_eq!(exec.partition_statistics(None)?.num_rows, Precision::Absent); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.num_rows, + Precision::Absent + ); + assert_eq!( + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent ); diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 6a8f7ab999757..207b990ff85b8 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -142,7 +142,7 @@ mod tests { use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_expr::dml::InsertOp; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; - use datafusion_physical_plan::{ExecutionPlan, collect}; + use datafusion_physical_plan::{ExecutionPlan, collect, compute_statistics}; use 
crate::test_util::bounded_stream; use arrow::array::{ @@ -715,12 +715,12 @@ mod tests { // test metadata assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); @@ -764,11 +764,11 @@ mod tests { // note: even if the limit is set, the executor rounds up to the batch size assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); let batches = collect(exec, task_ctx).await?; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index d14ec1f56dce2..206cc9775c8f1 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -144,7 +144,9 @@ mod tests { use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr_common::sort_expr::LexOrdering; use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::{ExecutionPlanProperties, collect}; + use datafusion_physical_plan::{ + ExecutionPlanProperties, collect, compute_statistics, + }; use std::collections::HashMap; use std::io::Write; use std::sync::Arc; @@ -247,11 +249,11 @@ mod tests { // test metadata assert_eq!( - exec.partition_statistics(None)?.num_rows, + compute_statistics(exec.as_ref(), None)?.num_rows, Precision::Exact(8) ); assert_eq!( - exec.partition_statistics(None)?.total_byte_size, + compute_statistics(exec.as_ref(), None)?.total_byte_size, Precision::Absent, ); @@ -1355,13 +1357,13 @@ mod tests { let exec_default = table_default.scan(&state, 
None, &[], None).await?; assert_eq!( - exec_default.partition_statistics(None)?.num_rows, + compute_statistics(exec_default.as_ref(), None)?.num_rows, Precision::Absent ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec_default.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_default.as_ref(), None)?.total_byte_size, Precision::Absent ); @@ -1376,11 +1378,11 @@ mod tests { let exec_disabled = table_disabled.scan(&state, None, &[], None).await?; assert_eq!( - exec_disabled.partition_statistics(None)?.num_rows, + compute_statistics(exec_disabled.as_ref(), None)?.num_rows, Precision::Absent ); assert_eq!( - exec_disabled.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_disabled.as_ref(), None)?.total_byte_size, Precision::Absent ); @@ -1395,12 +1397,12 @@ mod tests { let exec_enabled = table_enabled.scan(&state, None, &[], None).await?; assert_eq!( - exec_enabled.partition_statistics(None)?.num_rows, + compute_statistics(exec_enabled.as_ref(), None)?.num_rows, Precision::Exact(8) ); // TODO correct byte size: https://github.com/apache/datafusion/issues/14936 assert_eq!( - exec_enabled.partition_statistics(None)?.total_byte_size, + compute_statistics(exec_enabled.as_ref(), None)?.total_byte_size, Precision::Absent, ); diff --git a/datafusion/core/tests/custom_sources_cases/mod.rs b/datafusion/core/tests/custom_sources_cases/mod.rs index cef75b444f6fe..92954a0d5583e 100644 --- a/datafusion/core/tests/custom_sources_cases/mod.rs +++ b/datafusion/core/tests/custom_sources_cases/mod.rs @@ -41,6 +41,7 @@ use datafusion_common::stats::Precision; use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_plan::PlanProperties; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; 
@@ -179,7 +180,11 @@ impl ExecutionPlan for CustomExecutionPlan { Ok(Box::pin(TestCustomRecordBatchStream { nb_batch: 1 })) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema()))); } diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 01c4deac5ccd3..cb81b05326ad8 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -36,6 +36,8 @@ use datafusion_catalog::Session; use datafusion_common::tree_node::TreeNodeRecursion; use datafusion_common::{project_schema, stats::Precision}; use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::StatisticsContext; +use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; use async_trait::async_trait; @@ -174,7 +176,11 @@ impl ExecutionPlan for StatisticsValidation { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } else { @@ -247,7 +253,7 @@ async fn sql_basic() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); // the statistics should be those of the source - assert_eq!(stats, *physical_plan.partition_statistics(None)?); + assert_eq!(stats, *compute_statistics(physical_plan.as_ref(), None)?); Ok(()) } @@ -263,7 +269,7 @@ async fn sql_filter() -> Result<()> { .unwrap(); let physical_plan = df.create_physical_plan().await.unwrap(); - let stats = 
physical_plan.partition_statistics(None)?; + let stats = compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(stats.num_rows, Precision::Inexact(7)); Ok(()) @@ -278,7 +284,7 @@ async fn sql_limit() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); // when the limit is smaller than the original number of lines we mark the statistics as inexact // and cap NDV at the new row count - let limit_stats = physical_plan.partition_statistics(None)?; + let limit_stats = compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(limit_stats.num_rows, Precision::Exact(5)); // c1: NDV=2 stays at 2 (already below limit of 5) assert_eq!( @@ -297,7 +303,7 @@ async fn sql_limit() -> Result<()> { .unwrap(); let physical_plan = df.create_physical_plan().await.unwrap(); // when the limit is larger than the original number of lines, statistics remain unchanged - assert_eq!(stats, *physical_plan.partition_statistics(None)?); + assert_eq!(stats, *compute_statistics(physical_plan.as_ref(), None)?); Ok(()) } @@ -314,7 +320,7 @@ async fn sql_window() -> Result<()> { let physical_plan = df.create_physical_plan().await.unwrap(); - let result = physical_plan.partition_statistics(None)?; + let result = compute_statistics(physical_plan.as_ref(), None)?; assert_eq!(stats.num_rows, result.num_rows); let col_stats = &result.column_statistics; diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs index 84396be8a6a67..f2a9aa4cf3388 100644 --- a/datafusion/core/tests/parquet/file_statistics.rs +++ b/datafusion/core/tests/parquet/file_statistics.rs @@ -41,6 +41,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_optimizer::filter_pushdown::FilterPushdown; use datafusion_physical_plan::ExecutionPlan; +use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::filter::FilterExec; use tempfile::tempdir; @@ 
-61,7 +62,7 @@ async fn check_stats_precision_with_filter_pushdown() { // Scan without filter, stats are exact let exec = table.scan(&state, None, &[], None).await.unwrap(); assert_eq!( - exec.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec.as_ref(), None).unwrap().num_rows, Precision::Exact(8), "Stats without filter should be exact" ); @@ -93,7 +94,9 @@ async fn check_stats_precision_with_filter_pushdown() { ); // Scan with filter pushdown, stats are inexact assert_eq!( - optimized_exec.partition_statistics(None).unwrap().num_rows, + compute_statistics(optimized_exec.as_ref(), None) + .unwrap() + .num_rows, Precision::Inexact(8), "Stats after filter pushdown should be inexact" ); @@ -121,11 +124,13 @@ async fn load_table_stats_with_session_level_cache() { let exec1 = table1.scan(&state1, None, &[], None).await.unwrap(); assert_eq!( - exec1.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec1.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec1.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec1.as_ref(), None) + .unwrap() + .total_byte_size, // Byte size is absent because we cannot estimate the output size // of the Arrow data since there are variable length columns. 
Precision::Absent, @@ -137,11 +142,13 @@ async fn load_table_stats_with_session_level_cache() { assert_eq!(get_static_cache_size(&state2), 0); let exec2 = table2.scan(&state2, None, &[], None).await.unwrap(); assert_eq!( - exec2.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec2.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec2.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec2.as_ref(), None) + .unwrap() + .total_byte_size, // Absent because the data contains variable length columns Precision::Absent, ); @@ -152,11 +159,13 @@ async fn load_table_stats_with_session_level_cache() { assert_eq!(get_static_cache_size(&state1), 1); let exec3 = table1.scan(&state1, None, &[], None).await.unwrap(); assert_eq!( - exec3.partition_statistics(None).unwrap().num_rows, + compute_statistics(exec3.as_ref(), None).unwrap().num_rows, Precision::Exact(8) ); assert_eq!( - exec3.partition_statistics(None).unwrap().total_byte_size, + compute_statistics(exec3.as_ref(), None) + .unwrap() + .total_byte_size, // Absent because the data contains variable length columns Precision::Absent, ); diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs index 050baa9e792e9..82709e1f73e10 100644 --- a/datafusion/core/tests/physical_optimizer/join_selection.rs +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -45,7 +45,8 @@ use datafusion_physical_plan::joins::utils::JoinFilter; use datafusion_physical_plan::joins::{HashJoinExec, NestedLoopJoinExec, PartitionMode}; use datafusion_physical_plan::projection::ProjectionExec; use datafusion_physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, StatisticsContext, + compute_statistics, execution_plan::{Boundedness, EmissionType}, }; @@ -249,17 +250,13 @@ async fn test_join_with_swap() { 
.expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -297,17 +294,13 @@ async fn test_left_join_no_swap() { .expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -348,17 +341,13 @@ async fn test_join_with_swap_semi() { assert_eq!(swapped_join.schema().fields().len(), 1); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -401,17 +390,13 @@ async fn test_join_with_swap_mark() { assert_eq!(swapped_join.schema().fields().len(), 2); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -529,17 +514,13 @@ async fn test_join_no_swap() { .expect("The type of the plan should not be changed"); assert_eq!( - swapped_join - .left() - 
.partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -604,17 +585,13 @@ async fn test_nl_join_with_swap(join_type: JoinType) { ); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -677,17 +654,13 @@ async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { ); assert_eq!( - swapped_join - .left() - .partition_statistics(None) + compute_statistics(swapped_join.left().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(8192) ); assert_eq!( - swapped_join - .right() - .partition_statistics(None) + compute_statistics(swapped_join.right().as_ref(), None) .unwrap() .total_byte_size, Precision::Inexact(2097152) @@ -1167,7 +1140,11 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(if partition.is_some() { Statistics::new_unknown(&self.schema) } else { diff --git a/datafusion/core/tests/physical_optimizer/partition_statistics.rs b/datafusion/core/tests/physical_optimizer/partition_statistics.rs index d06e506abfebf..ff69f576adbc8 100644 --- a/datafusion/core/tests/physical_optimizer/partition_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/partition_statistics.rs @@ -45,6 +45,7 @@ mod test { }; use 
datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::common::compute_record_batch_statistics; + use datafusion_physical_plan::compute_statistics; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::filter::FilterExec; use datafusion_physical_plan::joins::{ @@ -238,7 +239,7 @@ mod test { async fn test_statistics_by_partition_of_data_source() -> Result<()> { let scan = create_scan_exec_with_statistics(None, Some(2)).await; let statistics = (0..scan.output_partitioning().partition_count()) - .map(|idx| scan.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(scan.as_ref(), Some(idx))) .collect::>>()?; // Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02] let expected_statistic_partition_1 = create_partition_statistics( @@ -282,7 +283,7 @@ mod test { let projection: Arc = Arc::new(ProjectionExec::try_new(exprs, scan)?); let statistics = (0..projection.output_partitioning().partition_count()) - .map(|idx| projection.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(projection.as_ref(), Some(idx))) .collect::>>()?; // Projection only includes id column, not the date partition column let expected_statistic_partition_1 = @@ -314,7 +315,7 @@ mod test { let sort = SortExec::new(ordering.clone().into(), scan_1); let sort_exec: Arc = Arc::new(sort); let statistics = (0..sort_exec.output_partitioning().partition_count()) - .map(|idx| sort_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(sort_exec.as_ref(), Some(idx))) .collect::>>()?; // All 4 files merged: ids [1-4], dates [2025-03-01, 2025-03-04] let expected_statistic_partition = create_partition_statistics( @@ -353,7 +354,7 @@ mod test { Some((DATE_2025_03_03, DATE_2025_03_04)), ); let statistics = (0..sort_exec.output_partitioning().partition_count()) - .map(|idx| sort_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(sort_exec.as_ref(), Some(idx))) .collect::>>()?; 
assert_eq!(statistics.len(), 2); assert_eq!(*statistics[0], expected_statistic_partition_1); @@ -380,7 +381,7 @@ mod test { )?; let filter: Arc = Arc::new(FilterExec::try_new(predicate, scan)?); - let full_statistics = filter.partition_statistics(None)?; + let full_statistics = compute_statistics(filter.as_ref(), None)?; let expected_full_statistic = Statistics { num_rows: Precision::Inexact(0), total_byte_size: Precision::Inexact(0), @@ -406,7 +407,7 @@ mod test { assert_eq!(*full_statistics, expected_full_statistic); let statistics = (0..filter.output_partitioning().partition_count()) - .map(|idx| filter.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(filter.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); let expected_partition_statistic = Statistics { @@ -442,7 +443,7 @@ mod test { let union_exec: Arc = UnionExec::try_new(vec![scan.clone(), scan])?; let statistics = (0..union_exec.output_partitioning().partition_count()) - .map(|idx| union_exec.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(union_exec.as_ref(), Some(idx))) .collect::>>()?; // Check that we have 4 partitions (2 from each scan) assert_eq!(statistics.len(), 4); @@ -505,7 +506,7 @@ mod test { // Verify the result of partition statistics let stats = (0..interleave.output_partitioning().partition_count()) - .map(|idx| interleave.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(interleave.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(stats.len(), 2); @@ -551,7 +552,7 @@ mod test { let cross_join: Arc = Arc::new(CrossJoinExec::new(left_scan, right_scan)); let statistics = (0..cross_join.output_partitioning().partition_count()) - .map(|idx| cross_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(cross_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have 2 partitions assert_eq!(statistics.len(), 2); @@ -658,7 +659,7 @@ mod test { // Test partition_statistics(None) - returns overall statistics // 
For RightSemi join, output columns come from right side only - let full_statistics = nested_loop_join.partition_statistics(None)?; + let full_statistics = compute_statistics(nested_loop_join.as_ref(), None)?; // With empty join columns, estimate_join_statistics returns Inexact row count // based on the outer side (right side for RightSemi) let mut expected_full_statistics = create_partition_statistics( @@ -696,7 +697,7 @@ mod test { expected_statistic_partition_2.total_byte_size = Precision::Absent; let statistics = (0..nested_loop_join.output_partitioning().partition_count()) - .map(|idx| nested_loop_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(nested_loop_join.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); assert_eq!(*statistics[0], expected_statistic_partition_1); @@ -726,7 +727,7 @@ mod test { Some((DATE_2025_03_01, DATE_2025_03_04)), ); let statistics = (0..coalesce_partitions.output_partitioning().partition_count()) - .map(|idx| coalesce_partitions.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(coalesce_partitions.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 1); assert_eq!(*statistics[0], expected_statistic_partition); @@ -743,7 +744,7 @@ mod test { let local_limit: Arc = Arc::new(LocalLimitExec::new(scan.clone(), 1)); let statistics = (0..local_limit.output_partitioning().partition_count()) - .map(|idx| local_limit.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(local_limit.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); let mut expected_0 = Statistics::clone(&statistics[0]); @@ -770,7 +771,7 @@ mod test { let global_limit: Arc = Arc::new(GlobalLimitExec::new(scan.clone(), 0, Some(2))); let statistics = (0..global_limit.output_partitioning().partition_count()) - .map(|idx| global_limit.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(global_limit.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 
1); // GlobalLimit takes from first partition: ids [3,4], dates [2025-03-01, 2025-03-02] @@ -829,7 +830,7 @@ mod test { @"AggregateExec: mode=Partial, gby=[id@0 as id, 1 + id@0 as expr], aggr=[COUNT(c)]" ); - let p0_statistics = aggregate_exec_partial.partition_statistics(Some(0))?; + let p0_statistics = compute_statistics(aggregate_exec_partial.as_ref(), Some(0))?; // Aggregate doesn't propagate num_rows and ColumnStatistics byte_size from input let expected_p0_statistics = Statistics { @@ -868,7 +869,7 @@ mod test { ], }; - let p1_statistics = aggregate_exec_partial.partition_statistics(Some(1))?; + let p1_statistics = compute_statistics(aggregate_exec_partial.as_ref(), Some(1))?; assert_eq!(*p1_statistics, expected_p1_statistics); validate_statistics_with_data( @@ -890,10 +891,10 @@ mod test { aggregate_exec_partial.schema(), )?); - let p0_statistics = agg_final.partition_statistics(Some(0))?; + let p0_statistics = compute_statistics(agg_final.as_ref(), Some(0))?; assert_eq!(*p0_statistics, expected_p0_statistics); - let p1_statistics = agg_final.partition_statistics(Some(1))?; + let p1_statistics = compute_statistics(agg_final.as_ref(), Some(1))?; assert_eq!(*p1_statistics, expected_p1_statistics); validate_statistics_with_data( @@ -938,8 +939,14 @@ mod test { ], }; - assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(0))?); - assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(1))?); + assert_eq!( + empty_stat, + *compute_statistics(agg_partial.as_ref(), Some(0))? + ); + assert_eq!( + empty_stat, + *compute_statistics(agg_partial.as_ref(), Some(1))? + ); validate_statistics_with_data( agg_partial.clone(), vec![ExpectedStatistics::Empty, ExpectedStatistics::Empty], @@ -965,8 +972,14 @@ mod test { agg_partial.schema(), )?); - assert_eq!(empty_stat, *agg_final.partition_statistics(Some(0))?); - assert_eq!(empty_stat, *agg_final.partition_statistics(Some(1))?); + assert_eq!( + empty_stat, + *compute_statistics(agg_final.as_ref(), Some(0))? 
+ ); + assert_eq!( + empty_stat, + *compute_statistics(agg_final.as_ref(), Some(1))? + ); validate_statistics_with_data( agg_final, @@ -1002,7 +1015,10 @@ mod test { column_statistics: vec![ColumnStatistics::new_unknown()], }; - assert_eq!(expect_stat, *agg_final.partition_statistics(Some(0))?); + assert_eq!( + expect_stat, + *compute_statistics(agg_final.as_ref(), Some(0))? + ); // Verify that the aggregate final result has exactly one partition with one row let mut partitions = execute_stream_partitioned( @@ -1030,7 +1046,7 @@ mod test { let mut all_batches = vec![]; for (i, partition_stream) in partitions.into_iter().enumerate() { let batches: Vec = partition_stream.try_collect().await?; - let actual = plan.partition_statistics(Some(i))?; + let actual = compute_statistics(plan.as_ref(), Some(i))?; let expected = compute_record_batch_statistics( std::slice::from_ref(&batches), &schema, @@ -1040,7 +1056,7 @@ mod test { all_batches.push(batches); } - let actual = plan.partition_statistics(None)?; + let actual = compute_statistics(plan.as_ref(), None)?; let expected = compute_record_batch_statistics(&all_batches, &schema, None); assert_eq!(*actual, expected); @@ -1057,7 +1073,7 @@ mod test { )?); let statistics = (0..repartition.partitioning().partition_count()) - .map(|idx| repartition.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(repartition.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 3); @@ -1108,7 +1124,7 @@ mod test { Partitioning::RoundRobinBatch(2), )?); - let result = repartition.partition_statistics(Some(2)); + let result = compute_statistics(repartition.as_ref(), Some(2)); assert!(result.is_err()); let error = result.unwrap_err(); assert!( @@ -1137,7 +1153,7 @@ mod test { Partitioning::RoundRobinBatch(0), )?); - let result = repartition.partition_statistics(Some(0))?; + let result = compute_statistics(repartition.as_ref(), Some(0))?; assert_eq!(*result, Statistics::new_unknown(&scan_schema)); // Verify that the 
result has exactly 0 partitions @@ -1164,7 +1180,7 @@ mod test { // Verify the result of partition statistics of repartition let stats = (0..repartition.partitioning().partition_count()) - .map(|idx| repartition.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(repartition.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(stats.len(), 2); @@ -1222,7 +1238,7 @@ mod test { // Verify partition statistics are properly propagated (not unknown) let statistics = (0..window_agg.output_partitioning().partition_count()) - .map(|idx| window_agg.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(window_agg.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 2); @@ -1308,7 +1324,7 @@ mod test { // Try to test with single partition let empty_single = Arc::new(EmptyExec::new(Arc::clone(&schema))); - let stats = empty_single.partition_statistics(Some(0))?; + let stats = compute_statistics(empty_single.as_ref(), Some(0))?; assert_eq!(stats.num_rows, Precision::Exact(0)); assert_eq!(stats.total_byte_size, Precision::Exact(0)); assert_eq!(stats.column_statistics.len(), 2); @@ -1323,7 +1339,7 @@ mod test { assert_eq!(col_stat.byte_size, Precision::Exact(0)); } - let overall_stats = empty_single.partition_statistics(None)?; + let overall_stats = compute_statistics(empty_single.as_ref(), None)?; assert_eq!(stats, overall_stats); validate_statistics_with_data(empty_single, vec![ExpectedStatistics::Empty], 0) @@ -1334,7 +1350,7 @@ mod test { Arc::new(EmptyExec::new(Arc::clone(&schema)).with_partitions(3)); let statistics = (0..empty_multi.output_partitioning().partition_count()) - .map(|idx| empty_multi.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(empty_multi.as_ref(), Some(idx))) .collect::>>()?; assert_eq!(statistics.len(), 3); @@ -1394,7 +1410,7 @@ mod test { // Test partition statistics for CollectLeft mode let statistics = (0..collect_left_join.output_partitioning().partition_count()) - .map(|idx| 
collect_left_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(collect_left_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions @@ -1470,7 +1486,7 @@ mod test { // Test partition statistics for Partitioned mode let statistics = (0..partitioned_join.output_partitioning().partition_count()) - .map(|idx| partitioned_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(partitioned_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions @@ -1544,7 +1560,7 @@ mod test { // Test partition statistics for Auto mode let statistics = (0..auto_join.output_partitioning().partition_count()) - .map(|idx| auto_join.partition_statistics(Some(idx))) + .map(|idx| compute_statistics(auto_join.as_ref(), Some(idx))) .collect::>>()?; // Check that we have the expected number of partitions diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index 6814ab2358ffc..8eb02a5dd84ff 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -70,7 +70,7 @@ use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, create_window_expr}; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionPlan, InputOrderMode, Partitioning, - PlanProperties, SortOrderPushdownResult, displayable, + PlanProperties, SortOrderPushdownResult, StatisticsContext, displayable, }; /// Create a non sorted parquet exec @@ -983,7 +983,11 @@ impl ExecutionPlan for TestScan { internal_err!("TestScan is for testing optimizer only, not for execution") } - fn partition_statistics(&self, _partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } diff 
--git a/datafusion/core/tests/sql/path_partition.rs b/datafusion/core/tests/sql/path_partition.rs index 2eff1c262f855..8b7b9265f20fc 100644 --- a/datafusion/core/tests/sql/path_partition.rs +++ b/datafusion/core/tests/sql/path_partition.rs @@ -38,6 +38,7 @@ use datafusion_common::ScalarValue; use datafusion_common::stats::Precision; use datafusion_common::test_util::batches_to_sort_string; use datafusion_execution::config::SessionConfig; +use datafusion_physical_plan::compute_statistics; use async_trait::async_trait; use bytes::Bytes; @@ -461,8 +462,7 @@ async fn parquet_statistics() -> Result<()> { let schema = physical_plan.schema(); assert_eq!(schema.fields().len(), 4); - let stat_cols = physical_plan - .partition_statistics(None)? + let stat_cols = compute_statistics(physical_plan.as_ref(), None)? .column_statistics .clone(); assert_eq!(stat_cols.len(), 4); @@ -488,8 +488,7 @@ async fn parquet_statistics() -> Result<()> { let schema = physical_plan.schema(); assert_eq!(schema.fields().len(), 2); - let stat_cols = physical_plan - .partition_statistics(None)? + let stat_cols = compute_statistics(physical_plan.as_ref(), None)? .column_statistics .clone(); assert_eq!(stat_cols.len(), 2); diff --git a/datafusion/datasource/src/file_scan_config/mod.rs b/datafusion/datasource/src/file_scan_config/mod.rs index 04b74528d5ac1..3f9d1105cffa8 100644 --- a/datafusion/datasource/src/file_scan_config/mod.rs +++ b/datafusion/datasource/src/file_scan_config/mod.rs @@ -2246,7 +2246,6 @@ mod tests { // of just the projected ones. 
use crate::source::DataSourceExec; - use datafusion_physical_plan::ExecutionPlan; // Create a schema with 4 columns let schema = Arc::new(Schema::new(vec![ @@ -2300,7 +2299,8 @@ mod tests { let exec = DataSourceExec::from_data_source(config); // Get statistics for partition 0 - let partition_stats = exec.partition_statistics(Some(0)).unwrap(); + let partition_stats = + datafusion_physical_plan::compute_statistics(exec.as_ref(), Some(0)).unwrap(); // Verify that only 2 columns are in the statistics (the projected ones) assert_eq!( diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs index 9f4f8aa0f3635..bee4735777ab9 100644 --- a/datafusion/datasource/src/memory.rs +++ b/datafusion/datasource/src/memory.rs @@ -1000,7 +1000,7 @@ mod tests { let values = MemorySourceConfig::try_new_as_values(schema, data)?; assert_eq!( - *values.partition_statistics(None)?, + *datafusion_physical_plan::compute_statistics(values.as_ref(), None)?, Statistics { num_rows: Precision::Exact(rows), total_byte_size: Precision::Exact(8), // not important diff --git a/datafusion/datasource/src/source.rs b/datafusion/datasource/src/source.rs index 420c6b508ce4f..47ed411c2eba9 100644 --- a/datafusion/datasource/src/source.rs +++ b/datafusion/datasource/src/source.rs @@ -46,6 +46,7 @@ use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; use datafusion_physical_plan::SortOrderPushdownResult; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::filter_pushdown::{ ChildPushdownResult, FilterPushdownPhase, FilterPushdownPropagation, PushedDown, }; @@ -451,7 +452,11 @@ impl ExecutionPlan for DataSourceExec { Some(metrics) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: 
&StatisticsContext, + ) -> Result> { self.data_source.partition_statistics(partition) } diff --git a/datafusion/physical-optimizer/src/aggregate_statistics.rs b/datafusion/physical-optimizer/src/aggregate_statistics.rs index 75da1873263d8..da7f0a5e48c60 100644 --- a/datafusion/physical-optimizer/src/aggregate_statistics.rs +++ b/datafusion/physical-optimizer/src/aggregate_statistics.rs @@ -24,7 +24,7 @@ use datafusion_physical_plan::aggregates::{AggregateExec, AggregateInputMode}; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::udaf::{AggregateFunctionExpr, StatisticsArgs}; -use datafusion_physical_plan::{ExecutionPlan, expressions}; +use datafusion_physical_plan::{ExecutionPlan, compute_statistics, expressions}; use std::sync::Arc; use crate::PhysicalOptimizerRule; @@ -53,7 +53,7 @@ impl PhysicalOptimizerRule for AggregateStatistics { let partial_agg_exec = partial_agg_exec .downcast_ref::() .expect("take_optimizable() ensures that this is a AggregateExec"); - let stats = partial_agg_exec.input().partition_statistics(None)?; + let stats = compute_statistics(partial_agg_exec.input().as_ref(), None)?; let mut projections = vec![]; for expr in partial_agg_exec.aggr_expr() { let field = expr.field(); diff --git a/datafusion/physical-optimizer/src/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs index c522867c05196..a8c065894dab1 100644 --- a/datafusion/physical-optimizer/src/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -59,7 +59,9 @@ use datafusion_physical_plan::tree_node::PlanContext; use datafusion_physical_plan::union::{InterleaveExec, UnionExec, can_interleave}; use datafusion_physical_plan::windows::WindowAggExec; use datafusion_physical_plan::windows::{BoundedWindowAggExec, get_best_fitting_window}; -use datafusion_physical_plan::{Distribution, ExecutionPlan, 
Partitioning}; +use datafusion_physical_plan::{ + Distribution, ExecutionPlan, Partitioning, compute_statistics, +}; use itertools::izip; @@ -1140,7 +1142,8 @@ fn get_repartition_requirement_status( { // Decide whether adding a round robin is beneficial depending on // the statistical information we have on the number of rows: - let roundrobin_beneficial_stats = match child.partition_statistics(None)?.num_rows + let roundrobin_beneficial_stats = match compute_statistics(child.as_ref(), None)? + .num_rows { Precision::Exact(n_rows) => n_rows > batch_size, Precision::Inexact(n_rows) => !should_use_estimates || (n_rows > batch_size), diff --git a/datafusion/physical-optimizer/src/join_selection.rs b/datafusion/physical-optimizer/src/join_selection.rs index 74c6cbb19aea9..c658f83c2b82c 100644 --- a/datafusion/physical-optimizer/src/join_selection.rs +++ b/datafusion/physical-optimizer/src/join_selection.rs @@ -40,7 +40,9 @@ use datafusion_physical_plan::joins::{ StreamJoinPartitionMode, SymmetricHashJoinExec, }; use datafusion_physical_plan::operator_statistics::StatisticsRegistry; -use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; +use datafusion_physical_plan::{ + ExecutionPlan, ExecutionPlanProperties, compute_statistics, +}; use std::sync::Arc; /// The [`JoinSelection`] rule tries to modify a given plan so that it can @@ -65,7 +67,7 @@ fn get_stats( reg.compute(plan) .map(|s| Arc::::clone(s.base_arc())) } else { - plan.partition_statistics(None) + compute_statistics(plan, None) } } diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index 81df6f943c15e..02acfa2b09099 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -34,6 +34,7 @@ use datafusion_common::{Result, Statistics}; use datafusion_execution::TaskContext; use datafusion_physical_expr::Distribution; use 
datafusion_physical_expr_common::sort_expr::OrderingRequirements; +use datafusion_physical_plan::StatisticsContext; use datafusion_physical_plan::execution_plan::Boundedness; use datafusion_physical_plan::projection::{ ProjectionExec, make_with_child, update_expr, update_ordering_requirement, @@ -242,8 +243,15 @@ impl ExecutionPlan for OutputRequirementExec { unreachable!(); } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition), + None => Ok(Arc::clone(&ctx.child_stats()[0])), + } } fn try_swapping_with_projection( diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index 7c52ee6e172a7..7935421bb55ae 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -112,3 +112,7 @@ required-features = ["test_utils"] harness = false name = "aggregate_vectorized" required-features = ["test_utils"] + +[[bench]] +harness = false +name = "compute_statistics" diff --git a/datafusion/physical-plan/benches/compute_statistics.rs b/datafusion/physical-plan/benches/compute_statistics.rs new file mode 100644 index 0000000000000..851424a54aebc --- /dev/null +++ b/datafusion/physical-plan/benches/compute_statistics.rs @@ -0,0 +1,223 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Benchmarks for `compute_statistics` with `StatsCache`. +//! +//! Demonstrates that caching eliminates redundant subtree walks in plans +//! containing partition-merging operators (CoalescePartitionsExec) and +//! binary join trees (CrossJoinExec). +//! +//! The plan shapes here mirror the reproducers from the planning-speed +//! EPIC (): +//! - Coalesce chain: deep linear plans (e.g. deeply nested subqueries) +//! - Cross-join tree: balanced binary trees from multi-way joins +//! (mirrors the `physical_many_self_joins` sql_planner benchmark) + +use std::fmt; +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use datafusion_common::tree_node::TreeNodeRecursion; +use datafusion_common::{Result, Statistics}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::execution_plan::{ + Boundedness, EmissionType, ExecutionPlan, PlanProperties, +}; +use datafusion_physical_plan::joins::CrossJoinExec; +use datafusion_physical_plan::statistics_context::{ + StatisticsContext, compute_statistics, +}; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, Partitioning, SendableRecordBatchStream, +}; + +/// Minimal leaf node for benchmarking +#[derive(Debug)] +struct BenchLeaf { + schema: SchemaRef, + cache: Arc, +} + +impl BenchLeaf { + fn new(col_name: &str) -> Self { + let schema = 
Arc::new(Schema::new(vec![Field::new( + col_name, + DataType::Int32, + false, + )])); + let cache = Arc::new(PlanProperties::new( + EquivalenceProperties::new(Arc::clone(&schema)), + Partitioning::UnknownPartitioning(2), + EmissionType::Incremental, + Boundedness::Bounded, + )); + Self { schema, cache } + } +} + +impl DisplayAs for BenchLeaf { + fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "BenchLeaf") + } +} + +impl ExecutionPlan for BenchLeaf { + fn name(&self) -> &str { + "BenchLeaf" + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.schema) + } + + fn properties(&self) -> &Arc { + &self.cache + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _children: Vec>, + ) -> Result> { + Ok(self) + } + + fn apply_expressions( + &self, + _f: &mut dyn FnMut( + &dyn datafusion_physical_expr::PhysicalExpr, + ) -> Result, + ) -> Result { + Ok(TreeNodeRecursion::Continue) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!() + } + + fn partition_statistics_with_context( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::new(Statistics::new_unknown(&self.schema))) + } +} + +/// Build: CoalescePartitions^depth -> BenchLeaf +fn build_coalesce_chain(depth: usize) -> Arc { + let mut plan: Arc = Arc::new(BenchLeaf::new("a")); + for _ in 0..depth { + plan = Arc::new(CoalescePartitionsExec::new(plan)); + } + plan +} + +/// Build a balanced binary tree of CrossJoinExec with 2^depth leaves. +/// Mirrors the plan shape produced by multi-way self-joins like the +/// `physical_many_self_joins` benchmark in sql_planner.rs (#19795). 
+fn build_cross_join_tree(depth: usize, next_col: &mut usize) -> Arc { + if depth == 0 { + let col_name = format!("c{next_col}"); + *next_col += 1; + return Arc::new(BenchLeaf::new(&col_name)); + } + let left = build_cross_join_tree(depth - 1, next_col); + let right = build_cross_join_tree(depth - 1, next_col); + Arc::new(CrossJoinExec::new(left, right)) +} + +/// Recursive walk without a shared cross-node cache, simulating pre-cache behavior. +/// Each operator's internal `compute_child_statistics` call triggers a fresh +/// subtree walk, resulting in O(n^2) total node visits for a chain of depth n. +/// +/// Note: each `compute_child_statistics` re-walk still benefits from its own +/// ephemeral cache; only the cross-node sharing is removed. +fn compute_statistics_without_shared_cache( + plan: &dyn ExecutionPlan, + partition: Option, +) -> Result> { + let child_stats = plan + .children() + .iter() + .map(|child| compute_statistics_without_shared_cache(child.as_ref(), partition)) + .collect::>>()?; + let ctx = StatisticsContext::new(child_stats); + plan.partition_statistics_with_context(partition, &ctx) +} + +fn bench_compute_statistics(c: &mut Criterion) { + // --- Coalesce chain (linear plan) --- + // Deep linear plans arise from deeply nested subqueries, CTEs, etc. + let mut group = c.benchmark_group("compute_statistics_coalesce_chain"); + for depth in [10, 20, 50] { + let plan = build_coalesce_chain(depth); + group.bench_with_input(BenchmarkId::new("cached", depth), &plan, |b, plan| { + b.iter(|| compute_statistics(plan.as_ref(), None).unwrap()); + }); + group.bench_with_input( + BenchmarkId::new("no_shared_cache", depth), + &plan, + |b, plan| { + b.iter(|| { + compute_statistics_without_shared_cache(plan.as_ref(), None).unwrap() + }); + }, + ); + } + group.finish(); + + // --- Cross-join tree (balanced binary plan) --- + // Binary trees arise from multi-way joins (e.g. physical_many_self_joins + // in sql_planner.rs, see #19795). 
CrossJoinExec calls + // compute_child_statistics for per-partition stats, re-walking the left + // subtree at each node. The gap between cached/uncached is smaller than + // the linear chain because only the left child triggers a re-walk. + let mut group = c.benchmark_group("compute_statistics_cross_join_tree"); + for depth in [3, 5, 7] { + let mut next_col = 0; + let plan = build_cross_join_tree(depth, &mut next_col); + let label = format!("depth={depth}_leaves={}", 1usize << depth); + group.bench_with_input(BenchmarkId::new("cached", &label), &plan, |b, plan| { + b.iter(|| compute_statistics(plan.as_ref(), Some(0)).unwrap()); + }); + group.bench_with_input( + BenchmarkId::new("no_shared_cache", &label), + &plan, + |b, plan| { + b.iter(|| { + compute_statistics_without_shared_cache(plan.as_ref(), Some(0)) + .unwrap() + }); + }, + ); + } + group.finish(); +} + +criterion_group!(benches, bench_compute_statistics); +criterion_main!(benches); diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 76ecb3f1485a4..04163e3502474 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -30,6 +30,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, PushedDownPredicate, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, Distribution, ExecutionPlan, InputOrderMode, SendableRecordBatchStream, Statistics, check_if_same_properties, @@ -1555,8 +1556,15 @@ impl ExecutionPlan for AggregateExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let child_statistics = self.input().partition_statistics(partition)?; + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let child_statistics = match partition { + Some(_) => 
ctx.compute_child_statistics(self.input.as_ref(), partition)?, + None => Arc::clone(&ctx.child_stats()[0]), + }; Ok(Arc::new(self.statistics_inner(&child_statistics)?)) } @@ -2180,6 +2188,7 @@ mod tests { use crate::execution_plan::Boundedness; use crate::expressions::col; use crate::metrics::MetricValue; + use crate::statistics_context::compute_statistics; use crate::test::TestMemoryExec; use crate::test::assert_is_pending; use crate::test::exec::{ @@ -2561,7 +2570,7 @@ mod tests { )?); // Verify statistics are preserved proportionally through aggregation - let final_stats = merged_aggregate.partition_statistics(None)?; + let final_stats = compute_statistics(merged_aggregate.as_ref(), None)?; assert!(final_stats.total_byte_size.get_value().is_some()); let task_ctx = if spill { @@ -2703,9 +2712,10 @@ mod tests { Ok(Box::pin(stream)) } - fn partition_statistics( + fn partition_statistics_with_context( &self, partition: Option, + _ctx: &StatisticsContext, ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(self.schema().as_ref()))); @@ -3910,7 +3920,7 @@ mod tests { PhysicalGroupBy::default(), None, )?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!(stats.total_byte_size, Precision::Absent); let zero_row_stats = Statistics { @@ -3927,7 +3937,7 @@ mod tests { PhysicalGroupBy::default(), None, )?; - let stats_zero = agg_zero.partition_statistics(None)?; + let stats_zero = compute_statistics(&agg_zero, None)?; assert_eq!(stats_zero.total_byte_size, Precision::Absent); Ok(()) @@ -4280,7 +4290,7 @@ mod tests { let agg = build_test_aggregate(&schema, input_stats, group_by, case.limit_options)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!( stats.num_rows, case.expected_num_rows, "FAILED: '{}' — expected {:?}, got {:?}", @@ -4319,7 +4329,7 @@ mod tests { None, )?; - let stats = agg.partition_statistics(None)?; + let stats = 
compute_statistics(&agg, None)?; assert_eq!( stats.column_statistics[0].distinct_count, Precision::Exact(100), @@ -4373,7 +4383,7 @@ mod tests { let agg = build_test_aggregate(&schema, input_stats, grouping_set, None)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; // Per-set NDV: (a,NULL)=100, (NULL,b)=50, (a,b)=100*50=5000 // Total = 100 + 50 + 5000 = 5150 assert_eq!( @@ -4422,7 +4432,7 @@ mod tests { PhysicalGroupBy::new_single(vec![(expr_a_plus_b, "a+b".to_string())]); let agg = build_test_aggregate(&schema, input_stats, group_by, None)?; - let stats = agg.partition_statistics(None)?; + let stats = compute_statistics(&agg, None)?; assert_eq!( stats.num_rows, Precision::Inexact(1_000_000), diff --git a/datafusion/physical-plan/src/buffer.rs b/datafusion/physical-plan/src/buffer.rs index 0cc4a1d71814e..b07b3f3ce9caa 100644 --- a/datafusion/physical-plan/src/buffer.rs +++ b/datafusion/physical-plan/src/buffer.rs @@ -24,6 +24,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, }; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, SortOrderPushdownResult, @@ -244,8 +245,15 @@ impl ExecutionPlan for BufferExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition), + None => Ok(Arc::clone(&ctx.child_stats()[0])), + } } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 2bf046f03b6cf..3914c959b9320 100644 --- 
a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -24,6 +24,7 @@ use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, check_if_same_properties, @@ -222,8 +223,17 @@ impl ExecutionPlan for CoalesceBatchesExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 9290d725165e9..e5be125968af5 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -30,6 +30,7 @@ use crate::execution_plan::{CardinalityEffect, EvaluationType, SchedulingType}; use crate::filter_pushdown::{FilterDescription, FilterPushdownPhase}; use crate::projection::{ProjectionExec, make_with_child}; use crate::sort_pushdown::SortOrderPushdownResult; +use crate::statistics_context::StatisticsContext; use crate::{DisplayFormatType, ExecutionPlan, Partitioning, check_if_same_properties}; use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr; @@ -239,8 +240,12 @@ impl ExecutionPlan for CoalescePartitionsExec { Some(self.metrics.clone_inner()) 
} - fn partition_statistics(&self, _partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(None)?); + fn partition_statistics_with_context( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/coop.rs b/datafusion/physical-plan/src/coop.rs index fe6a3bc3d5678..568b5b067d786 100644 --- a/datafusion/physical-plan/src/coop.rs +++ b/datafusion/physical-plan/src/coop.rs @@ -85,6 +85,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, }; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, SortOrderPushdownResult, check_if_same_properties, @@ -306,8 +307,15 @@ impl ExecutionPlan for CooperativeExec { Ok(make_cooperative(child_stream)) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition), + None => Ok(Arc::clone(&ctx.child_stats()[0])), + } } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 756a68b1a958d..6e2dd07fb9c83 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -31,6 +31,8 @@ use datafusion_physical_expr::LexOrdering; use crate::metrics::{MetricCategory, MetricType}; use crate::render_tree::RenderTree; +use crate::statistics_context::compute_statistics; + use super::{ExecutionPlan, ExecutionPlanVisitor, accept}; /// Options for controlling how each [`ExecutionPlan`] 
should format itself @@ -480,7 +482,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { } } if self.show_statistics { - let stats = plan.partition_statistics(None).map_err(|_e| fmt::Error)?; + let stats = compute_statistics(plan, None).map_err(|_e| fmt::Error)?; write!(self.f, ", statistics=[{stats}]")?; } if self.show_schema { @@ -576,7 +578,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { }; let statistics = if self.show_statistics { - let stats = plan.partition_statistics(None).map_err(|_e| fmt::Error)?; + let stats = compute_statistics(plan, None).map_err(|_e| fmt::Error)?; format!("statistics=[{stats}]") } else { "".to_string() @@ -1173,6 +1175,7 @@ mod tests { use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::PhysicalExpr; + use crate::statistics_context::StatisticsContext; use crate::{DisplayAs, ExecutionPlan, PlanProperties}; use super::DisplayableExecutionPlan; @@ -1229,9 +1232,10 @@ mod tests { todo!() } - fn partition_statistics( + fn partition_statistics_with_context( &self, partition: Option, + _ctx: &StatisticsContext, ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(self.schema().as_ref()))); diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 8103695ad08fa..2385f5921d626 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -35,6 +35,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use crate::execution_plan::SchedulingType; +use crate::statistics_context::StatisticsContext; use log::trace; /// Execution plan for empty relation with produce_one_row=false @@ -159,7 +160,11 @@ impl ExecutionPlan for EmptyExec { )?)) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if let 
Some(partition) = partition { assert_or_internal_err!( partition < self.partitions, diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 1a67ea0ded11b..68f1ecc0a2b5a 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -47,6 +47,7 @@ use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; use crate::metrics::MetricsSet; use crate::projection::ProjectionExec; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use arrow::array::{Array, RecordBatch}; @@ -546,9 +547,14 @@ pub trait ExecutionPlan: Any + Debug + DisplayAs + Send + Sync { } /// Returns statistics for a specific partition of this `ExecutionPlan` node. - /// If statistics are not available, should return [`Statistics::new_unknown`] - /// (the default), not an error. - /// If `partition` is `None`, it returns statistics for the entire plan. + /// + /// Deprecated: use [`Self::partition_statistics_with_context`] instead, + /// which accepts a [`StatisticsContext`] carrying pre-computed child + /// statistics. + #[deprecated( + since = "54.0.0", + note = "Use partition_statistics_with_context instead" + )] fn partition_statistics(&self, partition: Option) -> Result> { if let Some(idx) = partition { // Validate partition index @@ -563,6 +569,30 @@ pub trait ExecutionPlan: Any + Debug + DisplayAs + Send + Sync { Ok(Arc::new(Statistics::new_unknown(&self.schema()))) } + /// Returns statistics for a specific partition of this `ExecutionPlan` node. + /// If statistics are not available, should return [`Statistics::new_unknown`] + /// (the default), not an error. + /// If `partition` is `None`, it returns statistics for the entire plan. 
+ /// + /// The [`StatisticsContext`] carries pre-computed overall (`None`) child + /// statistics via [`StatisticsContext::child_stats`] and a shared cache + /// via [`StatisticsContext::compute_child_statistics`]. Operators that + /// need per-partition child stats should call + /// `ctx.compute_child_statistics(child, partition)`. + /// + /// [`StatisticsContext`]: crate::statistics_context::StatisticsContext + /// [`StatisticsContext::child_stats`]: crate::statistics_context::StatisticsContext::child_stats + /// [`StatisticsContext::compute_child_statistics`]: crate::statistics_context::StatisticsContext::compute_child_statistics + /// [`compute_statistics`]: crate::statistics_context::compute_statistics + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { + #[expect(deprecated)] + self.partition_statistics(partition) + } + /// Returns `true` if a limit can be safely pushed down through this /// `ExecutionPlan` node. /// @@ -1655,9 +1685,10 @@ mod tests { unimplemented!() } - fn partition_statistics( + fn partition_statistics_with_context( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } @@ -1724,9 +1755,10 @@ mod tests { unimplemented!() } - fn partition_statistics( + fn partition_statistics_with_context( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } @@ -1788,9 +1820,10 @@ mod tests { unimplemented!() } - fn partition_statistics( + fn partition_statistics_with_context( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { unimplemented!() } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 1119d1b240788..c65bdb91455b8 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -42,6 +42,7 @@ use crate::projection::{ EmbeddedProjection, ProjectionExec, ProjectionExpr, make_with_child, try_embed_projection, 
update_expr, }; +use crate::statistics_context::{StatisticsContext, compute_statistics}; use crate::{ DisplayFormatType, ExecutionPlan, metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RatioMetrics}, @@ -400,7 +401,7 @@ impl FilterExec { let schema = input.schema(); let stats = Self::statistics_helper( &schema, - Arc::unwrap_or_clone(input.partition_statistics(None)?), + Arc::unwrap_or_clone(compute_statistics(input.as_ref(), None)?), predicate, default_selectivity, )?; @@ -576,9 +577,17 @@ impl ExecutionPlan for FilterExec { /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stats = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; let stats = Self::statistics_helper( &self.input.schema(), input_stats, @@ -1152,6 +1161,7 @@ mod tests { use super::*; use crate::empty::EmptyExec; use crate::expressions::*; + use crate::statistics_context::compute_statistics; use crate::test; use crate::test::exec::StatisticsExec; use arrow::datatypes::{Field, Schema, UnionFields, UnionMode}; @@ -1228,7 +1238,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(25)); assert_eq!( statistics.total_byte_size, @@ -1278,7 +1288,7 @@ mod tests { sub_filter, )?); - let statistics = filter.partition_statistics(None)?; + let statistics = 
compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(16)); assert_eq!( statistics.column_statistics, @@ -1338,7 +1348,7 @@ mod tests { binary(col("a", &schema)?, Operator::GtEq, lit(10i32), &schema)?, b_gt_5, )?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // On a uniform distribution, only fifteen rows will satisfy the // filter that 'a' proposed (a >= 10 AND a <= 25) (15/100) and only // 5 rows will satisfy the filter that 'b' proposed (b > 45) (5/50). @@ -1383,7 +1393,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Absent); Ok(()) @@ -1456,7 +1466,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // 0.5 (from a) * 0.333333... (from b) * 0.798387... (from c) ≈ 0.1330... // num_rows after ceil => 133.0... => 134 // total_byte_size after ceil => 532.0... => 533 @@ -1552,10 +1562,12 @@ mod tests { )), )); // Since filter predicate passes all entries, statistics after filter shouldn't change. - let expected = input.partition_statistics(None)?.column_statistics.clone(); + let expected = compute_statistics(input.as_ref(), None)? 
+ .column_statistics + .clone(); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(1000)); assert_eq!(statistics.total_byte_size, Precision::Inexact(4000)); @@ -1608,7 +1620,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(0)); assert_eq!(statistics.total_byte_size, Precision::Inexact(0)); @@ -1695,7 +1707,7 @@ mod tests { Arc::new(FilterExec::try_new(outer_predicate, inner_filter)?); // Should succeed without error - let statistics = outer_filter.partition_statistics(None)?; + let statistics = compute_statistics(outer_filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(0)); Ok(()) @@ -1734,7 +1746,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!(statistics.num_rows, Precision::Inexact(490)); assert_eq!(statistics.total_byte_size, Precision::Inexact(1960)); @@ -1784,7 +1796,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let filter_statistics = filter.partition_statistics(None)?; + let filter_statistics = compute_statistics(filter.as_ref(), None)?; let expected_filter_statistics = Statistics { num_rows: Precision::Absent, @@ -1819,7 +1831,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let filter_statistics = filter.partition_statistics(None)?; + let filter_statistics = compute_statistics(filter.as_ref(), None)?; // First column is "a", and it is a column with only one value after the filter. 
assert!(filter_statistics.column_statistics[0].is_singleton()); @@ -1866,11 +1878,11 @@ mod tests { Arc::new(Literal::new(ScalarValue::Decimal128(Some(10), 10, 10))), )); let filter = FilterExec::try_new(predicate, input)?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; assert_eq!(statistics.num_rows, Precision::Inexact(200)); assert_eq!(statistics.total_byte_size, Precision::Inexact(800)); let filter = filter.with_default_selectivity(40)?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; assert_eq!(statistics.num_rows, Precision::Inexact(400)); assert_eq!(statistics.total_byte_size, Precision::Inexact(1600)); Ok(()) @@ -1905,7 +1917,7 @@ mod tests { Arc::new(EmptyExec::new(Arc::clone(&schema))), )?; - exec.partition_statistics(None).unwrap(); + compute_statistics(&exec, None).unwrap(); Ok(()) } @@ -2061,8 +2073,8 @@ mod tests { assert_eq!(filter1.projection(), filter2.projection()); // Verify statistics are the same - let stats1 = filter1.partition_statistics(None)?; - let stats2 = filter2.partition_statistics(None)?; + let stats1 = compute_statistics(&filter1, None)?; + let stats2 = compute_statistics(&filter2, None)?; assert_eq!(stats1.num_rows, stats2.num_rows); assert_eq!(stats1.total_byte_size, stats2.total_byte_size); @@ -2115,7 +2127,7 @@ mod tests { .unwrap() .build()?; - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(&filter, None)?; // Verify statistics reflect both filtering and projection assert!(matches!(statistics.num_rows, Precision::Inexact(_))); @@ -2346,7 +2358,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; let col_b_stats = &statistics.column_statistics[1]; assert_eq!(col_b_stats.min_value, Precision::Absent); 
assert_eq!(col_b_stats.max_value, Precision::Absent); @@ -2631,7 +2643,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; for (i, expected) in expected_ndvs.iter().enumerate() { assert_eq!( @@ -2705,7 +2717,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // a = 42 collapses to single value assert_eq!( statistics.column_statistics[0].distinct_count, @@ -2751,7 +2763,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2784,7 +2796,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2817,7 +2829,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2850,7 +2862,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2884,7 +2896,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let 
statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -2930,7 +2942,7 @@ mod tests { )); let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; assert_eq!( statistics.column_statistics[0].distinct_count, Precision::Exact(1) @@ -3231,7 +3243,7 @@ mod tests { let filter: Arc = Arc::new(FilterExec::try_new(predicate, input)?); - let statistics = filter.partition_statistics(None)?; + let statistics = compute_statistics(filter.as_ref(), None)?; // Filter estimates ~10 rows (selectivity = 10/100) assert_eq!(statistics.num_rows, Precision::Inexact(10)); // NDV should be capped at the filtered row count (10), not the original 80 diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 3027fb130f087..94094a6b2f7d8 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -31,6 +31,7 @@ use crate::projection::{ ProjectionExec, join_allows_pushdown, join_table_borders, new_join_children, physical_to_column_exprs, }; +use crate::statistics_context::StatisticsContext; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, @@ -380,11 +381,22 @@ impl ExecutionPlan for CrossJoinExec { } } - fn partition_statistics(&self, partition: Option) -> Result> { - // Get the all partitions statistics of the left - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(None)?); - let right_stats = - Arc::unwrap_or_clone(self.right.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + // Left side is 
always broadcast (collected into a single partition), + // so it always needs overall stats (child_stats provides these). + // Right side can have multiple partitions, so it needs per-partition + // stats when a specific partition is requested. + let left_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); + let right_stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.right.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])), + }; Ok(Arc::new(stats_cartesian_product(left_stats, right_stats))) } diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 4ebbf7cb31ccf..cc82539b4f220 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -53,6 +53,7 @@ use crate::projection::{ }; use crate::repartition::REPARTITION_RANDOM_STATE; use crate::spill::get_record_batch_memory_size; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -1440,14 +1441,19 @@ impl ExecutionPlan for HashJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { let stats = match (partition, self.mode) { - // For CollectLeft mode, the left side is collected into a single partition, - // so all left partitions are available to each output partition. - // For the right side, we need the specific partition statistics. 
- (Some(partition), PartitionMode::CollectLeft) => { - let left_stats = self.left.partition_statistics(None)?; - let right_stats = self.right.partition_statistics(Some(partition))?; + // For CollectLeft mode, the left side is broadcast (collected into + // a single partition), so it needs overall stats (child_stats). + // Right side is partitioned, so it needs per-partition stats. + (Some(_), PartitionMode::CollectLeft) => { + let left_stats = Arc::clone(&ctx.child_stats()[0]); + let right_stats = + ctx.compute_child_statistics(self.right.as_ref(), partition)?; estimate_join_statistics( Arc::unwrap_or_clone(left_stats), @@ -1458,12 +1464,27 @@ impl ExecutionPlan for HashJoinExec { )? } - // For Partitioned mode, both sides are partitioned, so each output partition - // only has access to the corresponding partition from both sides. - (Some(partition), PartitionMode::Partitioned) => { - let left_stats = self.left.partition_statistics(Some(partition))?; - let right_stats = self.right.partition_statistics(Some(partition))?; + // For Partitioned mode, both sides are hash-partitioned symmetrically, + // so each output partition uses the matching partition from both sides. + (Some(_), PartitionMode::Partitioned) => { + let left_stats = + ctx.compute_child_statistics(self.left.as_ref(), partition)?; + let right_stats = + ctx.compute_child_statistics(self.right.as_ref(), partition)?; + + estimate_join_statistics( + Arc::unwrap_or_clone(left_stats), + Arc::unwrap_or_clone(right_stats), + &self.on, + &self.join_type, + &self.join_schema, + )? + } + // Overall stats requested, context has overall child stats. + (None, _) => { + let left_stats = Arc::clone(&ctx.child_stats()[0]); + let right_stats = Arc::clone(&ctx.child_stats()[1]); estimate_join_statistics( Arc::unwrap_or_clone(left_stats), Arc::unwrap_or_clone(right_stats), @@ -1473,14 +1494,11 @@ impl ExecutionPlan for HashJoinExec { )? 
} - // For Auto mode or when no specific partition is requested, fall back to - // the current behavior of getting all partition statistics. - (None, _) | (Some(_), PartitionMode::Auto) => { - // TODO stats: it is not possible in general to know the output size of joins - // There are some special cases though, for example: - // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` - let left_stats = self.left.partition_statistics(None)?; - let right_stats = self.right.partition_statistics(None)?; + // Auto mode hasn't decided partitioning yet, so it needs + // overall stats from both sides. + (Some(_), PartitionMode::Auto) => { + let left_stats = Arc::clone(&ctx.child_stats()[0]); + let right_stats = Arc::clone(&ctx.child_stats()[1]); estimate_join_statistics( Arc::unwrap_or_clone(left_stats), Arc::unwrap_or_clone(right_stats), diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index db8c75b4a578b..348aa91f89d4c 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -42,6 +42,7 @@ use crate::projection::{ EmbeddedProjection, JoinData, ProjectionExec, try_embed_projection, try_pushdown_through_join, }; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, @@ -701,7 +702,11 @@ impl ExecutionPlan for NestedLoopJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { // NestedLoopJoinExec is designed for joins without equijoin keys in the // ON clause (e.g., `t1 JOIN t2 ON (t1.v1 + t2.v1) % 2 = 0`). 
Any join // predicates are stored in `self.filter`, but `estimate_join_statistics` @@ -711,15 +716,17 @@ impl ExecutionPlan for NestedLoopJoinExec { // unknown row counts. let join_columns = Vec::new(); - // Left side is always a single partition (Distribution::SinglePartition), - // so we always request overall stats with `None`. Right side can have - // multiple partitions, so we forward the partition parameter to get - // partition-specific statistics when requested. - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(None)?); - let right_stats = Arc::unwrap_or_clone(match partition { - Some(partition) => self.right.partition_statistics(Some(partition))?, - None => self.right.partition_statistics(None)?, - }); + // Left side is always broadcast (collected into a single partition), + // so it always needs overall stats (child_stats provides these). + // Right side can have multiple partitions, so it needs per-partition + // stats when a specific partition is requested. 
+ let left_stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); + let right_stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.right.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])), + }; let stats = estimate_join_statistics( left_stats, @@ -2980,6 +2987,7 @@ fn build_unmatched_batch( #[cfg(test)] pub(crate) mod tests { use super::*; + use crate::statistics_context::compute_statistics; use crate::test::{TestMemoryExec, assert_join_metrics}; use crate::{ common, expressions::Column, repartition::RepartitionExec, test::build_table_i32, @@ -3359,7 +3367,7 @@ pub(crate) mod tests { &JoinType::Left, Some(vec![1, 2]), )?; - let stats = nested_loop_join.partition_statistics(None)?; + let stats = compute_statistics(&nested_loop_join, None)?; assert_eq!( nested_loop_join.schema().fields().len(), stats.column_statistics.len(), diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs index 3f309431614a4..18f825ad7ec9a 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/exec.rs @@ -38,6 +38,7 @@ use crate::projection::{ physical_to_column_exprs, update_join_on, }; use crate::spill::spill_manager::SpillManager; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, SendableRecordBatchStream, Statistics, check_if_same_properties, @@ -581,21 +582,32 @@ impl ExecutionPlan for SortMergeJoinExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { // SortMergeJoinExec uses symmetric hash partitioning where both left and right // inputs are hash-partitioned on the 
join keys. This means partition `i` of the // left input is joined with partition `i` of the right input. // - // Therefore, partition-specific statistics can be computed by getting the - // partition-specific statistics from both children and combining them via - // `estimate_join_statistics`. - // // TODO stats: it is not possible in general to know the output size of joins // There are some special cases though, for example: // - `A LEFT JOIN B ON A.col=B.col` with `COUNT_DISTINCT(B.col)=COUNT(B.col)` - let left_stats = Arc::unwrap_or_clone(self.left.partition_statistics(partition)?); - let right_stats = - Arc::unwrap_or_clone(self.right.partition_statistics(partition)?); + let (left_stats, right_stats) = match partition { + Some(_) => ( + Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.left.as_ref(), partition)?, + ), + Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.right.as_ref(), partition)?, + ), + ), + None => ( + Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[1])), + ), + }; Ok(Arc::new(estimate_join_statistics( left_stats, right_stats, diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs index 5d70530528728..e7daca9692288 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs @@ -3148,7 +3148,6 @@ async fn test_left_outer_join_filtered_mask() -> Result<()> { #[test] fn test_partition_statistics() -> Result<()> { - use crate::ExecutionPlan; use datafusion_common::stats::Precision; let left = build_table( @@ -3185,7 +3184,7 @@ fn test_partition_statistics() -> Result<()> { // Test aggregate statistics (partition = None) // Should return meaningful statistics computed from both inputs - let stats = join_exec.partition_statistics(None)?; + let stats = crate::statistics_context::compute_statistics(&join_exec, 
None)?; assert_eq!( stats.column_statistics.len(), expected_cols, @@ -3203,7 +3202,8 @@ fn test_partition_statistics() -> Result<()> { // Since the child TestMemoryExec returns unknown stats for specific partitions, // the join output will also have Absent num_rows. This is expected behavior // as the statistics depend on what the children can provide. - let partition_stats = join_exec.partition_statistics(Some(0))?; + let partition_stats = + crate::statistics_context::compute_statistics(&join_exec, Some(0))?; assert_eq!( partition_stats.column_statistics.len(), expected_cols, diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 3005e975424b4..ce5cf047d277a 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -49,6 +49,7 @@ pub use crate::execution_plan::{ pub use crate::metrics::Metric; pub use crate::ordering::InputOrderMode; pub use crate::sort_pushdown::SortOrderPushdownResult; +pub use crate::statistics_context::{StatisticsContext, compute_statistics}; pub use crate::stream::EmptyRecordBatchStream; pub use crate::topk::TopK; pub use crate::visitor::{ExecutionPlanVisitor, accept, visit_execution_plan}; @@ -89,6 +90,7 @@ pub mod scalar_subquery; pub mod sort_pushdown; pub mod sorts; pub mod spill; +pub mod statistics_context; pub mod stream; pub mod streaming; pub mod tree_node; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 51bef5d24bd2d..ae4a1185dc815 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -27,6 +27,7 @@ use super::{ SendableRecordBatchStream, Statistics, }; use crate::execution_plan::{Boundedness, CardinalityEffect}; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayFormatType, Distribution, ExecutionPlan, Partitioning, check_if_same_properties, @@ -234,8 +235,17 @@ impl ExecutionPlan for GlobalLimitExec { Some(self.metrics.clone_inner()) } - fn 
partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; Ok(Arc::new(stats.with_fetch(self.fetch, self.skip, 1)?)) } @@ -411,8 +421,17 @@ impl ExecutionPlan for LocalLimitExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; Ok(Arc::new(stats.with_fetch(Some(self.fetch), 0, 1)?)) } @@ -559,6 +578,7 @@ mod tests { use super::*; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::common::collect; + use crate::statistics_context::compute_statistics; use crate::test; use crate::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; @@ -837,7 +857,7 @@ mod tests { let offset = GlobalLimitExec::new(Arc::new(CoalescePartitionsExec::new(csv)), skip, fetch); - Ok(offset.partition_statistics(None)?.num_rows) + Ok(compute_statistics(&offset, None)?.num_rows) } pub fn build_group_by( @@ -877,7 +897,7 @@ mod tests { fetch, ); - Ok(offset.partition_statistics(None)?.num_rows) + Ok(compute_statistics(&offset, None)?.num_rows) } async fn row_number_statistics_for_local_limit( @@ -890,7 +910,7 @@ mod tests { let offset = LocalLimitExec::new(csv, fetch); - Ok(offset.partition_statistics(None)?.num_rows) + 
Ok(compute_statistics(&offset, None)?.num_rows) } /// Return a RecordBatch with a single array with row_count sz diff --git a/datafusion/physical-plan/src/operator_statistics/mod.rs b/datafusion/physical-plan/src/operator_statistics/mod.rs index 20266e9768ebe..c86dab1d7d7ea 100644 --- a/datafusion/physical-plan/src/operator_statistics/mod.rs +++ b/datafusion/physical-plan/src/operator_statistics/mod.rs @@ -95,6 +95,7 @@ use datafusion_common::stats::Precision; use datafusion_common::{Result, Statistics}; use crate::ExecutionPlan; +use crate::statistics_context::compute_statistics; // ============================================================================ // ExtendedStatistics: Statistics with type-safe extensions @@ -271,7 +272,7 @@ impl StatisticsProvider for DefaultStatisticsProvider { plan: &dyn ExecutionPlan, _child_stats: &[ExtendedStatistics], ) -> Result { - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; Ok(StatisticsResult::Computed(ExtendedStatistics::new_arc( base, ))) @@ -363,7 +364,7 @@ impl StatisticsRegistry { pub fn compute(&self, plan: &dyn ExecutionPlan) -> Result { // Fast path: no providers registered, skip the walk entirely if self.providers.is_empty() { - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; return Ok(ExtendedStatistics::new_arc(base)); } @@ -387,7 +388,7 @@ impl StatisticsRegistry { } } // Fallback: use plan's built-in stats - let base = plan.partition_statistics(None)?; + let base = compute_statistics(plan, None)?; Ok(ExtendedStatistics::new_arc(base)) } @@ -510,7 +511,7 @@ fn computed_with_row_count( plan: &dyn ExecutionPlan, num_rows: Precision, ) -> Result { - let mut base = Arc::unwrap_or_clone(plan.partition_statistics(None)?); + let mut base = Arc::unwrap_or_clone(compute_statistics(plan, None)?); rescale_byte_size(&mut base, num_rows); Ok(StatisticsResult::Computed(ExtendedStatistics::new(base))) } @@ -1031,6 +1032,7 @@ mod tests { 
use super::*; use crate::filter::FilterExec; use crate::projection::ProjectionExec; + use crate::statistics_context::StatisticsContext; use crate::{DisplayAs, DisplayFormatType, PlanProperties}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_common::stats::Precision; @@ -1137,9 +1139,10 @@ mod tests { unimplemented!() } - fn partition_statistics( + fn partition_statistics_with_context( &self, _partition: Option, + _ctx: &StatisticsContext, ) -> Result> { Ok(Arc::new(self.stats.clone())) } diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index ae8e73cd74ade..c8aa37c25ef43 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -35,6 +35,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_expr::PhysicalExpr; +use crate::statistics_context::StatisticsContext; use log::trace; /// Execution plan for empty relation with produce_one_row=true @@ -173,7 +174,11 @@ impl ExecutionPlan for PlaceholderRowExec { Ok(Box::pin(cooperative(ms))) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { let batches = self .data() .expect("Create single row placeholder RecordBatch should not fail"); diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index e5b91fbb1c5d4..c89f9a0c7ae9f 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -33,6 +33,7 @@ use crate::filter_pushdown::{ FilterPushdownPropagation, FilterRemapper, PushedDownPredicate, }; use crate::joins::utils::{ColumnIndex, JoinFilter, JoinOn, JoinOnRef}; +use crate::statistics_context::StatisticsContext; use crate::{DisplayFormatType, ExecutionPlan, PhysicalExpr, check_if_same_properties}; use 
std::collections::HashMap; use std::pin::Pin; @@ -359,9 +360,17 @@ impl ExecutionPlan for ProjectionExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stats = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stats = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; let output_schema = self.schema(); Ok(Arc::new( self.projector @@ -1184,6 +1193,7 @@ mod tests { use crate::common::collect; use crate::filter_pushdown::PushedDown; + use crate::statistics_context::compute_statistics; use crate::test; use crate::test::exec::StatisticsExec; @@ -1374,7 +1384,7 @@ mod tests { let projection = ProjectionExec::try_new(exprs, input).unwrap(); - let stats = projection.partition_statistics(None).unwrap(); + let stats = compute_statistics(&projection, None).unwrap(); assert_eq!(stats.num_rows, Precision::Exact(10)); assert_eq!( diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index b4af6e2c09a5c..14fc448182dad 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -38,6 +38,7 @@ use crate::projection::{ProjectionExec, all_columns, make_with_child, update_exp use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::spill::spill_manager::SpillManager; use crate::spill::spill_pool::{self, SpillPoolWriter}; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::{ DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Statistics, @@ -1184,7 +1185,11 @@ impl ExecutionPlan for RepartitionExec { Some(self.metrics.clone_inner()) } - fn 
partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { if let Some(partition) = partition { let partition_count = self.partitioning().partition_count(); if partition_count == 0 { @@ -1198,7 +1203,7 @@ impl ExecutionPlan for RepartitionExec { partition_count ); - let mut stats = Arc::unwrap_or_clone(self.input.partition_statistics(None)?); + let mut stats = Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])); // Distribute statistics across partitions stats.num_rows = stats @@ -1221,7 +1226,7 @@ impl ExecutionPlan for RepartitionExec { Ok(Arc::new(stats)) } else { - self.input.partition_statistics(None) + Ok(Arc::clone(&ctx.child_stats()[0])) } } diff --git a/datafusion/physical-plan/src/scalar_subquery.rs b/datafusion/physical-plan/src/scalar_subquery.rs index 82421d66dee9e..8b2c1afa660bb 100644 --- a/datafusion/physical-plan/src/scalar_subquery.rs +++ b/datafusion/physical-plan/src/scalar_subquery.rs @@ -35,6 +35,7 @@ use datafusion_physical_expr::PhysicalExpr; use crate::execution_plan::{CardinalityEffect, ExecutionPlan, PlanProperties}; use crate::joins::utils::{OnceAsync, OnceFut}; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::{DisplayAs, DisplayFormatType, SendableRecordBatchStream}; @@ -244,8 +245,15 @@ impl ExecutionPlan for ScalarSubqueryExec { vec![false; self.subqueries.len() + 1] } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition), + None => Ok(Arc::clone(&ctx.child_stats()[0])), + } } fn cardinality_effect(&self) -> CardinalityEffect { diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs 
b/datafusion/physical-plan/src/sorts/partial_sort.rs index 28b8745235918..76865dc606628 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -58,6 +58,7 @@ use std::task::{Context, Poll}; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -345,8 +346,15 @@ impl ExecutionPlan for PartialSortExec { Some(self.metrics_set.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - self.input.partition_statistics(partition) + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition), + None => Ok(Arc::clone(&ctx.child_stats()[0])), + } } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 6c02af8dec6d3..4bdd6ec3aed4d 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -45,6 +45,7 @@ use crate::sorts::streaming_merge::{SortedSpillFile, StreamingMergeBuilder}; use crate::spill::get_record_batch_memory_size; use crate::spill::in_progress_spill_file::InProgressSpillFile; use crate::spill::spill_manager::{GetSlicedSize, SpillManager}; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::stream::ReservationStream; use crate::topk::TopK; @@ -1276,13 +1277,22 @@ impl ExecutionPlan for SortExec { Some(self.metrics_set.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let p = if !self.preserve_partitioning() { - None + fn partition_statistics_with_context( + &self, + 
partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let child_stats = if self.preserve_partitioning() { + match partition { + Some(_) => { + ctx.compute_child_statistics(self.input.as_ref(), partition)? + } + None => Arc::clone(&ctx.child_stats()[0]), + } } else { - partition + Arc::clone(&ctx.child_stats()[0]) }; - let stats = Arc::unwrap_or_clone(self.input.partition_statistics(p)?); + let stats = Arc::unwrap_or_clone(child_stats); Ok(Arc::new(stats.with_fetch(self.fetch, 0, 1)?)) } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 13c28ccb10991..9013237f5d135 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -24,6 +24,7 @@ use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::projection::{ProjectionExec, make_with_child, update_ordering}; use crate::sorts::streaming_merge::StreamingMergeBuilder; +use crate::statistics_context::StatisticsContext; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, @@ -393,8 +394,12 @@ impl ExecutionPlan for SortPreservingMergeExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, _partition: Option) -> Result> { - self.input.partition_statistics(None) + fn partition_statistics_with_context( + &self, + _partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + Ok(Arc::clone(&ctx.child_stats()[0])) } fn supports_limit_pushdown(&self) -> bool { diff --git a/datafusion/physical-plan/src/statistics_context.rs b/datafusion/physical-plan/src/statistics_context.rs new file mode 100644 index 0000000000000..cc40308ca812a --- /dev/null +++ b/datafusion/physical-plan/src/statistics_context.rs @@ -0,0 +1,235 @@ +// Licensed to the Apache Software 
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Context for computing statistics in physical plans. +//! +//! [`StatisticsContext`] provides external context to +//! [`ExecutionPlan::partition_statistics_with_context`], enabling operators +//! to receive pre-computed child statistics and additional context for +//! statistics computation. + +use crate::ExecutionPlan; +use datafusion_common::Result; +use datafusion_common::Statistics; +use std::cell::RefCell; +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::Arc; + +/// Per-call memoization cache for [`compute_statistics`]. +/// +/// Keyed by `(plan node pointer address, partition)`. Created once per +/// top-level [`compute_statistics`] call and shared across all recursive +/// and operator-internal calls via [`StatisticsContext`]. +/// +/// The pointer-based key is safe within a single synchronous +/// `compute_statistics` call: all `Arc` nodes are held +/// by the plan tree for the duration of the walk, so addresses cannot be +/// reused. 
+#[derive(Debug, Default)] +struct StatsCache(HashMap<(usize, Option), Arc>); + +impl StatsCache { + fn get( + &self, + plan: &dyn ExecutionPlan, + partition: Option, + ) -> Option<&Arc> { + let key = ( + plan as *const dyn ExecutionPlan as *const () as usize, + partition, + ); + self.0.get(&key) + } + + fn insert( + &mut self, + plan: &dyn ExecutionPlan, + partition: Option, + stats: Arc, + ) { + let key = ( + plan as *const dyn ExecutionPlan as *const () as usize, + partition, + ); + self.0.insert(key, stats); + } +} + +/// Context passed to [`ExecutionPlan::partition_statistics_with_context`] +/// carrying external information that operators can use when computing +/// their statistics. +#[derive(Debug)] +pub struct StatisticsContext { + /// Pre-computed statistics for each child of the current node, + /// in the same order as [`ExecutionPlan::children`]. + child_stats: Vec>, + /// Shared memoization cache for the current `compute_statistics` walk + cache: Option>>, +} + +impl StatisticsContext { + /// Creates a new context with pre-computed child statistics. + pub fn new(child_stats: Vec>) -> Self { + Self { + child_stats, + cache: None, + } + } + + /// Creates an empty context (for leaf nodes or when child stats + /// are not available). + pub fn empty() -> Self { + Self { + child_stats: Vec::new(), + cache: None, + } + } + + /// Returns the pre-computed overall (`None`) statistics for each child node. + /// For per-partition stats, use [`Self::compute_child_statistics`]. + pub fn child_stats(&self) -> &[Arc] { + &self.child_stats + } + + /// Computes statistics for a child plan, using the shared cache + /// from the current [`compute_statistics`] walk. + /// + /// Use this when [`Self::child_stats`] does not provide the right + /// granularity: partition-preserving operators needing per-partition + /// child stats (via `Some(partition)`), or partition-merging operators + /// needing overall stats (via `None`). 
+ pub fn compute_child_statistics( + &self, + plan: &dyn ExecutionPlan, + partition: Option, + ) -> Result> { + match &self.cache { + Some(cache) => compute_statistics_inner(plan, partition, cache), + None => compute_statistics(plan, partition), + } + } +} + +impl Clone for StatisticsContext { + fn clone(&self) -> Self { + Self { + child_stats: self.child_stats.clone(), + cache: self.cache.clone(), + } + } +} + +impl Default for StatisticsContext { + fn default() -> Self { + Self::empty() + } +} + +/// Computes statistics for a plan node by first recursively computing +/// overall (`None`) statistics for all children, then calling +/// [`ExecutionPlan::partition_statistics_with_context`] with the pre-computed +/// child statistics. +/// +/// Results are memoized within a single call: operators that internally +/// call [`StatisticsContext::compute_child_statistics`] will hit the +/// cache instead of re-walking subtrees. +/// +/// Children are always pre-computed with `partition = None` (overall stats), +/// so [`StatisticsContext::child_stats`] always contains overall statistics. +/// Operators that need per-partition child stats (e.g., partition-preserving +/// operators when called with `Some(partition)`) should use +/// [`StatisticsContext::compute_child_statistics`] with the desired partition. 
+pub fn compute_statistics( + plan: &dyn ExecutionPlan, + partition: Option, +) -> Result> { + let cache = Rc::new(RefCell::new(StatsCache::default())); + compute_statistics_inner(plan, partition, &cache) +} + +fn compute_statistics_inner( + plan: &dyn ExecutionPlan, + partition: Option, + cache: &Rc>, +) -> Result> { + if let Some(cached) = cache.borrow().get(plan, partition) { + return Ok(Arc::clone(cached)); + } + + let child_stats = plan + .children() + .iter() + .map(|child| compute_statistics_inner(child.as_ref(), None, cache)) + .collect::>>()?; + + let ctx = StatisticsContext { + child_stats, + cache: Some(Rc::clone(cache)), + }; + let result = plan.partition_statistics_with_context(partition, &ctx)?; + + cache + .borrow_mut() + .insert(plan, partition, Arc::clone(&result)); + Ok(result) +} + +#[cfg(all(test, feature = "test_utils"))] +mod tests { + use super::*; + use crate::coalesce_partitions::CoalescePartitionsExec; + use crate::test::exec::StatisticsExec; + use arrow::datatypes::{DataType, Field, Schema}; + use datafusion_common::{ColumnStatistics, stats::Precision}; + + fn make_stats_leaf(num_rows: usize) -> Arc { + let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]); + let col_stats = vec![ColumnStatistics { + null_count: Precision::Exact(0), + max_value: Precision::Absent, + min_value: Precision::Absent, + sum_value: Precision::Absent, + distinct_count: Precision::Absent, + byte_size: Precision::Absent, + }]; + Arc::new(StatisticsExec::new( + Statistics { + num_rows: Precision::Exact(num_rows), + total_byte_size: Precision::Absent, + column_statistics: col_stats, + }, + schema, + )) + } + + #[test] + fn child_stats_always_returns_overall_stats() { + // CoalescePartitionsExec merges partitions, so when called with + // Some(0) the walk should still pre-compute children with None + let leaf = make_stats_leaf(100); + let plan: Arc = Arc::new(CoalescePartitionsExec::new(leaf)); + + // Calling with Some(0) should still work and return 
correct stats + let stats = compute_statistics(plan.as_ref(), Some(0)).unwrap(); + assert_eq!(stats.num_rows, Precision::Exact(100)); + + // Calling with None should return the same + let stats_none = compute_statistics(plan.as_ref(), None).unwrap(); + assert_eq!(stats_none.num_rows, Precision::Exact(100)); + } +} diff --git a/datafusion/physical-plan/src/test.rs b/datafusion/physical-plan/src/test.rs index 4c4724e4dcc4f..5019f265f3071 100644 --- a/datafusion/physical-plan/src/test.rs +++ b/datafusion/physical-plan/src/test.rs @@ -29,6 +29,7 @@ use crate::common; use crate::execution_plan::{Boundedness, EmissionType}; use crate::memory::MemoryStream; use crate::metrics::MetricsSet; +use crate::statistics_context::StatisticsContext; use crate::stream::RecordBatchStreamAdapter; use crate::streaming::PartitionStream; use crate::{DisplayAs, DisplayFormatType, PlanProperties}; @@ -181,7 +182,11 @@ impl ExecutionPlan for TestMemoryExec { unimplemented!() } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { Ok(Arc::new(Statistics::new_unknown(&self.schema))) } else { diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index 200223b9b660a..0dde3f637cfe7 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -20,7 +20,7 @@ use crate::{ DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, common, - execution_plan::Boundedness, + execution_plan::Boundedness, statistics_context::StatisticsContext, }; use crate::{ execution_plan::EmissionType, @@ -257,7 +257,11 @@ impl ExecutionPlan for MockExec { } // Panics if one of the batches is an error - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + 
&self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema))); } @@ -489,7 +493,11 @@ impl ExecutionPlan for BarrierExec { Ok(builder.build()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { if partition.is_some() { return Ok(Arc::new(Statistics::new_unknown(&self.schema))); } @@ -683,7 +691,11 @@ impl ExecutionPlan for StatisticsExec { unimplemented!("This plan only serves for testing statistics") } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(if partition.is_some() { Statistics::new_unknown(&self.schema) } else { diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index ec9ea376e0b6d..4f76a76e52163 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -43,6 +43,7 @@ use crate::filter_pushdown::{ }; use crate::metrics::BaselineMetrics; use crate::projection::{ProjectionExec, make_with_child}; +use crate::statistics_context::StatisticsContext; use crate::stream::ObservedStream; use arrow::datatypes::{Field, Schema, SchemaRef}; @@ -326,26 +327,39 @@ impl ExecutionPlan for UnionExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { if let Some(partition_idx) = partition { // For a specific partition, find which input it belongs to let mut remaining_idx = partition_idx; - for input in &self.inputs { + for (i, input) in self.inputs.iter().enumerate() { let input_partition_count = input.output_partitioning().partition_count(); if remaining_idx < 
input_partition_count { - // This partition belongs to this input - return input.partition_statistics(Some(remaining_idx)); + // This partition belongs to this input - compute stats + // for the specific child at the specific partition + let child = &self.inputs[i]; + return ctx + .compute_child_statistics(child.as_ref(), Some(remaining_idx)); } remaining_idx -= input_partition_count; } // If we get here, the partition index is out of bounds Ok(Arc::new(Statistics::new_unknown(&self.schema()))) } else { - let schema = self.schema(); - Ok(Arc::new(merge_input_statistics( - &self.inputs, - None, - schema.as_ref(), + // Use pre-computed child stats from context + let stats = ctx + .child_stats() + .iter() + .map(|s| s.as_ref()) + .collect::>(); + + Ok(Arc::new(Statistics::try_merge_iter_with_ndv_fallback( + stats.into_iter(), + self.schema().as_ref(), + NdvFallback::Sum, )?)) } } @@ -656,12 +670,31 @@ impl ExecutionPlan for InterleaveExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let schema = self.schema(); - Ok(Arc::new(merge_input_statistics( - &self.inputs, - partition, - schema.as_ref(), + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let stats = match partition { + Some(_) => self + .inputs + .iter() + .map(|input| { + ctx.compute_child_statistics(input.as_ref(), partition) + .map(Arc::unwrap_or_clone) + }) + .collect::>>()?, + None => ctx + .child_stats() + .iter() + .map(|s| Arc::unwrap_or_clone(Arc::clone(s))) + .collect::>(), + }; + + Ok(Arc::new(Statistics::try_merge_iter_with_ndv_fallback( + stats.iter(), + self.schema().as_ref(), + NdvFallback::Sum, )?)) } @@ -821,28 +854,12 @@ impl Stream for CombinedRecordBatchStream { } } -fn merge_input_statistics( - inputs: &[Arc], - partition: Option, - schema: &Schema, -) -> Result { - let stats = inputs - .iter() - .map(|input| { - input - .partition_statistics(partition) - 
.map(Arc::unwrap_or_clone) - }) - .collect::>>()?; - - Statistics::try_merge_iter_with_ndv_fallback(stats.iter(), schema, NdvFallback::Sum) -} - #[cfg(test)] mod tests { use super::*; use crate::collect; use crate::repartition::RepartitionExec; + use crate::statistics_context::compute_statistics; use crate::test::exec::StatisticsExec; use crate::test::{self, TestMemoryExec}; @@ -1033,7 +1050,7 @@ mod tests { Arc::new(StatisticsExec::new(right, schema.as_ref().clone())); let union = UnionExec::try_new(vec![left, right])?; - let stats = union.partition_statistics(None)?; + let stats = compute_statistics(union.as_ref(), None)?; assert_eq!(stats.as_ref(), &expected); Ok(()) @@ -1050,7 +1067,7 @@ mod tests { Arc::new(StatisticsExec::new(right, schema.as_ref().clone())); let union = UnionExec::try_new(vec![left, right])?; - let stats = union.partition_statistics(None)?; + let stats = compute_statistics(union.as_ref(), None)?; assert_eq!(stats.as_ref(), &expected); Ok(()) @@ -1071,7 +1088,7 @@ mod tests { )?); let interleave = InterleaveExec::try_new(vec![left, right])?; - let stats = interleave.partition_statistics(None)?; + let stats = compute_statistics(&interleave, None)?; assert_eq!(stats.as_ref(), &expected); Ok(()) @@ -1093,7 +1110,7 @@ mod tests { )?); let interleave = InterleaveExec::try_new(vec![left, right])?; - let stats = interleave.partition_statistics(Some(0))?; + let stats = compute_statistics(&interleave, Some(0))?; let expected = Statistics::default() .with_num_rows(Precision::Inexact(5)) diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index 14f8ce5e95ffd..55423199c54c5 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -28,6 +28,7 @@ use std::task::{Context, Poll}; use super::utils::create_schema; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, 
MetricsSet}; +use crate::statistics_context::StatisticsContext; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, window_equivalence_properties, @@ -390,9 +391,17 @@ impl ExecutionPlan for BoundedWindowAggExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stat = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stat = match partition { + Some(_) => Arc::unwrap_or_clone( + ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; Ok(Arc::new(self.statistics_helper(input_stat)?)) } diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 5098c84034062..065b3bb0c1d8c 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -24,6 +24,7 @@ use std::task::{Context, Poll}; use super::utils::create_schema; use crate::execution_plan::{CardinalityEffect, EmissionType}; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::statistics_context::StatisticsContext; use crate::windows::{ calc_requirements, get_ordered_partition_by_indices, get_partition_by_sort_exprs, window_equivalence_properties, @@ -293,9 +294,17 @@ impl ExecutionPlan for WindowAggExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, partition: Option) -> Result> { - let input_stat = - Arc::unwrap_or_clone(self.input.partition_statistics(partition)?); + fn partition_statistics_with_context( + &self, + partition: Option, + ctx: &StatisticsContext, + ) -> Result> { + let input_stat = match partition { + Some(_) => Arc::unwrap_or_clone( + 
ctx.compute_child_statistics(self.input.as_ref(), partition)?, + ), + None => Arc::unwrap_or_clone(Arc::clone(&ctx.child_stats()[0])), + }; let win_cols = self.window_expr.len(); let input_cols = self.input.schema().fields().len(); // TODO stats: some windowing function will maintain invariants such as min, max... diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index 0855dbf2fd635..f981e8fee2903 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -29,6 +29,7 @@ use crate::{ SendableRecordBatchStream, Statistics, }; +use crate::statistics_context::StatisticsContext; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::tree_node::TreeNodeRecursion; @@ -235,7 +236,11 @@ impl ExecutionPlan for WorkTableExec { Some(self.metrics.clone_inner()) } - fn partition_statistics(&self, _partition: Option) -> Result> { + fn partition_statistics_with_context( + &self, + _partition: Option, + _ctx: &StatisticsContext, + ) -> Result> { Ok(Arc::new(Statistics::new_unknown(&self.schema()))) } diff --git a/docs/source/library-user-guide/upgrading/54.0.0.md b/docs/source/library-user-guide/upgrading/54.0.0.md index 34d1f7c61eaf1..294e13337a392 100644 --- a/docs/source/library-user-guide/upgrading/54.0.0.md +++ b/docs/source/library-user-guide/upgrading/54.0.0.md @@ -497,3 +497,79 @@ impl Default for MyTreeNode { } } ``` + +### `ExecutionPlan::partition_statistics` deprecated in favor of `partition_statistics_with_context` + +`ExecutionPlan::partition_statistics` is deprecated. A new method +`partition_statistics_with_context` accepts a `StatisticsContext` parameter +that carries pre-computed child statistics and additional context for +statistics computation. + +Existing implementations of `partition_statistics` continue to work unchanged. 
+The default `partition_statistics_with_context` delegates to the deprecated +method, so no migration is required until the deprecated method is removed. + +**Who is affected:** + +- Users who implement custom `ExecutionPlan` nodes (recommended to migrate) +- Users who call `partition_statistics` directly (recommended to switch to `compute_statistics`) + +**Migration guide:** + +For **implementations**, override `partition_statistics_with_context` instead +of `partition_statistics`. Leaf nodes that do not have children can ignore +the context. + +`ctx.child_stats()` always contains **overall** (`None`) statistics for each +child. Partition-preserving operators that need per-partition child stats +when `partition` is `Some` should call `ctx.compute_child_statistics`: + +```rust,ignore +// Before: +fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> { + let child_stats = self.input.partition_statistics(partition)?; + // ... transform child_stats ... +} + +// After: +fn partition_statistics_with_context( + &self, + partition: Option<usize>, + ctx: &StatisticsContext, +) -> Result<Arc<Statistics>> { + let child_stats = match partition { + Some(_) => ctx.compute_child_statistics(self.input.as_ref(), partition)?, + None => Arc::clone(&ctx.child_stats()[0]), + }; + // ... transform child_stats ... +} +``` + +Operators that **merge or repartition** their input (e.g., coalesce, sort +without partition preservation, sort-preserving merge) always need overall +child statistics regardless of which output partition is requested.
These +operators can use `ctx.child_stats()` directly: + +```rust,ignore +fn partition_statistics_with_context( + &self, + _partition: Option<usize>, + ctx: &StatisticsContext, +) -> Result<Arc<Statistics>> { + Ok(Arc::clone(&ctx.child_stats()[0])) +} +``` + +For **callers**, replace direct calls with `compute_statistics`, which walks +the plan tree bottom-up and threads child statistics through the context +automatically: + +```rust,ignore +use datafusion_physical_plan::compute_statistics; + +// Before: +let stats = plan.partition_statistics(None)?; + +// After: +let stats = compute_statistics(plan.as_ref(), None)?; +```