diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index 1370ac064747..47c7db9dba24 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -144,8 +144,9 @@ impl PagePruningAccessPlanFilter { return None; } - if pp.required_columns().n_columns() > 1 { + if pp.required_columns().single_column().is_none() { debug!("Ignoring multi-column page pruning predicate: {predicate}"); + return None; } Some(pp) @@ -196,7 +197,6 @@ impl PagePruningAccessPlanFilter { // The selection for this particular row group let mut overall_selection = None; for predicate in page_index_predicates { - // find column index in the parquet schema let col_idx = find_column_index(predicate, arrow_schema, parquet_schema); let row_group_metadata = &groups[r]; diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/core/src/physical_optimizer/pruning.rs index 0f3a8f3f0f37..18d42a7d2c44 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/core/src/physical_optimizer/pruning.rs @@ -738,16 +738,22 @@ impl RequiredColumns { Self::default() } - /// Returns number of unique columns + /// Returns Some(column) if this is a single column predicate. + /// + /// Returns None if this is a multi-column predicate. 
/// /// Examples: - /// * `a > 5 OR a < 10` returns `1` - /// * `a > 5 OR b < 10` returns `2` - pub(crate) fn n_columns(&self) -> usize { - self.iter() - .map(|(c, _s, _f)| c) - .collect::<HashSet<_>>() - .len() + /// * `a > 5 OR a < 10` returns `Some(a)` + /// * `a > 5 OR b < 10` returns `None` + /// * `true` returns None + pub(crate) fn single_column(&self) -> Option<&phys_expr::Column> { + let cols = self.iter().map(|(c, _s, _f)| c).collect::<HashSet<_>>(); + + if cols.len() == 1 { + cols.iter().next().copied() + } else { + None + } } /// Returns an iterator over items in columns (see doc on