From 424b73d9a65d9c240bfadae2982b70afcc263c46 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 15 Oct 2025 12:36:02 -0700 Subject: [PATCH 001/109] Refactor: split test_window_partial_constant_and_set_monotonicity into multiple tests (#17952) --- .../physical_optimizer/enforce_sorting.rs | 1216 +----------- .../enforce_sorting_monotonicity.rs | 1715 +++++++++++++++++ .../core/tests/physical_optimizer/mod.rs | 1 + 3 files changed, 1724 insertions(+), 1208 deletions(-) create mode 100644 datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index a2c604a84e76f..ad77a453350f8 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -33,16 +33,12 @@ use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, TransformedResult}; -use datafusion_common::{Result, ScalarValue, TableReference}; +use datafusion_common::{Result, TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use datafusion_expr_common::operator::Operator; -use datafusion_expr::{JoinType, SortExpr, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition}; +use datafusion_expr::{JoinType, SortExpr}; use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_functions_aggregate::average::avg_udaf; -use datafusion_functions_aggregate::count::count_udaf; -use datafusion_functions_aggregate::min_max::{max_udaf, min_udaf}; -use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{ LexOrdering, PhysicalSortExpr, PhysicalSortRequirement, OrderingRequirements }; @@ -52,8 +48,7 @@ use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::sorts::sort::SortExec; -use datafusion_physical_plan::windows::{create_window_expr, BoundedWindowAggExec, WindowAggExec}; -use datafusion_physical_plan::{displayable, get_plan_string, ExecutionPlan, InputOrderMode}; +use datafusion_physical_plan::{displayable, get_plan_string, ExecutionPlan}; use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::listing::PartitionedFile; use datafusion_physical_optimizer::enforce_sorting::{EnforceSorting, PlanWithCorrespondingCoalescePartitions, PlanWithCorrespondingSort, parallelize_sorts, ensure_sorting}; @@ -93,13 +88,13 @@ fn csv_exec_sorted( /// Runs the sort enforcement optimizer and asserts the plan /// against the original and expected plans -struct EnforceSortingTest { +pub(crate) struct EnforceSortingTest { plan: Arc, repartition_sorts: bool, } impl EnforceSortingTest { - fn new(plan: Arc) -> Self { + pub(crate) fn new(plan: Arc) -> Self { Self { plan, repartition_sorts: false, @@ -107,14 +102,14 @@ impl EnforceSortingTest { } /// Set whether to repartition sorts - fn with_repartition_sorts(mut self, repartition_sorts: bool) -> Self { + pub(crate) fn with_repartition_sorts(mut self, repartition_sorts: bool) -> Self { self.repartition_sorts = repartition_sorts; self } /// Runs the enforce sorting test and returns a string with the input 
and /// optimized plan as strings for snapshot comparison using insta - fn run(&self) -> String { + pub(crate) fn run(&self) -> String { let mut config = ConfigOptions::new(); config.optimizer.repartition_sorts = self.repartition_sorts; @@ -2487,1203 +2482,8 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> "); Ok(()) } -// aal here -#[tokio::test] -async fn test_window_partial_constant_and_set_monotonicity() -> Result<()> { - let input_schema = create_test_schema()?; - let ordering = [sort_expr_options( - "nullable_col", - &input_schema, - SortOptions { - descending: false, - nulls_first: false, - }, - )] - .into(); - let source = parquet_exec_with_sort(input_schema.clone(), vec![ordering]) as _; - - // Macro for testing window function optimization with snapshots - macro_rules! test_window_case { - ( - partition_by: $partition_by:expr, - window_frame: $window_frame:expr, - func: ($func_def:expr, $func_name:expr, $func_args:expr), - required_sort: [$($col:expr, $asc:expr, $nulls_first:expr),*], - @ $expected:literal - ) => {{ - let partition_by_exprs = if $partition_by { - vec![col("nullable_col", &input_schema)?] - } else { - vec![] - }; - - let window_expr = create_window_expr( - &$func_def, - $func_name, - &$func_args, - &partition_by_exprs, - &[], - $window_frame, - Arc::clone(&input_schema), - false, - false, - None, - )?; - - let window_exec = if window_expr.uses_bounded_memory() { - Arc::new(BoundedWindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - InputOrderMode::Sorted, - $partition_by, - )?) as Arc - } else { - Arc::new(WindowAggExec::try_new( - vec![window_expr], - Arc::clone(&source), - $partition_by, - )?) as Arc - }; - - let output_schema = window_exec.schema(); - let sort_expr = vec![ - $( - sort_expr_options( - $col, - &output_schema, - SortOptions { - descending: !$asc, - nulls_first: $nulls_first, - }, - ) - ),* - ]; - let ordering = LexOrdering::new(sort_expr).unwrap(); - let physical_plan = sort_exec(ordering, window_exec); - - let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); - - assert_snapshot!(test.run(), @ $expected); - - Result::<(), datafusion_common::DataFusionError>::Ok(()) - }}; - } - - // Function definition - Alias of the resulting column - Arguments of the function - #[derive(Clone)] - struct WindowFuncParam(WindowFunctionDefinition, String, Vec>); - let function_arg_ordered = vec![col("nullable_col", &input_schema)?]; - let function_arg_unordered = vec![col("non_nullable_col", &input_schema)?]; - let fn_count_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(count_udaf()), - "count".to_string(), - function_arg_ordered.clone(), - ); - let fn_max_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(max_udaf()), - "max".to_string(), - function_arg_ordered.clone(), - ); - let fn_min_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(min_udaf()), - "min".to_string(), - function_arg_ordered.clone(), - ); - let fn_avg_on_ordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(avg_udaf()), - "avg".to_string(), - function_arg_ordered, - ); - let fn_count_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(count_udaf()), - "count".to_string(), - function_arg_unordered.clone(), - ); - let fn_max_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(max_udaf()), - "max".to_string(), - function_arg_unordered.clone(), - ); - let fn_min_on_unordered = WindowFuncParam( - 
WindowFunctionDefinition::AggregateUDF(min_udaf()), - "min".to_string(), - function_arg_unordered.clone(), - ); - let fn_avg_on_unordered = WindowFuncParam( - WindowFunctionDefinition::AggregateUDF(avg_udaf()), - "avg".to_string(), - function_arg_unordered, - ); - - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column - // Case 0: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 1: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" - Input Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 2: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 
0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - - Optimized Plan: - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 3: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 4: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 5: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["non_nullable_col", false, false, "max", false, false], - @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 6: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false, "non_nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 7: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", false, false], - @ r#" -Input Plan: -SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", 
data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 8: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 9: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: 
true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 10: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 11: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column - // Case 12: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 13: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), 
fn_max_on_unordered.2.clone()), - required_sort: ["non_nullable_col", true, false, "max", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 14: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, false, "non_nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 15: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(None)), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column - // Case 16: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 17: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", false, true, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 18: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", true, true, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 19: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", 
data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column - // Case 20: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 21: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 22: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: 
CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 23: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["avg", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column - // Case 24: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 25: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 26: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 27: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column - // Case 28: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 29: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 30: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 31: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column - // Case 32: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 33: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 34: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 35: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION 
STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column - // Case 36: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, true], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 37: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["max", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 38: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", false, true, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 39: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), 
- required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column - // Case 40: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 41: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 42: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 43: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column - // Case 44: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", true, true], - @ r#" - Input / Optimized Plan: - SortExec: expr=[count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 45: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 46: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: 
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 47: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new(Some(true))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column - // Case 48: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 49: - test_window_case!( - 
partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 50: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 51: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column - // Case 52: - test_window_case!( - partition_by: false, - window_frame: 
Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 53: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 54: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["min", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 55: - test_window_case!( - partition_by: false, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column - // Case 56: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_ordered.0.clone(), fn_count_on_ordered.1.clone(), fn_count_on_ordered.2.clone()), - required_sort: ["count", true, false, "nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - - // Case 57: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32)?))), - func: (fn_max_on_ordered.0.clone(), fn_max_on_ordered.1.clone(), fn_max_on_ordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 58: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_ordered.0.clone(), fn_min_on_ordered.1.clone(), fn_min_on_ordered.2.clone()), - required_sort: ["min", false, false, "nullable_col", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[min@2 
DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 59: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_ordered.0.clone(), fn_avg_on_ordered.1.clone(), fn_avg_on_ordered.2.clone()), - required_sort: ["avg", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - // =============================================REGION ENDS============================================= - // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = - // ============================================REGION STARTS============================================ - // BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column - // Case 60: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_count_on_unordered.0.clone(), fn_count_on_unordered.1.clone(), fn_count_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "count", true, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 61: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_max_on_unordered.0.clone(), fn_max_on_unordered.1.clone(), fn_max_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "max", true, true], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet - "# - )?; - - // Case 62: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_min_on_unordered.0.clone(), fn_min_on_unordered.1.clone(), fn_min_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false, "min", false, false], - @ r#" - Input / Optimized Plan: - SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - "# - )?; - - // Case 63: - test_window_case!( - partition_by: true, - window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32)?), WindowFrameBound::CurrentRow)), - func: (fn_avg_on_unordered.0.clone(), fn_avg_on_unordered.1.clone(), fn_avg_on_unordered.2.clone()), - required_sort: ["nullable_col", true, false], - @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet - -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# - )?; - // =============================================REGION ENDS============================================= - - Ok(()) -} +// Test that verifies that an orthogonal sort (a sort on columns not in the input ordering) is removed when it is unused #[test] fn test_removes_unused_orthogonal_sort() -> Result<()> { let schema = create_test_schema3()?; diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs new file mode 100644 index 0000000000000..7d6c0484b624b --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs @@ -0,0 +1,1715 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::physical_optimizer::test_utils::{ + create_test_schema, parquet_exec_with_sort, sort_exec, sort_expr_options, +}; +use arrow::datatypes::DataType; +use arrow_schema::SortOptions; +use datafusion::common::ScalarValue; +use datafusion::logical_expr::WindowFrameBound; +use datafusion::logical_expr::WindowFrameUnits; +use datafusion_expr::{WindowFrame, WindowFunctionDefinition}; +use datafusion_functions_aggregate::average::avg_udaf; +use datafusion_functions_aggregate::count::count_udaf; +use datafusion_functions_aggregate::min_max::{max_udaf, min_udaf}; +use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_plan::windows::{ + create_window_expr, BoundedWindowAggExec, WindowAggExec, +}; +use datafusion_physical_plan::{ExecutionPlan, InputOrderMode}; +use insta::assert_snapshot; +use std::sync::{Arc, LazyLock}; + +// Function definition - Alias of the resulting column - Arguments of the function +#[derive(Clone)] +struct WindowFuncParam( + WindowFunctionDefinition, + &'static str, + Vec<Arc<dyn PhysicalExpr>>, +); + +fn function_arg_ordered() -> Vec<Arc<dyn PhysicalExpr>> { + let input_schema = create_test_schema().unwrap(); + vec![col("nullable_col", &input_schema).unwrap()] +} +fn function_arg_unordered() -> Vec<Arc<dyn PhysicalExpr>> { + let input_schema = create_test_schema().unwrap(); + vec![col("non_nullable_col", &input_schema).unwrap()] +} + +fn fn_count_on_ordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(count_udaf()), + "count", + function_arg_ordered(), + ) +} + +fn fn_max_on_ordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(max_udaf()), + "max", + function_arg_ordered(), + ) +} + +fn fn_min_on_ordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(min_udaf()), + "min", + function_arg_ordered(), + ) +} + +fn fn_avg_on_ordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(avg_udaf()), + "avg", + function_arg_ordered(), + ) +} + +fn fn_count_on_unordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(count_udaf()), + "count", + function_arg_unordered(), + ) +} + +fn fn_max_on_unordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(max_udaf()), + "max", + function_arg_unordered(), + ) +} +fn fn_min_on_unordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(min_udaf()), + "min", + function_arg_unordered(), + ) +} + +fn fn_avg_on_unordered() -> WindowFuncParam { + WindowFuncParam( + WindowFunctionDefinition::AggregateUDF(avg_udaf()), + "avg", + function_arg_unordered(), + ) +} + +struct TestWindowCase { + partition_by: bool, + window_frame: Arc<WindowFrame>, + func: WindowFuncParam, + required_sort: Vec<(&'static str, bool, bool)>, // (column name, ascending, nulls_first) +} +impl TestWindowCase { + fn source() -> Arc<dyn ExecutionPlan> { + static SOURCE: LazyLock<Arc<dyn ExecutionPlan>> = LazyLock::new(|| { + let input_schema = create_test_schema().unwrap(); + let ordering = [sort_expr_options( + "nullable_col", + &input_schema, + SortOptions { + descending: false, + nulls_first: false, + }, + )] + .into(); + parquet_exec_with_sort(input_schema.clone(), vec![ordering]) + }); + Arc::clone(&SOURCE) + } + + // runs the window test case and returns the string representation of the
plan + fn run(self) -> String { + let input_schema = create_test_schema().unwrap(); + let source = Self::source(); + + let Self { + partition_by, + window_frame, + func: WindowFuncParam(func_def, func_name, func_args), + required_sort, + } = self; + let partition_by_exprs = if partition_by { + vec![col("nullable_col", &input_schema).unwrap()] + } else { + vec![] + }; + + let window_expr = create_window_expr( + &func_def, + func_name.to_string(), + &func_args, + &partition_by_exprs, + &[], + window_frame, + Arc::clone(&input_schema), + false, + false, + None, + ) + .unwrap(); + + let window_exec = if window_expr.uses_bounded_memory() { + Arc::new( + BoundedWindowAggExec::try_new( + vec![window_expr], + Arc::clone(&source), + InputOrderMode::Sorted, + partition_by, + ) + .unwrap(), + ) as Arc<dyn ExecutionPlan> + } else { + Arc::new( + WindowAggExec::try_new( + vec![window_expr], + Arc::clone(&source), + partition_by, + ) + .unwrap(), + ) as Arc<dyn ExecutionPlan> + }; + + let output_schema = window_exec.schema(); + let sort_expr = required_sort.into_iter().map(|(col, asc, nulls_first)| { + sort_expr_options( + col, + &output_schema, + SortOptions { + descending: !asc, + nulls_first, + }, + ) + }); + let ordering = LexOrdering::new(sort_expr).unwrap(); + let physical_plan = sort_exec(ordering, window_exec); + + crate::physical_optimizer::enforce_sorting::EnforceSortingTest::new(physical_plan) + .with_repartition_sorts(true) + .run() + } +} +#[test] +fn test_window_partial_constant_and_set_monotonicity_0() { + // ============================================REGION STARTS============================================ + // WindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column + // Case 0: + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_1() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame:
WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_2() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_3() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_4() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: 
fn_count_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_5() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("non_nullable_col", false, false), + ("max", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_6() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ("non_nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: 
expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_7() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ + +#[test] +fn test_window_partial_constant_and_set_monotonicity_8() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} 
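+ +// Cases 9-11 below continue the same group as Case 8, following the region-comment convention used elsewhere in this file: WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column.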
+ +#[test] +fn test_window_partial_constant_and_set_monotonicity_9() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_10() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +#[test] +fn test_window_partial_constant_and_set_monotonicity_11() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column +// Case 12: +#[test] +fn 
test_window_partial_constant_and_set_monotonicity_12() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_count_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 13: +#[test] +fn test_window_partial_constant_and_set_monotonicity_13() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("non_nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 14: +#[test] +fn test_window_partial_constant_and_set_monotonicity_14() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, false), + ("non_nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 15: +#[test] +fn test_window_partial_constant_and_set_monotonicity_15() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(None)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS 
LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on ordered column +// Case 16: +#[test] +fn test_window_partial_constant_and_set_monotonicity_16() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 17: +#[test] +fn test_window_partial_constant_and_set_monotonicity_17() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", false, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 18: +#[test] +fn test_window_partial_constant_and_set_monotonicity_18() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", true, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: 
wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 19: +#[test] +fn test_window_partial_constant_and_set_monotonicity_19() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + no partition_by + on unordered column +// Case 20: +#[test] +fn test_window_partial_constant_and_set_monotonicity_20() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 21: +#[test] +fn test_window_partial_constant_and_set_monotonicity_21() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, true), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field 
{ name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 22: +#[test] +fn test_window_partial_constant_and_set_monotonicity_22() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 23: +#[test] +fn test_window_partial_constant_and_set_monotonicity_23() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + partition_by + on ordered column +// Case 24: +#[test] +fn test_window_partial_constant_and_set_monotonicity_24() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: 
"count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 25: +#[test] +fn test_window_partial_constant_and_set_monotonicity_25() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 26: +#[test] +fn test_window_partial_constant_and_set_monotonicity_26() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// Case 27: +#[test] +fn test_window_partial_constant_and_set_monotonicity_27() { + assert_snapshot!( + TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
= = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// WindowAggExec + Sliding(current row, unbounded following) + partition_by + on unordered column + +// Case 28: +#[test] +fn test_window_partial_constant_and_set_monotonicity_28() { + assert_snapshot!( + TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 29: +#[test] +fn test_window_partial_constant_and_set_monotonicity_29() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, true), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"#) +} + +// Case 30: +#[test] +fn test_window_partial_constant_and_set_monotonicity_30() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#); +} + +// Case 31: +#[test] +fn test_window_partial_constant_and_set_monotonicity_31() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true)).reverse()), + func: fn_avg_on_unordered(), + required_sort: 
vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on ordered column + +// Case 32: +#[test] +fn test_window_partial_constant_and_set_monotonicity_32() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 33: +#[test] +fn test_window_partial_constant_and_set_monotonicity_33() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 34: +#[test] +fn test_window_partial_constant_and_set_monotonicity_34() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[min@2 DESC NULLS LAST, 
nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} +// Case 35: +#[test] +fn test_window_partial_constant_and_set_monotonicity_35() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + no partition_by + on unordered column + +// Case 36: +#[test] +fn test_window_partial_constant_and_set_monotonicity_36() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, true), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 37: +#[test] +fn test_window_partial_constant_and_set_monotonicity_37() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + 
func: fn_max_on_unordered(), + required_sort: vec![ + ("max", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 38: +#[test] +fn test_window_partial_constant_and_set_monotonicity_38() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", false, true), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 39: +#[test] +fn test_window_partial_constant_and_set_monotonicity_39() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on ordered column + +// Case 40: +#[test] +fn test_window_partial_constant_and_set_monotonicity_40() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: 
wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 41: +#[test] +fn test_window_partial_constant_and_set_monotonicity_41() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 42: +#[test] +fn test_window_partial_constant_and_set_monotonicity_42() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 43: +#[test] +fn test_window_partial_constant_and_set_monotonicity_43() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// 
============================================REGION STARTS============================================ +// BoundedWindowAggExec + Plain(unbounded preceding, unbounded following) + partition_by + on unordered column + +// Case 44: +#[test] +fn test_window_partial_constant_and_set_monotonicity_44() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", true, true), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 45: +#[test] +fn test_window_partial_constant_and_set_monotonicity_45() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 46: +#[test] +fn test_window_partial_constant_and_set_monotonicity_46() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_min_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 47: +#[test] +fn test_window_partial_constant_and_set_monotonicity_47() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new(Some(true))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on ordered column + +// Case 48: +#[test] +fn test_window_partial_constant_and_set_monotonicity_48() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 49: +#[test] +fn test_window_partial_constant_and_set_monotonicity_49() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 50: +#[test] +fn test_window_partial_constant_and_set_monotonicity_50() { + 
assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 51: +#[test] +fn test_window_partial_constant_and_set_monotonicity_51() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + no partition_by + on unordered column + +// Case 52: +#[test] +fn test_window_partial_constant_and_set_monotonicity_52() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_count_on_unordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 53: +#[test] +fn test_window_partial_constant_and_set_monotonicity_53() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 54: +#[test] +fn test_window_partial_constant_and_set_monotonicity_54() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("min", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 55: +#[test] +fn test_window_partial_constant_and_set_monotonicity_55() { + assert_snapshot!(TestWindowCase { + partition_by: false, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// 
============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on ordered column + +// Case 56: +#[test] +fn test_window_partial_constant_and_set_monotonicity_56() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_ordered(), + required_sort: vec![ + ("count", true, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} + +// Case 57: +#[test] +fn test_window_partial_constant_and_set_monotonicity_57() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::Following(ScalarValue::new_one(&DataType::UInt32).unwrap()))), + func: fn_max_on_ordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 58: +#[test] +fn test_window_partial_constant_and_set_monotonicity_58() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_min_on_ordered(), + required_sort: vec![ + ("min", false, false), + ("nullable_col", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 
59: +#[test] +fn test_window_partial_constant_and_set_monotonicity_59() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_ordered(), + required_sort: vec![ + ("avg", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// =============================================REGION ENDS============================================= +// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = +// ============================================REGION STARTS============================================ +// BoundedWindowAggExec + Sliding(bounded preceding, bounded following) + partition_by + on unordered column + +// Case 60: +#[test] +fn test_window_partial_constant_and_set_monotonicity_60() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_count_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("count", true, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 61: +#[test] +fn test_window_partial_constant_and_set_monotonicity_61() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_max_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("max", true, true), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 62: +#[test] +fn test_window_partial_constant_and_set_monotonicity_62() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), 
WindowFrameBound::CurrentRow)), + func: fn_min_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ("min", false, false), + ], + }.run(), + @ r#" + Input / Optimized Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# + ); +} + +// Case 63: +#[test] +fn test_window_partial_constant_and_set_monotonicity_63() { + assert_snapshot!(TestWindowCase { + partition_by: true, + window_frame: Arc::new(WindowFrame::new_bounds(WindowFrameUnits::Rows, WindowFrameBound::Preceding(ScalarValue::new_one(&DataType::UInt32).unwrap()), WindowFrameBound::CurrentRow)), + func: fn_avg_on_unordered(), + required_sort: vec![ + ("nullable_col", true, false), + ], + }.run(), + @ r#" +Input Plan: +SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + +Optimized Plan: +BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet +"# + ); +} +// =============================================REGION ENDS============================================= diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 777c26e80e902..936c02eb2a02d 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -21,6 +21,7 @@ mod aggregate_statistics; mod combine_partial_final_agg; mod enforce_distribution; mod enforce_sorting; +mod enforce_sorting_monotonicity; mod filter_pushdown; mod join_selection; mod limit_pushdown; From a61a9c2c10ae9cb153acc12f44d554d55af59c2d Mon Sep 17 00:00:00 2001 From: peasee <98815791+peasee@users.noreply.github.com> Date: Thu, 16 Oct 2025 05:55:21 +1000 Subject: [PATCH 002/109] fix: Ensure ListingTable partitions are pruned when filters are not used (#17958) * fix: Prune partitions when no filters are defined * fix: Formatting * chore: Cargo fmt * chore: Clippy --- datafusion/catalog-listing/src/helpers.rs | 23 +++++++-- .../core/src/datasource/listing/table.rs | 47 +++++++++++++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 00e9c71df3489..82cc36867939e 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -156,6 +156,7 @@ pub fn split_files( chunks } +#[derive(Debug)] pub struct Partition { /// The path to the partition, including the table prefix path: Path, @@ -245,7 +246,16 @@ async fn prune_partitions( 
partition_cols: &[(String, DataType)], ) -> Result> { if filters.is_empty() { - return Ok(partitions); + // prune partitions which don't contain the partition columns + return Ok(partitions + .into_iter() + .filter(|p| { + let cols = partition_cols.iter().map(|x| x.0.as_str()); + !parse_partitions_for_path(table_path, &p.path, cols) + .unwrap_or_default() + .is_empty() + }) + .collect()); } let mut builders: Vec<_> = (0..partition_cols.len()) @@ -432,6 +442,7 @@ pub async fn pruned_partition_list<'a>( } let partition_prefix = evaluate_partition_prefix(partition_cols, filters); + let partitions = list_partitions(store, table_path, partition_cols.len(), partition_prefix) .await?; @@ -502,12 +513,12 @@ where let subpath = table_path.strip_prefix(file_path)?; let mut part_values = vec![]; - for (part, pn) in subpath.zip(table_partition_cols) { + for (part, expected_partition) in subpath.zip(table_partition_cols) { match part.split_once('=') { - Some((name, val)) if name == pn => part_values.push(val), + Some((name, val)) if name == expected_partition => part_values.push(val), _ => { debug!( - "Ignoring file: file_path='{file_path}', table_path='{table_path}', part='{part}', partition_col='{pn}'", + "Ignoring file: file_path='{file_path}', table_path='{table_path}', part='{part}', partition_col='{expected_partition}'", ); return None; } @@ -594,6 +605,8 @@ mod tests { ("tablepath/mypartition=val1/notparquetfile", 100), ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), ("tablepath/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), ]); let filter = Expr::eq(col("mypartition"), lit("val1")); let pruned = pruned_partition_list( @@ -619,6 +632,8 @@ mod tests { ("tablepath/mypartition=val2/file.parquet", 100), ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), ]); let filter = Expr::eq(col("mypartition"), lit("val1")); let pruned = pruned_partition_list( diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 3ce58938d77e4..4ffb6d41864f3 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -2732,6 +2732,52 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_listing_table_prunes_extra_files_in_hive() -> Result<()> { + let files = [ + "bucket/test/pid=1/file1", + "bucket/test/pid=1/file2", + "bucket/test/pid=2/file3", + "bucket/test/pid=2/file4", + "bucket/test/other/file5", + ]; + + let ctx = SessionContext::new(); + register_test_store(&ctx, &files.iter().map(|f| (*f, 10)).collect::>()); + + let opt = ListingOptions::new(Arc::new(JsonFormat::default())) + .with_file_extension_opt(Some("")) + .with_table_partition_cols(vec![("pid".to_string(), DataType::Int32)]); + + let table_path = ListingTableUrl::parse("test:///bucket/test/").unwrap(); + let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); + let config = ListingTableConfig::new(table_path) + .with_listing_options(opt) + .with_schema(Arc::new(schema)); + + let table = ListingTable::try_new(config)?; + + let (file_list, _) = table.list_files_for_scan(&ctx.state(), &[], None).await?; + assert_eq!(file_list.len(), 1); + + let files = file_list[0].clone(); + + assert_eq!( + files + .iter() + .map(|f| f.path().to_string()) + .collect::>(), + vec![ + 
"bucket/test/pid=1/file1", + "bucket/test/pid=1/file2", + "bucket/test/pid=2/file3", + "bucket/test/pid=2/file4", + ] + ); + + Ok(()) + } + #[cfg(feature = "parquet")] #[tokio::test] async fn test_table_stats_behaviors() -> Result<()> { @@ -2750,6 +2796,7 @@ mod tests { let config_default = ListingTableConfig::new(table_path.clone()) .with_listing_options(opt_default) .with_schema(schema_default); + let table_default = ListingTable::try_new(config_default)?; let exec_default = table_default.scan(&state, None, &[], None).await?; From 41fdab9d29244b62fe5537db6fd5decdb046d339 Mon Sep 17 00:00:00 2001 From: Tobias Schwarzinger Date: Wed, 15 Oct 2025 21:56:16 +0200 Subject: [PATCH 003/109] Push Down Filter Subexpressions in Nested Loop Joins as Projections (#17906) * Check-in NestedLoopJoinProjectionPushDown * Update Cargo.lock * Add some comments * Update slts that are affected by the nl-join-projection-push-down * please lints * Move code into projection_pushdown.rs * Remove explicit coalesce batches * Docs --- Cargo.lock | 1 + datafusion/physical-optimizer/Cargo.toml | 1 + .../src/projection_pushdown.rs | 748 +++++++++++++++++- .../sqllogictest/test_files/join.slt.part | 5 +- datafusion/sqllogictest/test_files/joins.slt | 10 +- .../test_files/tpch/plans/q11.slt.part | 97 +-- .../test_files/tpch/plans/q22.slt.part | 41 +- 7 files changed, 824 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 00bd64f21eb11..bbf64d5262e29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2452,6 +2452,7 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-expr-common", + "datafusion-functions", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 15466cd86bb04..4df011fc0a05e 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -52,5 +52,6 @@ recursive = { workspace = true, optional = true } [dev-dependencies] datafusion-expr = { workspace = true } +datafusion-functions = { workspace = true } insta = { workspace = true } tokio = { workspace = true } diff --git a/datafusion/physical-optimizer/src/projection_pushdown.rs b/datafusion/physical-optimizer/src/projection_pushdown.rs index 34affcbd4a19b..987e3cb6f713e 100644 --- a/datafusion/physical-optimizer/src/projection_pushdown.rs +++ b/datafusion/physical-optimizer/src/projection_pushdown.rs @@ -20,18 +20,32 @@ //! projections one by one if the operator below is amenable to this. If a //! projection reaches a source, it can even disappear from the plan entirely. 
-use std::sync::Arc;
-
 use crate::PhysicalOptimizerRule;
+use arrow::datatypes::{Fields, Schema, SchemaRef};
+use datafusion_common::alias::AliasGenerator;
+use std::collections::HashSet;
+use std::sync::Arc;
 
 use datafusion_common::config::ConfigOptions;
-use datafusion_common::tree_node::{TransformedResult, TreeNode};
-use datafusion_common::Result;
-use datafusion_physical_plan::projection::remove_unnecessary_projections;
+use datafusion_common::tree_node::{
+    Transformed, TransformedResult, TreeNode, TreeNodeRecursion,
+};
+use datafusion_common::{JoinSide, JoinType, Result};
+use datafusion_physical_expr::expressions::Column;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter};
+use datafusion_physical_plan::joins::NestedLoopJoinExec;
+use datafusion_physical_plan::projection::{
+    remove_unnecessary_projections, ProjectionExec,
+};
 use datafusion_physical_plan::ExecutionPlan;
 
 /// This rule inspects `ProjectionExec`'s in the given physical plan and tries to
 /// remove or swap with its child.
+///
+/// Furthermore, tries to push down projections from nested loop join filters that only depend on
+/// one side of the join. By pushing these projections down, functions that only depend on one side
+/// of the join no longer have to be evaluated for the cartesian product of the two sides.
 #[derive(Default, Debug)]
 pub struct ProjectionPushdown {}
 
@@ -48,6 +62,20 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
         plan: Arc<dyn ExecutionPlan>,
         _config: &ConfigOptions,
     ) -> Result<Arc<dyn ExecutionPlan>> {
+        let alias_generator = AliasGenerator::new();
+        let plan = plan
+            .transform_up(|plan| {
+                match plan.as_any().downcast_ref::<NestedLoopJoinExec>() {
+                    None => Ok(Transformed::no(plan)),
+                    Some(nl_join) => try_push_down_join_filter(
+                        Arc::clone(&plan),
+                        nl_join,
+                        &alias_generator,
+                    ),
+                }
+            })
+            .map(|t| t.data)?;
+
         plan.transform_down(remove_unnecessary_projections).data()
     }
 
@@ -59,3 +87,713 @@ impl PhysicalOptimizerRule for ProjectionPushdown {
         true
     }
 }
+
+/// Tries to push down parts of the filter.
+///
+/// See [JoinFilterRewriter] for details.
+fn try_push_down_join_filter(
+    original_plan: Arc<dyn ExecutionPlan>,
+    join: &NestedLoopJoinExec,
+    alias_generator: &AliasGenerator,
+) -> Result<Transformed<Arc<dyn ExecutionPlan>>> {
+    // Mark joins are currently not supported.
+ if matches!(join.join_type(), JoinType::LeftMark | JoinType::RightMark) { + return Ok(Transformed::no(original_plan)); + } + + let projections = join.projection(); + let Some(filter) = join.filter() else { + return Ok(Transformed::no(original_plan)); + }; + + let original_lhs_length = join.left().schema().fields().len(); + let original_rhs_length = join.right().schema().fields().len(); + + let lhs_rewrite = try_push_down_projection( + Arc::clone(&join.right().schema()), + Arc::clone(join.left()), + JoinSide::Left, + filter.clone(), + alias_generator, + )?; + let rhs_rewrite = try_push_down_projection( + Arc::clone(&lhs_rewrite.data.0.schema()), + Arc::clone(join.right()), + JoinSide::Right, + lhs_rewrite.data.1, + alias_generator, + )?; + if !lhs_rewrite.transformed && !rhs_rewrite.transformed { + return Ok(Transformed::no(original_plan)); + } + + let join_filter = minimize_join_filter( + Arc::clone(rhs_rewrite.data.1.expression()), + rhs_rewrite.data.1.column_indices().to_vec(), + lhs_rewrite.data.0.schema().as_ref(), + rhs_rewrite.data.0.schema().as_ref(), + ); + + let new_lhs_length = lhs_rewrite.data.0.schema().fields.len(); + let projections = match projections { + None => match join.join_type() { + JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { + // Build projections that ignore the newly projected columns. + let mut projections = Vec::new(); + projections.extend(0..original_lhs_length); + projections.extend(new_lhs_length..new_lhs_length + original_rhs_length); + projections + } + JoinType::LeftSemi | JoinType::LeftAnti => { + // Only return original left columns + let mut projections = Vec::new(); + projections.extend(0..original_lhs_length); + projections + } + JoinType::RightSemi | JoinType::RightAnti => { + // Only return original right columns + let mut projections = Vec::new(); + projections.extend(0..original_rhs_length); + projections + } + _ => unreachable!("Unsupported join type"), + }, + Some(projections) => { + let rhs_offset = new_lhs_length - original_lhs_length; + projections + .iter() + .map(|idx| { + if *idx >= original_lhs_length { + idx + rhs_offset + } else { + *idx + } + }) + .collect() + } + }; + + Ok(Transformed::yes(Arc::new(NestedLoopJoinExec::try_new( + lhs_rewrite.data.0, + rhs_rewrite.data.0, + Some(join_filter), + join.join_type(), + Some(projections), + )?))) +} + +/// Tries to push down parts of `expr` into the `join_side`. 
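+///
+/// When a rewrite happens, the returned plan is the original input of that side wrapped in a
+/// `ProjectionExec` that appends the pushed-down expressions, and the returned `JoinFilter` is
+/// rewritten against the widened intermediate schema. `try_push_down_join_filter` above calls this
+/// once per side, feeding the filter produced by the left pass into the right pass so that the
+/// column indices stay consistent across both rewrites.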
+fn try_push_down_projection( + other_schema: SchemaRef, + plan: Arc, + join_side: JoinSide, + join_filter: JoinFilter, + alias_generator: &AliasGenerator, +) -> Result, JoinFilter)>> { + let expr = Arc::clone(join_filter.expression()); + let original_plan_schema = plan.schema(); + let mut rewriter = JoinFilterRewriter::new( + join_side, + original_plan_schema.as_ref(), + join_filter.column_indices().to_vec(), + alias_generator, + ); + let new_expr = rewriter.rewrite(expr)?; + + if new_expr.transformed { + let new_join_side = + ProjectionExec::try_new(rewriter.join_side_projections, plan)?; + let new_schema = Arc::clone(&new_join_side.schema()); + + let (lhs_schema, rhs_schema) = match join_side { + JoinSide::Left => (new_schema, other_schema), + JoinSide::Right => (other_schema, new_schema), + JoinSide::None => unreachable!("Mark join not supported"), + }; + let intermediate_schema = rewriter + .intermediate_column_indices + .iter() + .map(|ci| match ci.side { + JoinSide::Left => Arc::clone(&lhs_schema.fields[ci.index]), + JoinSide::Right => Arc::clone(&rhs_schema.fields[ci.index]), + JoinSide::None => unreachable!("Mark join not supported"), + }) + .collect::(); + + let join_filter = JoinFilter::new( + new_expr.data, + rewriter.intermediate_column_indices, + Arc::new(Schema::new(intermediate_schema)), + ); + Ok(Transformed::yes((Arc::new(new_join_side), join_filter))) + } else { + Ok(Transformed::no((plan, join_filter))) + } +} + +/// Creates a new [JoinFilter] and tries to minimize the internal schema. +/// +/// This could eliminate some columns that were only part of a computation that has been pushed +/// down. As this computation is now materialized on one side of the join, the original input +/// columns are not needed anymore. +fn minimize_join_filter( + expr: Arc, + old_column_indices: Vec, + lhs_schema: &Schema, + rhs_schema: &Schema, +) -> JoinFilter { + let mut used_columns = HashSet::new(); + expr.apply(|expr| { + if let Some(col) = expr.as_any().downcast_ref::() { + used_columns.insert(col.index()); + } + Ok(TreeNodeRecursion::Continue) + }) + .expect("Closure cannot fail"); + + let new_column_indices = old_column_indices + .iter() + .enumerate() + .filter(|(idx, _)| used_columns.contains(idx)) + .map(|(_, ci)| ci.clone()) + .collect::>(); + let fields = new_column_indices + .iter() + .map(|ci| match ci.side { + JoinSide::Left => lhs_schema.field(ci.index).clone(), + JoinSide::Right => rhs_schema.field(ci.index).clone(), + JoinSide::None => unreachable!("Mark join not supported"), + }) + .collect::(); + + let final_expr = expr + .transform_up(|expr| match expr.as_any().downcast_ref::() { + None => Ok(Transformed::no(expr)), + Some(column) => { + let new_idx = used_columns + .iter() + .filter(|idx| **idx < column.index()) + .count(); + let new_column = Column::new(column.name(), new_idx); + Ok(Transformed::yes( + Arc::new(new_column) as Arc + )) + } + }) + .expect("Closure cannot fail"); + + JoinFilter::new( + final_expr.data, + new_column_indices, + Arc::new(Schema::new(fields)), + ) +} + +/// Implements the push-down machinery. +/// +/// The rewriter starts at the top of the filter expression and traverses the expression tree. For +/// each (sub-)expression, the rewriter checks whether it only refers to one side of the join. If +/// this is never the case, no subexpressions of the filter can be pushed down. If there is a +/// subexpression that can be computed using only one side of the join, the entire subexpression is +/// pushed down to the join side. 
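+///
+/// For illustration (this mirrors the `simple_push_down` test in this file's test module), a
+/// filter such as `a@0 + 1 > x@1 + 1`, where `a` comes from the left input and `x` from the right,
+/// is reduced to a plain column comparison over two pushed-down projections:
+///
+/// ```text
+/// left:   ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+/// right:  ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+/// filter: join_proj_push_down_1@0 > join_proj_push_down_2@1
+/// ```
+///
+/// so each addition is evaluated once per input row instead of once per row pair.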
+struct JoinFilterRewriter<'a> {
+    join_side: JoinSide,
+    join_side_schema: &'a Schema,
+    join_side_projections: Vec<(Arc<dyn PhysicalExpr>, String)>,
+    intermediate_column_indices: Vec<ColumnIndex>,
+    alias_generator: &'a AliasGenerator,
+}
+
+impl<'a> JoinFilterRewriter<'a> {
+    /// Creates a new [JoinFilterRewriter].
+    fn new(
+        join_side: JoinSide,
+        join_side_schema: &'a Schema,
+        column_indices: Vec<ColumnIndex>,
+        alias_generator: &'a AliasGenerator,
+    ) -> Self {
+        let projections = join_side_schema
+            .fields()
+            .iter()
+            .enumerate()
+            .map(|(idx, field)| {
+                (
+                    Arc::new(Column::new(field.name(), idx)) as Arc<dyn PhysicalExpr>,
+                    field.name().to_string(),
+                )
+            })
+            .collect();
+
+        Self {
+            join_side,
+            join_side_schema,
+            join_side_projections: projections,
+            intermediate_column_indices: column_indices,
+            alias_generator,
+        }
+    }
+
+    /// Executes the push-down machinery on `expr`.
+    ///
+    /// See the [JoinFilterRewriter] for further information.
+    fn rewrite(
+        &mut self,
+        expr: Arc<dyn PhysicalExpr>,
+    ) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
+        let depends_on_this_side = self.depends_on_join_side(&expr, self.join_side)?;
+        // We don't push down things that do not depend on this side (other side or no side).
+        if !depends_on_this_side {
+            return Ok(Transformed::no(expr));
+        }
+
+        // Recurse if there is a dependency on both sides or if the entire expression is volatile.
+        let depends_on_other_side =
+            self.depends_on_join_side(&expr, self.join_side.negate())?;
+        let is_volatile = is_volatile_expression_tree(expr.as_ref());
+        if depends_on_other_side || is_volatile {
+            return expr.map_children(|expr| self.rewrite(expr));
+        }
+
+        // There is only a dependency on this side.
+
+        // If this expression has no children, we do not push down, as it should already be a
+        // column reference.
+        if expr.children().is_empty() {
+            return Ok(Transformed::no(expr));
+        }
+
+        // Otherwise, we push down a projection.
+        let alias = self.alias_generator.next("join_proj_push_down");
+        let idx = self.create_new_column(alias.clone(), expr)?;
+
+        Ok(Transformed::yes(
+            Arc::new(Column::new(&alias, idx)) as Arc<dyn PhysicalExpr>
+        ))
+    }
+
+    /// Creates a new column in the current join side.
+    fn create_new_column(
+        &mut self,
+        name: String,
+        expr: Arc<dyn PhysicalExpr>,
+    ) -> Result<usize> {
+        // First, add a new projection. The expression must be rewritten, as it is no longer
+        // executed against the filter schema.
+        let new_idx = self.join_side_projections.len();
+        let rewritten_expr = expr.transform_up(|expr| {
+            Ok(match expr.as_any().downcast_ref::<Column>() {
+                None => Transformed::no(expr),
+                Some(column) => {
+                    let intermediate_column =
+                        &self.intermediate_column_indices[column.index()];
+                    assert_eq!(intermediate_column.side, self.join_side);
+
+                    let join_side_index = intermediate_column.index;
+                    let field = self.join_side_schema.field(join_side_index);
+                    let new_column = Column::new(field.name(), join_side_index);
+                    Transformed::yes(Arc::new(new_column) as Arc<dyn PhysicalExpr>)
+                }
+            })
+        })?;
+        self.join_side_projections.push((rewritten_expr.data, name));
+
+        // Then, update the column indices
+        let new_intermediate_idx = self.intermediate_column_indices.len();
+        let idx = ColumnIndex {
+            index: new_idx,
+            side: self.join_side,
+        };
+        self.intermediate_column_indices.push(idx);
+
+        Ok(new_intermediate_idx)
+    }
+
+    /// Checks whether the entire expression depends on the given `join_side`.
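+    ///
+    /// Returns `true` if any column referenced by the expression resolves to the given side via
+    /// the intermediate column indices.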
+    fn depends_on_join_side(
+        &mut self,
+        expr: &Arc<dyn PhysicalExpr>,
+        join_side: JoinSide,
+    ) -> Result<bool> {
+        let mut result = false;
+        expr.apply(|expr| match expr.as_any().downcast_ref::<Column>() {
+            None => Ok(TreeNodeRecursion::Continue),
+            Some(c) => {
+                let column_index = &self.intermediate_column_indices[c.index()];
+                if column_index.side == join_side {
+                    result = true;
+                    return Ok(TreeNodeRecursion::Stop);
+                }
+                Ok(TreeNodeRecursion::Continue)
+            }
+        })?;
+
+        Ok(result)
+    }
+}
+
+fn is_volatile_expression_tree(expr: &dyn PhysicalExpr) -> bool {
+    if expr.is_volatile_node() {
+        return true;
+    }
+
+    expr.children()
+        .iter()
+        .map(|expr| is_volatile_expression_tree(expr.as_ref()))
+        .reduce(|lhs, rhs| lhs || rhs)
+        .unwrap_or(false)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use arrow::datatypes::{DataType, Field, FieldRef, Schema};
+    use datafusion_expr_common::operator::Operator;
+    use datafusion_functions::math::random;
+    use datafusion_physical_expr::expressions::{binary, lit};
+    use datafusion_physical_expr::ScalarFunctionExpr;
+    use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+    use datafusion_physical_plan::displayable;
+    use datafusion_physical_plan::empty::EmptyExec;
+    use insta::assert_snapshot;
+    use std::sync::Arc;
+
+    #[tokio::test]
+    async fn no_computation_does_not_project() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_x(),
+            None,
+            a_greater_than_x,
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=a@0 > x@1
+          EmptyExec
+          EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn simple_push_down() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_x(),
+            None,
+            a_plus_one_greater_than_x_plus_one,
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, x@2]
+          ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn does_not_push_down_short_circuiting_expressions() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_x(),
+            None,
+            |schema| {
+                binary(
+                    lit(false),
+                    Operator::And,
+                    a_plus_one_greater_than_x_plus_one(schema)?,
+                    schema,
+                )
+            },
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=false AND join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, x@2]
+          ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn does_not_push_down_volatile_functions() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_x(),
+            None,
+            a_plus_rand_greater_than_x,
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=a@0 + rand() > x@1
+          EmptyExec
+          EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn complex_schema_push_down() -> Result<()> {
+        let (left_schema, right_schema) = create_complex_schemas();
+
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_b_x_z(),
+            None,
+            a_plus_b_greater_than_x_plus_z,
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0, b@1, c@2, x@4, y@5, z@6]
+          ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, a@0 + b@1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, y@1 as y, z@2 as z, x@0 + z@2 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn push_down_with_existing_projections() -> Result<()> {
+        let (left_schema, right_schema) = create_complex_schemas();
+
+        let optimized_plan = run_test(
+            left_schema,
+            right_schema,
+            a_b_x_z(),
+            Some(vec![1, 3, 5]), // ("b", "x", "z")
+            a_plus_b_greater_than_x_plus_z,
+            JoinType::Inner,
+        )?;
+
+        assert_snapshot!(optimized_plan, @r"
+        NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[b@1, x@4, z@6]
+          ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, a@0 + b@1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, y@1 as y, z@2 as z, x@0 + z@2 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn left_semi_join_projection() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+
+        let left_semi_join_plan = run_test(
+            left_schema.clone(),
+            right_schema.clone(),
+            a_x(),
+            None,
+            a_plus_one_greater_than_x_plus_one,
+            JoinType::LeftSemi,
+        )?;
+
+        assert_snapshot!(left_semi_join_plan, @r"
+        NestedLoopJoinExec: join_type=LeftSemi, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[a@0]
+          ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn right_semi_join_projection() -> Result<()> {
+        let (left_schema, right_schema) = create_simple_schemas();
+        let right_semi_join_plan = run_test(
+            left_schema,
+            right_schema,
+            a_x(),
+            None,
+            a_plus_one_greater_than_x_plus_one,
+            JoinType::RightSemi,
+        )?;
+        assert_snapshot!(right_semi_join_plan, @r"
+        NestedLoopJoinExec: join_type=RightSemi, filter=join_proj_push_down_1@0 > join_proj_push_down_2@1, projection=[x@0]
+          ProjectionExec: expr=[a@0 as a, a@0 + 1 as join_proj_push_down_1]
+            EmptyExec
+          ProjectionExec: expr=[x@0 as x, x@0 + 1 as join_proj_push_down_2]
+            EmptyExec
+        ");
+        Ok(())
+    }
+
+    fn run_test(
+        left_schema: Schema,
+        right_schema: Schema,
+        column_indices: Vec<ColumnIndex>,
+        existing_projections: Option<Vec<usize>>,
+        filter_expr_builder: impl FnOnce(&Schema) -> Result<Arc<dyn PhysicalExpr>>,
+        join_type: JoinType,
+    ) -> Result<String> {
+        let left = Arc::new(EmptyExec::new(Arc::new(left_schema.clone())));
+        let right = Arc::new(EmptyExec::new(Arc::new(right_schema.clone())));
+
+        let join_fields: Vec<_> = column_indices
+            .iter()
+            .map(|ci| match ci.side {
+                JoinSide::Left => left_schema.field(ci.index).clone(),
+                JoinSide::Right => right_schema.field(ci.index).clone(),
+                JoinSide::None => unreachable!(),
+            })
+            .collect();
+        let join_schema = Arc::new(Schema::new(join_fields));
+
+        let filter_expr = filter_expr_builder(join_schema.as_ref())?;
+
+        let join_filter = JoinFilter::new(filter_expr, column_indices, join_schema);
+
+        let join = NestedLoopJoinExec::try_new(
+            left,
+            right,
+            Some(join_filter),
+            &join_type,
+            existing_projections,
+        )?;
+
+        let optimizer = ProjectionPushdown::new();
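+        // Run the ProjectionPushdown physical optimizer rule and render the resulting plan
+        // for the snapshot assertions above.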
+        let optimized_plan = optimizer.optimize(Arc::new(join), &Default::default())?;
+
+        let displayable_plan = displayable(optimized_plan.as_ref()).indent(false);
+        Ok(displayable_plan.to_string())
+    }
+
+    fn create_simple_schemas() -> (Schema, Schema) {
+        let left_schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
+        let right_schema = Schema::new(vec![Field::new("x", DataType::Int32, false)]);
+
+        (left_schema, right_schema)
+    }
+
+    fn create_complex_schemas() -> (Schema, Schema) {
+        let left_schema = Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int32, false),
+            Field::new("c", DataType::Int32, false),
+        ]);
+
+        let right_schema = Schema::new(vec![
+            Field::new("x", DataType::Int32, false),
+            Field::new("y", DataType::Int32, false),
+            Field::new("z", DataType::Int32, false),
+        ]);
+
+        (left_schema, right_schema)
+    }
+
+    fn a_x() -> Vec<ColumnIndex> {
+        vec![
+            ColumnIndex {
+                index: 0,
+                side: JoinSide::Left,
+            },
+            ColumnIndex {
+                index: 0,
+                side: JoinSide::Right,
+            },
+        ]
+    }
+
+    fn a_b_x_z() -> Vec<ColumnIndex> {
+        vec![
+            ColumnIndex {
+                index: 0,
+                side: JoinSide::Left,
+            },
+            ColumnIndex {
+                index: 1,
+                side: JoinSide::Left,
+            },
+            ColumnIndex {
+                index: 0,
+                side: JoinSide::Right,
+            },
+            ColumnIndex {
+                index: 2,
+                side: JoinSide::Right,
+            },
+        ]
+    }
+
+    fn a_plus_one_greater_than_x_plus_one(
+        join_schema: &Schema,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        let left_expr = binary(
+            Arc::new(Column::new("a", 0)),
+            Operator::Plus,
+            lit(1),
+            join_schema,
+        )?;
+        let right_expr = binary(
+            Arc::new(Column::new("x", 1)),
+            Operator::Plus,
+            lit(1),
+            join_schema,
+        )?;
+        binary(left_expr, Operator::Gt, right_expr, join_schema)
+    }
+
+    fn a_plus_rand_greater_than_x(join_schema: &Schema) -> Result<Arc<dyn PhysicalExpr>> {
+        let left_expr = binary(
+            Arc::new(Column::new("a", 0)),
+            Operator::Plus,
+            Arc::new(ScalarFunctionExpr::new(
+                "rand",
+                random(),
+                vec![],
+                FieldRef::new(Field::new("out", DataType::Float64, false)),
+                Arc::new(ConfigOptions::default()),
+            )),
+            join_schema,
+        )?;
+        let right_expr = Arc::new(Column::new("x", 1));
+        binary(left_expr, Operator::Gt, right_expr, join_schema)
+    }
+
+    fn a_greater_than_x(join_schema: &Schema) -> Result<Arc<dyn PhysicalExpr>> {
+        binary(
+            Arc::new(Column::new("a", 0)),
+            Operator::Gt,
+            Arc::new(Column::new("x", 1)),
+            join_schema,
+        )
+    }
+
+    fn a_plus_b_greater_than_x_plus_z(
+        join_schema: &Schema,
+    ) -> Result<Arc<dyn PhysicalExpr>> {
+        let lhs = binary(
+            Arc::new(Column::new("a", 0)),
+            Operator::Plus,
+            Arc::new(Column::new("b", 1)),
+            join_schema,
+        )?;
+        let rhs = binary(
+            Arc::new(Column::new("x", 2)),
+            Operator::Plus,
+            Arc::new(Column::new("z", 3)),
+            join_schema,
+        )?;
+        binary(lhs, Operator::Gt, rhs, join_schema)
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/join.slt.part b/datafusion/sqllogictest/test_files/join.slt.part
index 2abe654a96c8c..fe3356af88fcc 100644
--- a/datafusion/sqllogictest/test_files/join.slt.part
+++ b/datafusion/sqllogictest/test_files/join.slt.part
@@ -849,9 +849,10 @@ logical_plan
 05)----TableScan: department projection=[dept_name]
 physical_plan
 01)ProjectionExec: expr=[emp_id@1 as emp_id, name@2 as name, dept_name@0 as dept_name]
-02)--NestedLoopJoinExec: join_type=Right, filter=name@0 = Alice OR name@0 = Bob
+02)--NestedLoopJoinExec: join_type=Right, filter=join_proj_push_down_1@0, projection=[dept_name@0, emp_id@1, name@2]
 03)----DataSourceExec: partitions=1, partition_sizes=[1]
-04)----DataSourceExec: partitions=1, partition_sizes=[1]
+04)----ProjectionExec: expr=[emp_id@0 as emp_id, name@1 as name, name@1 = Alice OR name@1 = Bob as
join_proj_push_down_1] +05)------DataSourceExec: partitions=1, partition_sizes=[1] query ITT rowsort SELECT e.emp_id, e.name, d.dept_name diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 96d2bad086e66..9472395da6418 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3519,10 +3519,12 @@ logical_plan 04)--SubqueryAlias: t2 05)----TableScan: annotated_data projection=[a0, a, b, c, d] physical_plan -01)NestedLoopJoinExec: join_type=Inner, filter=example(CAST(a@0 AS Float64), CAST(a@1 AS Float64)) > 3 -02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true -03)--RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -04)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true +01)NestedLoopJoinExec: join_type=Inner, filter=example(join_proj_push_down_1@0, join_proj_push_down_2@1) > 3, projection=[a0@0, a@1, b@2, c@3, d@4, a0@6, a@7, b@8, c@9, d@10] +02)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, CAST(a@1 AS Float64) as join_proj_push_down_1] +03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true +04)--ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, CAST(a@1 AS Float64) as join_proj_push_down_2] +05)----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 +06)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], file_type=csv, has_header=true #### # Config teardown diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part index a6225daae4362..6b03d708c7fa2 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part @@ -75,51 +75,52 @@ logical_plan physical_plan 01)SortExec: TopK(fetch=10), expr=[value@1 DESC], preserve_partitioning=[false] 02)--ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] -03)----NestedLoopJoinExec: join_type=Inner, filter=CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Decimal128(38, 15)) > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@1, projection=[ps_partkey@0, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1] -04)------CoalescePartitionsExec -05)--------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -06)----------CoalesceBatchesExec: target_batch_size=8192 -07)------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 -08)--------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -09)----------------CoalesceBatchesExec: target_batch_size=8192 
-10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[ps_partkey@0, ps_availqty@1, ps_supplycost@2] -11)--------------------CoalesceBatchesExec: target_batch_size=8192 -12)----------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 -13)------------------------CoalesceBatchesExec: target_batch_size=8192 -14)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_availqty@2, ps_supplycost@3, s_nationkey@5] -15)----------------------------CoalesceBatchesExec: target_batch_size=8192 -16)------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 -17)--------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], file_type=csv, has_header=false -18)----------------------------CoalesceBatchesExec: target_batch_size=8192 -19)------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -20)--------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -21)----------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], file_type=csv, has_header=false -22)--------------------CoalesceBatchesExec: target_batch_size=8192 -23)----------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -24)------------------------CoalesceBatchesExec: target_batch_size=8192 -25)--------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] -26)----------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -27)------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false -28)------ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] -29)--------AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -30)----------CoalescePartitionsExec -31)------------AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] -32)--------------CoalesceBatchesExec: target_batch_size=8192 -33)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] -34)------------------CoalesceBatchesExec: target_batch_size=8192 -35)--------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 -36)----------------------CoalesceBatchesExec: target_batch_size=8192 -37)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], projection=[ps_availqty@1, ps_supplycost@2, 
s_nationkey@4] -38)--------------------------CoalesceBatchesExec: target_batch_size=8192 -39)----------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 -40)------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], file_type=csv, has_header=false -41)--------------------------CoalesceBatchesExec: target_batch_size=8192 -42)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 -43)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -44)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], file_type=csv, has_header=false -45)------------------CoalesceBatchesExec: target_batch_size=8192 -46)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 -47)----------------------CoalesceBatchesExec: target_batch_size=8192 -48)------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] -49)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -50)----------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false +03)----NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@1 > sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@0, projection=[ps_partkey@0, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1, sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)@3] +04)------ProjectionExec: expr=[ps_partkey@0 as ps_partkey, sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as sum(partsupp.ps_supplycost * partsupp.ps_availqty), CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) as join_proj_push_down_1] +05)--------CoalescePartitionsExec +06)----------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +07)------------CoalesceBatchesExec: target_batch_size=8192 +08)--------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4 +09)----------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +10)------------------CoalesceBatchesExec: target_batch_size=8192 +11)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@3, n_nationkey@0)], projection=[ps_partkey@0, ps_availqty@1, ps_supplycost@2] +12)----------------------CoalesceBatchesExec: target_batch_size=8192 +13)------------------------RepartitionExec: partitioning=Hash([s_nationkey@3], 4), input_partitions=4 +14)--------------------------CoalesceBatchesExec: target_batch_size=8192 +15)----------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_availqty@2, 
ps_supplycost@3, s_nationkey@5] +16)------------------------------CoalesceBatchesExec: target_batch_size=8192 +17)--------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4 +18)----------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost], file_type=csv, has_header=false +19)------------------------------CoalesceBatchesExec: target_batch_size=8192 +20)--------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +21)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +22)------------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], file_type=csv, has_header=false +23)----------------------CoalesceBatchesExec: target_batch_size=8192 +24)------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +25)--------------------------CoalesceBatchesExec: target_batch_size=8192 +26)----------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] +27)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +28)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false +29)------ProjectionExec: expr=[CAST(CAST(sum(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 AS Decimal128(38, 15)) as sum(partsupp.ps_supplycost * partsupp.ps_availqty) * Float64(0.0001)] +30)--------AggregateExec: mode=Final, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +31)----------CoalescePartitionsExec +32)------------AggregateExec: mode=Partial, gby=[], aggr=[sum(partsupp.ps_supplycost * partsupp.ps_availqty)] +33)--------------CoalesceBatchesExec: target_batch_size=8192 +34)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_availqty@0, ps_supplycost@1] +35)------------------CoalesceBatchesExec: target_batch_size=8192 +36)--------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4 +37)----------------------CoalesceBatchesExec: target_batch_size=8192 +38)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@0, s_suppkey@0)], projection=[ps_availqty@1, ps_supplycost@2, s_nationkey@4] +39)--------------------------CoalesceBatchesExec: target_batch_size=8192 +40)----------------------------RepartitionExec: partitioning=Hash([ps_suppkey@0], 4), input_partitions=4 +41)------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_suppkey, ps_availqty, ps_supplycost], file_type=csv, has_header=false +42)--------------------------CoalesceBatchesExec: target_batch_size=8192 +43)----------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4 +44)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +45)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], file_type=csv, has_header=false +46)------------------CoalesceBatchesExec: target_batch_size=8192 +47)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4 +48)----------------------CoalesceBatchesExec: target_batch_size=8192 +49)------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] +50)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +51)----------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], file_type=csv, has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q22.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q22.slt.part index fc9c01843cc75..22476156b80d8 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q22.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q22.slt.part @@ -83,23 +83,24 @@ physical_plan 07)------------AggregateExec: mode=Partial, gby=[cntrycode@0 as cntrycode], aggr=[count(Int64(1)), sum(custsale.c_acctbal)] 08)--------------ProjectionExec: expr=[substr(c_phone@0, 1, 2) as cntrycode, c_acctbal@1 as c_acctbal] 09)----------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -10)------------------NestedLoopJoinExec: join_type=Inner, filter=CAST(c_acctbal@0 AS Decimal128(19, 6)) > avg(customer.c_acctbal)@1 -11)--------------------CoalescePartitionsExec -12)----------------------CoalesceBatchesExec: target_batch_size=8192 -13)------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] -14)--------------------------CoalesceBatchesExec: target_batch_size=8192 -15)----------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 -16)------------------------------CoalesceBatchesExec: target_batch_size=8192 -17)--------------------------------FilterExec: substr(c_phone@1, 1, 2) IN ([13, 31, 23, 29, 30, 18, 17]) -18)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -19)------------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], file_type=csv, has_header=false -20)--------------------------CoalesceBatchesExec: target_batch_size=8192 -21)----------------------------RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 -22)------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], 
[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_custkey], file_type=csv, has_header=false -23)--------------------AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] -24)----------------------CoalescePartitionsExec -25)------------------------AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] -26)--------------------------CoalesceBatchesExec: target_batch_size=8192 -27)----------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND substr(c_phone@0, 1, 2) IN ([13, 31, 23, 29, 30, 18, 17]), projection=[c_acctbal@1] -28)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 -29)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], file_type=csv, has_header=false +10)------------------NestedLoopJoinExec: join_type=Inner, filter=join_proj_push_down_1@1 > avg(customer.c_acctbal)@0, projection=[c_phone@0, c_acctbal@1, avg(customer.c_acctbal)@3] +11)--------------------ProjectionExec: expr=[c_phone@0 as c_phone, c_acctbal@1 as c_acctbal, CAST(c_acctbal@1 AS Decimal128(19, 6)) as join_proj_push_down_1] +12)----------------------CoalescePartitionsExec +13)------------------------CoalesceBatchesExec: target_batch_size=8192 +14)--------------------------HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(c_custkey@0, o_custkey@0)], projection=[c_phone@1, c_acctbal@2] +15)----------------------------CoalesceBatchesExec: target_batch_size=8192 +16)------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4 +17)--------------------------------CoalesceBatchesExec: target_batch_size=8192 +18)----------------------------------FilterExec: substr(c_phone@1, 1, 2) IN ([13, 31, 23, 29, 30, 18, 17]) +19)------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +20)--------------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_phone, c_acctbal], file_type=csv, has_header=false +21)----------------------------CoalesceBatchesExec: target_batch_size=8192 +22)------------------------------RepartitionExec: partitioning=Hash([o_custkey@0], 4), input_partitions=4 +23)--------------------------------DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_custkey], file_type=csv, has_header=false +24)--------------------AggregateExec: mode=Final, gby=[], aggr=[avg(customer.c_acctbal)] +25)----------------------CoalescePartitionsExec +26)------------------------AggregateExec: mode=Partial, gby=[], aggr=[avg(customer.c_acctbal)] +27)--------------------------CoalesceBatchesExec: target_batch_size=8192 +28)----------------------------FilterExec: c_acctbal@1 > Some(0),15,2 AND substr(c_phone@0, 1, 2) IN ([13, 31, 23, 29, 30, 18, 17]), projection=[c_acctbal@1] 
+29)------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +30)--------------------------------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_phone, c_acctbal], file_type=csv, has_header=false From 264030cca76d0bdb4d8809f252b422e72624a345 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Wed, 15 Oct 2025 13:06:02 -0700 Subject: [PATCH 004/109] feat: support Spark `concat` string function (#18063) * chore: Extend backtrace coverage * fmt * part2 * feedback * clippy * feat: support Spark `concat` * clippy * comments * test * doc --- .../spark/src/function/string/concat.rs | 306 ++++++++++++++++++ datafusion/spark/src/function/string/mod.rs | 8 + .../test_files/spark/string/concat.slt | 48 +++ 3 files changed, 362 insertions(+) create mode 100644 datafusion/spark/src/function/string/concat.rs create mode 100644 datafusion/sqllogictest/test_files/spark/string/concat.slt diff --git a/datafusion/spark/src/function/string/concat.rs b/datafusion/spark/src/function/string/concat.rs new file mode 100644 index 0000000000000..0e981e7c37224 --- /dev/null +++ b/datafusion/spark/src/function/string/concat.rs @@ -0,0 +1,306 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{Array, ArrayBuilder}; +use arrow::datatypes::DataType; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, + Volatility, +}; +use datafusion_functions::string::concat::ConcatFunc; +use std::any::Any; +use std::sync::Arc; + +/// Spark-compatible `concat` expression +/// +/// +/// Concatenates multiple input strings into a single string. +/// Returns NULL if any input is NULL. 
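+///
+/// Illustrative example (mirroring the sqllogictest cases added in this patch):
+/// `concat('Spark', 'SQL')` returns `'SparkSQL'`, while `concat('Spark', 'SQL', NULL)`
+/// returns `NULL`.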
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkConcat {
+    signature: Signature,
+}
+
+impl Default for SparkConcat {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SparkConcat {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::one_of(
+                vec![TypeSignature::UserDefined, TypeSignature::Nullary],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+impl ScalarUDFImpl for SparkConcat {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "concat"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Utf8)
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        spark_concat(args)
+    }
+
+    fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
+        // Accept any string types, including zero arguments
+        Ok(arg_types.to_vec())
+    }
+}
+
+/// Concatenates strings, returning NULL if any input is NULL.
+///
+/// This is a Spark-specific wrapper around DataFusion's concat that returns NULL
+/// if any argument is NULL (Spark behavior), whereas DataFusion's concat ignores NULLs.
+fn spark_concat(args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+    let ScalarFunctionArgs {
+        args: arg_values,
+        arg_fields,
+        number_rows,
+        return_field,
+        config_options,
+    } = args;
+
+    // Handle zero-argument case: return empty string
+    if arg_values.is_empty() {
+        return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(
+            Some(String::new()),
+        )));
+    }
+
+    // Step 1: Check for NULL mask in incoming args
+    let null_mask = compute_null_mask(&arg_values, number_rows)?;
+
+    // If all scalars and any is NULL, return NULL immediately
+    if null_mask.is_none() {
+        return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+    }
+
+    // Step 2: Delegate to DataFusion's concat
+    let concat_func = ConcatFunc::new();
+    let func_args = ScalarFunctionArgs {
+        args: arg_values,
+        arg_fields,
+        number_rows,
+        return_field,
+        config_options,
+    };
+    let result = concat_func.invoke_with_args(func_args)?;
+
+    // Step 3: Apply NULL mask to result
+    apply_null_mask(result, null_mask)
+}
+
+/// Compute the NULL mask for the arguments.
+///
+/// Returns `None` if all arguments are scalars and any of them is NULL; otherwise returns a
+/// vector of booleans representing the null mask for the incoming arrays.
+fn compute_null_mask(
+    args: &[ColumnarValue],
+    number_rows: usize,
+) -> Result<Option<Vec<bool>>> {
+    // Check if all arguments are scalars
+    let all_scalars = args
+        .iter()
+        .all(|arg| matches!(arg, ColumnarValue::Scalar(_)));
+
+    if all_scalars {
+        // For scalars, check if any is NULL
+        for arg in args {
+            if let ColumnarValue::Scalar(scalar) = arg {
+                if scalar.is_null() {
+                    // Return None to indicate all values should be NULL
+                    return Ok(None);
+                }
+            }
+        }
+        // No NULLs in scalars
+        Ok(Some(vec![]))
+    } else {
+        // For arrays, compute NULL mask for each row
+        let array_len = args
+            .iter()
+            .find_map(|arg| match arg {
+                ColumnarValue::Array(array) => Some(array.len()),
+                _ => None,
+            })
+            .unwrap_or(number_rows);
+
+        // Convert all scalars to arrays for uniform processing
+        let arrays: Result<Vec<_>> = args
+            .iter()
+            .map(|arg| match arg {
+                ColumnarValue::Array(array) => Ok(Arc::clone(array)),
+                ColumnarValue::Scalar(scalar) => scalar.to_array_of_size(array_len),
+            })
+            .collect();
+        let arrays = arrays?;
+
+        // Compute NULL mask
+        let mut null_mask = vec![false; array_len];
+        for array in &arrays {
+            for (i, null_flag) in null_mask.iter_mut().enumerate().take(array_len) {
+                if array.is_null(i) {
+                    *null_flag = true;
+                }
+            }
+        }
+
+        Ok(Some(null_mask))
+    }
+}
+
+/// Apply NULL mask to the result
+fn apply_null_mask(
+    result: ColumnarValue,
+    null_mask: Option<Vec<bool>>,
+) -> Result<ColumnarValue> {
+    match (result, null_mask) {
+        // Scalar with NULL mask means return NULL
+        (ColumnarValue::Scalar(_), None) => {
+            Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
+        }
+        // Scalar without NULL mask, return as-is
+        (scalar @ ColumnarValue::Scalar(_), Some(mask)) if mask.is_empty() => Ok(scalar),
+        // Array with NULL mask
+        (ColumnarValue::Array(array), Some(null_mask)) if !null_mask.is_empty() => {
+            let array_len = array.len();
+            let return_type = array.data_type();
+
+            let mut builder: Box<dyn ArrayBuilder> = match return_type {
+                DataType::Utf8 => {
+                    let string_array = array
+                        .as_any()
+                        .downcast_ref::<arrow::array::StringArray>()
+                        .unwrap();
+                    let mut builder =
+                        arrow::array::StringBuilder::with_capacity(array_len, 0);
+                    for (i, &is_null) in null_mask.iter().enumerate().take(array_len) {
+                        if is_null || string_array.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            builder.append_value(string_array.value(i));
+                        }
+                    }
+                    Box::new(builder)
+                }
+                DataType::LargeUtf8 => {
+                    let string_array = array
+                        .as_any()
+                        .downcast_ref::<arrow::array::LargeStringArray>()
+                        .unwrap();
+                    let mut builder =
+                        arrow::array::LargeStringBuilder::with_capacity(array_len, 0);
+                    for (i, &is_null) in null_mask.iter().enumerate().take(array_len) {
+                        if is_null || string_array.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            builder.append_value(string_array.value(i));
+                        }
+                    }
+                    Box::new(builder)
+                }
+                DataType::Utf8View => {
+                    let string_array = array
+                        .as_any()
+                        .downcast_ref::<arrow::array::StringViewArray>()
+                        .unwrap();
+                    let mut builder =
+                        arrow::array::StringViewBuilder::with_capacity(array_len);
+                    for (i, &is_null) in null_mask.iter().enumerate().take(array_len) {
+                        if is_null || string_array.is_null(i) {
+                            builder.append_null();
+                        } else {
+                            builder.append_value(string_array.value(i));
+                        }
+                    }
+                    Box::new(builder)
+                }
+                _ => {
+                    return datafusion_common::exec_err!(
+                        "Unsupported return type for concat: {:?}",
+                        return_type
+                    );
+                }
+            };
+
+            Ok(ColumnarValue::Array(builder.finish()))
+        }
+        // Array without NULL mask, return as-is
+        (array @ ColumnarValue::Array(_), _) => Ok(array),
+        // Shouldn't happen
+        (scalar, _) => Ok(scalar),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::function::utils::test::test_scalar_function;
+    use arrow::array::StringArray;
+    use arrow::datatypes::DataType;
+    use datafusion_common::Result;
+
+    #[test]
+    fn test_concat_basic() -> Result<()> {
+        test_scalar_function!(
+            SparkConcat::new(),
+            vec![
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some("Spark".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some("SQL".to_string()))),
+            ],
+            Ok(Some("SparkSQL")),
+            &str,
+            DataType::Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_concat_with_null() -> Result<()> {
+        test_scalar_function!(
+            SparkConcat::new(),
+            vec![
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some("Spark".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Utf8(Some("SQL".to_string()))),
+                ColumnarValue::Scalar(ScalarValue::Utf8(None)),
+            ],
+            Ok(None),
+            &str,
+            DataType::Utf8,
+            StringArray
+        );
+        Ok(())
+    }
+}
diff --git a/datafusion/spark/src/function/string/mod.rs b/datafusion/spark/src/function/string/mod.rs
index 3115c1e960fa8..480984f02159b 100644
--- a/datafusion/spark/src/function/string/mod.rs
+++ b/datafusion/spark/src/function/string/mod.rs
@@ -17,6 +17,7 @@
 pub mod ascii;
 pub mod char;
+pub mod concat;
 pub mod elt;
 pub mod format_string;
 pub mod ilike;
@@ -30,6 +31,7 @@ use std::sync::Arc;
 make_udf_function!(ascii::SparkAscii, ascii);
make_udf_function!(char::CharFunc, char); +make_udf_function!(concat::SparkConcat, concat); make_udf_function!(ilike::SparkILike, ilike); make_udf_function!(length::SparkLengthFunc, length); make_udf_function!(elt::SparkElt, elt); @@ -50,6 +52,11 @@ pub mod expr_fn { "Returns the ASCII character having the binary equivalent to col. If col is larger than 256 the result is equivalent to char(col % 256).", arg1 )); + export_functions!(( + concat, + "Concatenates multiple input strings into a single string. Returns NULL if any input is NULL.", + args + )); export_functions!(( elt, "Returns the n-th input (1-indexed), e.g. returns 2nd input when n is 2. The function returns NULL if the index is 0 or exceeds the length of the array.", @@ -86,6 +93,7 @@ pub fn functions() -> Vec> { vec![ ascii(), char(), + concat(), elt(), ilike(), length(), diff --git a/datafusion/sqllogictest/test_files/spark/string/concat.slt b/datafusion/sqllogictest/test_files/spark/string/concat.slt new file mode 100644 index 0000000000000..0b796a54a69e8 --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/string/concat.slt @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +query T +SELECT concat('Spark', 'SQL'); +---- +SparkSQL + +query T +SELECT concat('Spark', 'SQL', NULL); +---- +NULL + +query T +SELECT concat('', '1', '', '2'); +---- +12 + +query T +SELECT concat(); +---- +(empty) + +query T +SELECT concat(''); +---- +(empty) + + +query T +SELECT concat(a, b, c) from (select 'a' a, 'b' b, 'c' c union all select null a, 'b', 'c') order by 1 nulls last; +---- +abc +NULL \ No newline at end of file From 4153adf2c0f6e317ef476febfdc834208bd46622 Mon Sep 17 00:00:00 2001 From: "xudong.w" Date: Thu, 16 Oct 2025 14:28:58 +0800 Subject: [PATCH 005/109] Add independent configs for topk/join dynamic filter (#18090) * Add independent configs for topk/join dynamic filter * fix ci * update doc * fix typo --- datafusion/common/src/config.rs | 26 +- .../physical-plan/src/joins/hash_join/exec.rs | 2 +- datafusion/physical-plan/src/sorts/sort.rs | 2 +- .../dynamic_filter_pushdown_config.slt | 339 ++++++++++++++++++ .../test_files/information_schema.slt | 6 +- docs/source/user-guide/configs.md | 4 +- 6 files changed, 374 insertions(+), 5 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 39d730eaafb49..9bde637f43794 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -745,11 +745,21 @@ config_namespace! { /// past window functions, if possible pub enable_window_limits: bool, default = true - /// When set to true attempts to push down dynamic filters generated by operators into the file scan phase. 
+ /// When set to true, the optimizer will attempt to push down TopK dynamic filters + /// into the file scan phase. + pub enable_topk_dynamic_filter_pushdown: bool, default = true + + /// When set to true, the optimizer will attempt to push down Join dynamic filters + /// into the file scan phase. + pub enable_join_dynamic_filter_pushdown: bool, default = true + + /// When set to true attempts to push down dynamic filters generated by operators (topk & join) into the file scan phase. /// For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer /// will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. /// This means that if we already have 10 timestamps in the year 2025 /// any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. + /// The config will suppress `enable_join_dynamic_filter_pushdown` & `enable_topk_dynamic_filter_pushdown` + /// So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. pub enable_dynamic_filter_pushdown: bool, default = true /// When set to true, the optimizer will insert filters before a join between @@ -1039,6 +1049,20 @@ impl ConfigOptions { }; if prefix == "datafusion" { + if key == "optimizer.enable_dynamic_filter_pushdown" { + let bool_value = value.parse::().map_err(|e| { + DataFusionError::Configuration(format!( + "Failed to parse '{value}' as bool: {e}", + )) + })?; + + { + self.optimizer.enable_dynamic_filter_pushdown = bool_value; + self.optimizer.enable_topk_dynamic_filter_pushdown = bool_value; + self.optimizer.enable_join_dynamic_filter_pushdown = bool_value; + } + return Ok(()); + } return ConfigField::set(self, key, value); } diff --git a/datafusion/physical-plan/src/joins/hash_join/exec.rs b/datafusion/physical-plan/src/joins/hash_join/exec.rs index 4c293b0498e77..b5fe5ee5cda14 100644 --- a/datafusion/physical-plan/src/joins/hash_join/exec.rs +++ b/datafusion/physical-plan/src/joins/hash_join/exec.rs @@ -1137,7 +1137,7 @@ impl ExecutionPlan for HashJoinExec { // Add dynamic filters in Post phase if enabled if matches!(phase, FilterPushdownPhase::Post) - && config.optimizer.enable_dynamic_filter_pushdown + && config.optimizer.enable_join_dynamic_filter_pushdown { // Add actual dynamic filter to right side (probe side) let dynamic_filter = Self::create_dynamic_filter(&self.on); diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 7f47d60c735a3..bd798ab4f54b2 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -1355,7 +1355,7 @@ impl ExecutionPlan for SortExec { ChildFilterDescription::from_child(&parent_filters, self.input())?; if let Some(filter) = &self.filter { - if config.optimizer.enable_dynamic_filter_pushdown { + if config.optimizer.enable_topk_dynamic_filter_pushdown { child = child.with_self_filter(filter.read().expr()); } } diff --git a/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt new file mode 100644 index 0000000000000..e5cd6d88b08f4 --- /dev/null +++ b/datafusion/sqllogictest/test_files/dynamic_filter_pushdown_config.slt @@ -0,0 +1,339 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Tests for dynamic filter pushdown configuration options +# - enable_topk_dynamic_filter_pushdown (for TopK dynamic filters) +# - enable_join_dynamic_filter_pushdown (for Join dynamic filters) +# - enable_dynamic_filter_pushdown (controls both) + +# Setup: Create parquet test files +statement ok +CREATE TABLE test_data(id INT, value INT, name VARCHAR) AS VALUES +(1, 100, 'a'), +(2, 200, 'b'), +(3, 300, 'c'), +(4, 400, 'd'), +(5, 500, 'e'), +(6, 600, 'f'), +(7, 700, 'g'), +(8, 800, 'h'), +(9, 900, 'i'), +(10, 1000, 'j'); + +statement ok +CREATE TABLE join_left(id INT, data VARCHAR) AS VALUES +(1, 'left1'), +(2, 'left2'), +(3, 'left3'), +(4, 'left4'), +(5, 'left5'); + +statement ok +CREATE TABLE join_right(id INT, info VARCHAR) AS VALUES +(1, 'right1'), +(3, 'right3'), +(5, 'right5'); + +# Copy data to parquet files +query I +COPY test_data TO 'test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet' STORED AS PARQUET; +---- +10 + +query I +COPY join_left TO 'test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet' STORED AS PARQUET; +---- +5 + +query I +COPY join_right TO 'test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet' STORED AS PARQUET; +---- +3 + +# Create external tables from parquet files +statement ok +CREATE EXTERNAL TABLE test_parquet(id INT, value INT, name VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet'; + +statement ok +CREATE EXTERNAL TABLE left_parquet(id INT, data VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet'; + +statement ok +CREATE EXTERNAL TABLE right_parquet(id INT, info VARCHAR) +STORED AS PARQUET +LOCATION 'test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet'; + +# Test 1: TopK dynamic filter pushdown with Parquet +query TT +EXPLAIN SELECT * FROM test_parquet ORDER BY value DESC LIMIT 3; +---- +logical_plan +01)Sort: test_parquet.value DESC NULLS FIRST, fetch=3 +02)--TableScan: test_parquet projection=[id, value, name] +physical_plan +01)SortExec: TopK(fetch=3), expr=[value@1 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet]]}, projection=[id, value, name], file_type=parquet, predicate=DynamicFilter [ empty ] + +# Disable TopK dynamic filter pushdown +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = false; + +query TT +EXPLAIN SELECT * FROM test_parquet ORDER BY value DESC LIMIT 3; +---- +logical_plan +01)Sort: test_parquet.value DESC NULLS FIRST, fetch=3 +02)--TableScan: test_parquet projection=[id, value, name] +physical_plan +01)SortExec: TopK(fetch=3), expr=[value@1 DESC], preserve_partitioning=[false] +02)--DataSourceExec: file_groups={1 
group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/test_data.parquet]]}, projection=[id, value, name], file_type=parquet + +# Re-enable for next tests +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = true; + +# Test 2: Join dynamic filter pushdown with Parquet +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] + +# Disable Join dynamic filter pushdown +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = false; + +# Without Join filter, HashJoin should NOT have filter=DynamicFilter +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet + +# Re-enable for next tests +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = true; + +# Test 3: Test independent control + +# Disable TopK, keep Join enabled +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = false; + +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = true; + +# Join should still have dynamic filter +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], 
projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] + +# Enable TopK, disable Join +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = true; + +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = false; + +# Join should NOT have dynamic filter +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet + +# Test 4: Backward compatibility + +# First, set both new configs to specific values +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = true; + +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = true; + +statement ok +set datafusion.catalog.information_schema = true + +# Setting the config should override both +statement ok +SET datafusion.optimizer.enable_dynamic_filter_pushdown = false; + +# Verify both configs are now false +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_topk_dynamic_filter_pushdown'; +---- +false + +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_join_dynamic_filter_pushdown'; +---- +false + +statement ok +set datafusion.catalog.information_schema = false + +# Join should NOT have dynamic filter +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet + +# Re-enable +statement ok +SET datafusion.optimizer.enable_dynamic_filter_pushdown = true; + +statement ok +set datafusion.catalog.information_schema = true + +# Verify both configs are now true +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_topk_dynamic_filter_pushdown'; +---- +true + +query T +SELECT value FROM information_schema.df_settings +WHERE name = 'datafusion.optimizer.enable_join_dynamic_filter_pushdown'; +---- +true + +statement ok +set datafusion.catalog.information_schema = false + +# Join should have dynamic filter again +query TT +EXPLAIN SELECT l.*, r.info +FROM left_parquet l +INNER JOIN right_parquet r ON l.id = r.id; +---- +logical_plan +01)Projection: l.id, l.data, r.info +02)--Inner Join: l.id = r.id +03)----SubqueryAlias: l +04)------TableScan: left_parquet projection=[id, data] +05)----SubqueryAlias: r +06)------TableScan: right_parquet projection=[id, info] +physical_plan +01)ProjectionExec: expr=[id@1 as id, data@2 as data, info@0 as info] +02)--CoalesceBatchesExec: target_batch_size=8192 +03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(id@0, id@0)], projection=[info@1, id@2, data@3] +04)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_right.parquet]]}, projection=[id, info], file_type=parquet +05)------DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/dynamic_filter_pushdown_config/join_left.parquet]]}, projection=[id, data], file_type=parquet, predicate=DynamicFilter [ empty ] + +# Cleanup + +statement ok +DROP TABLE test_data; + +statement ok +DROP TABLE join_left; + +statement ok +DROP TABLE join_right; + +statement ok +DROP TABLE test_parquet; + +statement ok +DROP TABLE left_parquet; + +statement ok +DROP TABLE right_parquet; + +# Reset configs to defaults +statement ok +SET datafusion.optimizer.enable_topk_dynamic_filter_pushdown = true; + +statement ok +SET datafusion.optimizer.enable_join_dynamic_filter_pushdown = true; + +statement ok +SET datafusion.optimizer.enable_dynamic_filter_pushdown = true; diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 670992633bb85..a69a8d5c0d8f6 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -289,8 +289,10 @@ datafusion.optimizer.allow_symmetric_joins_without_pruning true datafusion.optimizer.default_filter_selectivity 20 datafusion.optimizer.enable_distinct_aggregation_soft_limit true datafusion.optimizer.enable_dynamic_filter_pushdown true +datafusion.optimizer.enable_join_dynamic_filter_pushdown true datafusion.optimizer.enable_round_robin_repartition true datafusion.optimizer.enable_topk_aggregation true +datafusion.optimizer.enable_topk_dynamic_filter_pushdown true datafusion.optimizer.enable_window_limits true datafusion.optimizer.expand_views_at_output false datafusion.optimizer.filter_null_join_keys false @@ -404,9 +406,11 @@ datafusion.format.types_info false Show types in visual representation batches datafusion.optimizer.allow_symmetric_joins_without_pruning true Should DataFusion allow symmetric hash joins for unbounded data sources even when its inputs do not have any ordering or 
filtering If the flag is not enabled, the SymmetricHashJoin operator will be unable to prune its internal buffers, resulting in certain join types - such as Full, Left, LeftAnti, LeftSemi, Right, RightAnti, and RightSemi - being produced only at the end of the execution. This is not typical in stream processing. Additionally, without proper design for long runner execution, all types of joins may encounter out-of-memory errors. datafusion.optimizer.default_filter_selectivity 20 The default filter selectivity used by Filter Statistics when an exact selectivity cannot be determined. Valid values are between 0 (no selectivity) and 100 (all rows are selected). datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read. -datafusion.optimizer.enable_dynamic_filter_pushdown true When set to true attempts to push down dynamic filters generated by operators into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. +datafusion.optimizer.enable_dynamic_filter_pushdown true When set to true attempts to push down dynamic filters generated by operators (topk & join) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown` & `enable_topk_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. +datafusion.optimizer.enable_join_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down Join dynamic filters into the file scan phase. datafusion.optimizer.enable_round_robin_repartition true When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible +datafusion.optimizer.enable_topk_dynamic_filter_pushdown true When set to true, the optimizer will attempt to push down TopK dynamic filters into the file scan phase. datafusion.optimizer.enable_window_limits true When set to true, the optimizer will attempt to push limit operations past window functions, if possible datafusion.optimizer.expand_views_at_output false When set to true, if the returned type is a view type then the output will be coerced to a non-view. Coerces `Utf8View` to `LargeUtf8`, and `BinaryView` to `LargeBinary`. 
datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6bc7b90e893ad..ab3b11a8d833a 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -132,7 +132,9 @@ The following configuration settings are available: | datafusion.optimizer.enable_round_robin_repartition | true | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores | | datafusion.optimizer.enable_topk_aggregation | true | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible | | datafusion.optimizer.enable_window_limits | true | When set to true, the optimizer will attempt to push limit operations past window functions, if possible | -| datafusion.optimizer.enable_dynamic_filter_pushdown | true | When set to true attempts to push down dynamic filters generated by operators into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. | +| datafusion.optimizer.enable_topk_dynamic_filter_pushdown | true | When set to true, the optimizer will attempt to push down TopK dynamic filters into the file scan phase. | +| datafusion.optimizer.enable_join_dynamic_filter_pushdown | true | When set to true, the optimizer will attempt to push down Join dynamic filters into the file scan phase. | +| datafusion.optimizer.enable_dynamic_filter_pushdown | true | When set to true attempts to push down dynamic filters generated by operators (topk & join) into the file scan phase. For example, for a query such as `SELECT * FROM t ORDER BY timestamp DESC LIMIT 10`, the optimizer will attempt to push down the current top 10 timestamps that the TopK operator references into the file scans. This means that if we already have 10 timestamps in the year 2025 any files that only have timestamps in the year 2024 can be skipped / pruned at various stages in the scan. The config will suppress `enable_join_dynamic_filter_pushdown` & `enable_topk_dynamic_filter_pushdown` So if you disable `enable_topk_dynamic_filter_pushdown`, then enable `enable_dynamic_filter_pushdown`, the `enable_topk_dynamic_filter_pushdown` will be overridden. | | datafusion.optimizer.filter_null_join_keys | false | When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down. | | datafusion.optimizer.repartition_aggregations | true | Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided `target_partitions` level | | datafusion.optimizer.repartition_file_min_size | 10485760 | Minimum total files size in bytes to perform file scan repartitioning. 
| From 3bca1bb6eb7251ef48dd639d371be2427f5ca696 Mon Sep 17 00:00:00 2001 From: Blake Orth Date: Thu, 16 Oct 2025 04:38:08 -0600 Subject: [PATCH 006/109] Adds Trace and Summary to CLI instrumented stores (#18064) - Adds the ability for a user to choose a summary only output for an instrumented object store when using the CLI - The existing "enabled" setting that displays both a summary and a detailed usage for each object store call has been renamed to `Trace` to improve clarity - Adds additional test cases for summary only and modifies existing tests to use trace - Updates user guide docs to reflect the CLI flag and command line changes --- datafusion-cli/src/command.rs | 17 +++++++--- datafusion-cli/src/main.rs | 2 +- .../src/object_storage/instrumented.rs | 31 ++++++++++++------- datafusion-cli/src/print_options.rs | 17 +++++----- datafusion-cli/tests/cli_integration.rs | 7 +++-- ...bject_store_profiling@s3_url_fallback.snap | 28 +++++++++++++++-- docs/source/user-guide/cli/usage.md | 4 +-- 7 files changed, 75 insertions(+), 31 deletions(-) diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 48fb37e8a8880..3fbfe5680cfcd 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -128,7 +128,7 @@ impl Command { let profile_mode = mode .parse() .map_err(|_| - exec_datafusion_err!("Failed to parse input: {mode}. Valid options are disabled, enabled") + exec_datafusion_err!("Failed to parse input: {mode}. Valid options are disabled, summary, trace") )?; print_options .instrumented_registry @@ -165,7 +165,7 @@ impl Command { ("\\pset [NAME [VALUE]]", "set table output option\n(format)") } Self::ObjectStoreProfileMode(_) => ( - "\\object_store_profiling (disabled|enabled)", + "\\object_store_profiling (disabled|summary|trace)", "print or set object store profile mode", ), } @@ -312,13 +312,22 @@ mod tests { InstrumentedObjectStoreMode::default() ); - cmd = "object_store_profiling enabled" + cmd = "object_store_profiling summary" .parse() .expect("expected parse to succeed"); assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); assert_eq!( print_options.instrumented_registry.instrument_mode(), - InstrumentedObjectStoreMode::Enabled + InstrumentedObjectStoreMode::Summary + ); + + cmd = "object_store_profiling trace" + .parse() + .expect("expected parse to succeed"); + assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); + assert_eq!( + print_options.instrumented_registry.instrument_mode(), + InstrumentedObjectStoreMode::Trace ); cmd = "object_store_profiling does_not_exist" diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 3dbe839d3c9b3..bdb2fdf5198e2 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -151,7 +151,7 @@ struct Args { #[clap( long, - help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, enabled]", + help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, summary, trace]", default_value_t = InstrumentedObjectStoreMode::Disabled )] object_store_profiling: InstrumentedObjectStoreMode, diff --git a/datafusion-cli/src/object_storage/instrumented.rs b/datafusion-cli/src/object_storage/instrumented.rs index 9252e0688c35a..cb96734f24645 100644 --- a/datafusion-cli/src/object_storage/instrumented.rs +++ b/datafusion-cli/src/object_storage/instrumented.rs @@ -48,8 +48,10 @@ pub enum InstrumentedObjectStoreMode { /// Disable collection of profiling data #[default] 
Disabled, - /// Enable collection of profiling data - Enabled, + /// Enable collection of profiling data and output a summary + Summary, + /// Enable collection of profiling data and output a summary and all details + Trace, } impl fmt::Display for InstrumentedObjectStoreMode { @@ -64,7 +66,8 @@ impl FromStr for InstrumentedObjectStoreMode { fn from_str(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { "disabled" => Ok(Self::Disabled), - "enabled" => Ok(Self::Enabled), + "summary" => Ok(Self::Summary), + "trace" => Ok(Self::Trace), _ => Err(DataFusionError::Execution(format!("Unrecognized mode {s}"))), } } @@ -73,7 +76,8 @@ impl FromStr for InstrumentedObjectStoreMode { impl From for InstrumentedObjectStoreMode { fn from(value: u8) -> Self { match value { - 1 => InstrumentedObjectStoreMode::Enabled, + 1 => InstrumentedObjectStoreMode::Summary, + 2 => InstrumentedObjectStoreMode::Trace, _ => InstrumentedObjectStoreMode::Disabled, } } @@ -434,16 +438,21 @@ mod tests { InstrumentedObjectStoreMode::Disabled )); assert!(matches!( - "EnABlEd".parse().unwrap(), - InstrumentedObjectStoreMode::Enabled + "SUmMaRy".parse().unwrap(), + InstrumentedObjectStoreMode::Summary + )); + assert!(matches!( + "TRaCe".parse().unwrap(), + InstrumentedObjectStoreMode::Trace )); assert!("does_not_exist" .parse::() .is_err()); assert!(matches!(0.into(), InstrumentedObjectStoreMode::Disabled)); - assert!(matches!(1.into(), InstrumentedObjectStoreMode::Enabled)); - assert!(matches!(2.into(), InstrumentedObjectStoreMode::Disabled)); + assert!(matches!(1.into(), InstrumentedObjectStoreMode::Summary)); + assert!(matches!(2.into(), InstrumentedObjectStoreMode::Trace)); + assert!(matches!(3.into(), InstrumentedObjectStoreMode::Disabled)); } #[test] @@ -455,8 +464,8 @@ mod tests { InstrumentedObjectStoreMode::default() ); - reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Enabled); - assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Enabled); + reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Trace); + assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Trace); let store = object_store::memory::InMemory::new(); let url = "mem://test".parse().unwrap(); @@ -484,7 +493,7 @@ mod tests { let _ = instrumented.get(&path).await.unwrap(); assert!(instrumented.requests.lock().is_empty()); - instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); assert!(instrumented.requests.lock().is_empty()); let _ = instrumented.get(&path).await.unwrap(); assert_eq!(instrumented.requests.lock().len(), 1); diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs index f54de189b4ef5..01be736ca54df 100644 --- a/datafusion-cli/src/print_options.rs +++ b/datafusion-cli/src/print_options.rs @@ -188,20 +188,21 @@ impl PrintOptions { if !self.quiet { writeln!(writer, "{formatted_exec_details}")?; - if self.instrumented_registry.instrument_mode() - != InstrumentedObjectStoreMode::Disabled - { + let instrument_mode = self.instrumented_registry.instrument_mode(); + if instrument_mode != InstrumentedObjectStoreMode::Disabled { writeln!(writer, "{OBJECT_STORE_PROFILING_HEADER}")?; for store in self.instrumented_registry.stores() { let requests = store.take_requests(); if !requests.is_empty() { writeln!(writer, "{store}")?; - for req in requests.iter() { - writeln!(writer, "{req}")?; + if instrument_mode == InstrumentedObjectStoreMode::Trace { + for req in requests.iter() { + 
writeln!(writer, "{req}")?; + } + // Add an extra blank line to help visually organize the output + writeln!(writer)?; } - // Add an extra blank line to help visually organize the output - writeln!(writer)?; writeln!(writer, "Summaries:")?; let summaries = RequestSummary::summarize_by_operation(&requests); @@ -252,7 +253,7 @@ mod tests { print_output.clear(); print_options .instrumented_registry - .set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + .set_instrument_mode(InstrumentedObjectStoreMode::Trace); print_options.write_output(&mut print_output, exec_out.clone())?; let out_str: String = print_output .clone() diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index a67924fef2537..56620346ed0fe 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -434,8 +434,11 @@ LOCATION 's3://data/cars.csv'; -- Initial query should not show any profiling as the object store is not instrumented yet SELECT * from CARS LIMIT 1; -\object_store_profiling enabled --- Query again to see the profiling output +\object_store_profiling trace +-- Query again to see the full profiling output +SELECT * from CARS LIMIT 1; +\object_store_profiling summary +-- Query again to see the summarized profiling output SELECT * from CARS LIMIT 1; \object_store_profiling disabled -- Final query should not show any profiling as we disabled it again diff --git a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap b/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap index 50c6cc8eab99f..5c91800676a4d 100644 --- a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap +++ b/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap @@ -8,7 +8,7 @@ info: AWS_ALLOW_HTTP: "true" AWS_ENDPOINT: "http://localhost:55031" AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword - stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling enabled\n-- Query again to see the profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" + stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling trace\n-- Query again to see the full profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling summary\n-- Query again to see the summarized profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" snapshot_kind: text --- success: true @@ -26,7 +26,7 @@ exit_code: 0 1 row(s) fetched. 
[ELAPSED] -ObjectStore Profile mode set to Enabled +ObjectStore Profile mode set to Trace +-----+-------+---------------------+ | car | speed | time | +-----+-------+---------------------+ @@ -36,7 +36,7 @@ ObjectStore Profile mode set to Enabled [ELAPSED] Object Store Profiling -Instrumented Object Store: instrument_mode: Enabled, inner: AmazonS3(data) +Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(data) operation=Get duration=[DURATION] size=1006 path=cars.csv Summaries: @@ -50,6 +50,28 @@ size max: 1006 B size avg: 1006 B size sum: 1006 B +ObjectStore Profile mode set to Summary ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +Object Store Profiling +Instrumented Object Store: instrument_mode: Summary, inner: AmazonS3(data) +Summaries: +Get +count: 1 +[SUMMARY_DURATION] +[SUMMARY_DURATION] +[SUMMARY_DURATION] +size min: 1006 B +size max: 1006 B +size avg: 1006 B +size sum: 1006 B + ObjectStore Profile mode set to Disabled +-----+-------+---------------------+ | car | speed | time | diff --git a/docs/source/user-guide/cli/usage.md b/docs/source/user-guide/cli/usage.md index 57a96c5d79003..29ed6b8183c26 100644 --- a/docs/source/user-guide/cli/usage.md +++ b/docs/source/user-guide/cli/usage.md @@ -65,7 +65,7 @@ OPTIONS: --object-store-profiling Specify the default object_store_profiling mode, defaults to 'disabled'. - [possible values: disabled, enabled] [default: Disabled] + [possible values: disabled, summary, trace] [default: Disabled] -p, --data-path Path to your data, default to current directory @@ -129,7 +129,7 @@ Available commands inside DataFusion CLI are: - Object Store Profiling Mode ```bash -> \object_store_profiling [disabled|enabled] +> \object_store_profiling [disabled|summary|trace] ``` ## Supported SQL From ec3ca719d6e0aee8f4ef2c9551e6374ee08ce469 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Thu, 16 Oct 2025 14:03:02 +0300 Subject: [PATCH 007/109] fix: Improve null handling in array_to_string function (#18076) * fix: Improve null handling in array_to_string function * chore --- datafusion/functions-nested/src/string.rs | 58 +++++++++++++------- datafusion/sqllogictest/test_files/array.slt | 2 +- 2 files changed, 38 insertions(+), 22 deletions(-) diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index 3373f7a9838e1..61caa3ac70764 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -369,27 +369,38 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { List(..) => { let list_array = as_list_array(&arr)?; for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) } FixedSizeList(..) 
=> { let list_array = as_fixed_size_list_array(&arr)?; + for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) @@ -397,13 +408,18 @@ pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { LargeList(..) => { let list_array = as_large_list_array(&arr)?; for i in 0..list_array.len() { - compute_array_to_string( - arg, - list_array.value(i), - delimiter.clone(), - null_string.clone(), - with_null_string, - )?; + if !list_array.is_null(i) { + compute_array_to_string( + arg, + list_array.value(i), + delimiter.clone(), + null_string.clone(), + with_null_string, + )?; + } else if with_null_string { + arg.push_str(&null_string); + arg.push_str(&delimiter); + } } Ok(arg) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index d8c29a323e945..f488204d6d7b6 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -4855,7 +4855,7 @@ h,-,-,-,o nil-2-nil-4-5 1|0|3 query T select array_to_string(arrow_cast([arrow_cast([NULL, 'a'], 'FixedSizeList(2, Utf8)'), NULL], 'FixedSizeList(2, FixedSizeList(2, Utf8))'), ',', '-'); ---- --,a,-,- +-,a,- # array_to_string with columns #1 From c8e0f1cf7bba4a955b09ffd5d9b1bec38d868e8a Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Thu, 16 Oct 2025 14:41:28 +0300 Subject: [PATCH 008/109] feat: update .asf.yaml configuration settings (#18027) --- .asf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.asf.yaml b/.asf.yaml index d71e7def36ad1..99fd6fac22c76 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -41,6 +41,7 @@ github: - sql enabled_merge_buttons: squash: true + squash_commit_message: PR_TITLE_AND_DESC merge: false rebase: false features: From 0a57e017c7a114936a3a6e1e16a4de6b44888342 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 16 Oct 2025 07:54:01 -0700 Subject: [PATCH 009/109] Fix extended tests on main to get CI green (#18096) ## Which issue does this PR close? - Closes https://github.com/apache/datafusion/issues/18084 ## Rationale for this change Some of the extended tests are failing because we have fixed case conditional evaluation and queries that (incorrectly) previously did not pass are now. ## What changes are included in this PR? Update datafusion-testing pin ## Are these changes tested? I tested locally with: ```shell INCLUDE_SQLITE=true cargo test --profile release-nonlto --test sqllogictests ``` ## Are there any user-facing changes? No --- datafusion-testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-testing b/datafusion-testing index 905df5f65cc9d..eccb0e4a42634 160000 --- a/datafusion-testing +++ b/datafusion-testing @@ -1 +1 @@ -Subproject commit 905df5f65cc9d0851719c21f5a4dd5cd77621f19 +Subproject commit eccb0e4a426344ef3faf534cd60e02e9c3afd3ac From 9bfa2ae770f03455eca1a0dc32e39a6a201cbe17 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 01:55:06 +1100 Subject: [PATCH 010/109] chore(deps): bump taiki-e/install-action from 2.62.29 to 2.62.31 (#18094) Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 2.62.29 to 2.62.31.
Release notes (from taiki-e/install-action's releases):

- 2.62.31: Update protoc@latest to 3.33.0, uv@latest to 0.9.3, syft@latest to 1.34.1, mise@latest to 2025.10.9, and cargo-shear@latest to 1.6.0.
- 2.62.30: Update vacuum@latest to 0.18.6 and zizmor@latest to 1.15.2.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/audit.yml | 2 +- .github/workflows/rust.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 5d5e9e270a65e..00bfa1e1b285f 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -42,7 +42,7 @@ jobs: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install cargo-audit - uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29 + uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31 with: tool: cargo-audit - name: Run audit check diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ecdbf031b45b0..9fa033fce646f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -412,7 +412,7 @@ jobs: sudo apt-get update -qq sudo apt-get install -y -qq clang - name: Setup wasm-pack - uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29 + uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31 with: tool: wasm-pack - name: Run tests with headless mode @@ -739,7 +739,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@5b5de1b4da26ad411330c0454bdd72929bfcbeb2 # v2.62.29 + uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31 with: tool: cargo-msrv From b1723e5c6a6700ba939b03319377830511719aa2 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Thu, 16 Oct 2025 09:29:22 -0700 Subject: [PATCH 011/109] chore: run extended suite on PRs for critical areas (#18088) ## Which issue does this PR close? - Closes #. Related to https://github.com/apache/datafusion/issues/18084 ## Rationale for this change Run extended suite on PRs for critical areas, to avoid post merge bugfixing ## What changes are included in this PR? ## Are these changes tested? ## Are there any user-facing changes? --------- Co-authored-by: Andrew Lamb --- .github/workflows/extended.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 9343997e05682..23bd66a0cf352 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -36,6 +36,14 @@ on: # it is not expected to have many changes in these branches, # so running extended tests is not a burden - 'branch-*' + # Also run for changes to some critical areas that are most likely + # to trigger errors in extended tests + pull_request: + branches: [ '**' ] + paths: + - 'datafusion/physical*/**/*.rs' + - 'datafusion/expr*/**/*.rs' + - 'datafusion/optimizer/**/*.rs' workflow_dispatch: inputs: pr_number: From 4e03c92059d2aaa72bb8b3f076626f6111ec720d Mon Sep 17 00:00:00 2001 From: dario curreri <48800335+dariocurr@users.noreply.github.com> Date: Thu, 16 Oct 2025 19:21:24 +0200 Subject: [PATCH 012/109] refactor: add dialect enum (#18043) ## Which issue does this PR close? - Closes #18042 ## Rationale for this change This PR introduces a new dialect enum to improve type safety and code maintainability when handling different SQL dialects in DataFusion 1. Provide compile-time guarantees for dialect handling 2. Improve code readability and self-documentation 3. Enable better IDE support and autocomplete ## What changes are included in this PR? 
- Added a new `Dialect` enum to represent supported SQL dialects - Refactored existing code to use the new enum instead of previous representations - Modified tests to work with the new enum-based approach ## Are these changes tested? Yes ## Are there any user-facing changes? Yes, this is an API change: the type of the `dialect` field changed from `String` to `Dialect` --- Cargo.lock | 1 + datafusion-cli/Cargo.toml | 5 +- datafusion-cli/src/helper.rs | 19 ++-- datafusion-cli/src/highlighter.rs | 10 ++- .../examples/remote_catalog.rs | 4 +- datafusion/common/src/config.rs | 90 ++++++++++++++++++- datafusion/core/benches/sql_planner.rs | 7 +- .../core/src/execution/session_state.rs | 27 +++--- .../tests/user_defined/insert_operation.rs | 5 +- docs/source/library-user-guide/upgrading.md | 8 ++ 10 files changed, 141 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bbf64d5262e29..7b09121595d67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1936,6 +1936,7 @@ dependencies = [ "clap 4.5.48", "ctor", "datafusion", + "datafusion-common", "dirs", "env_logger", "futures", diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index d186cd711945d..53744e6c609b8 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -40,7 +40,7 @@ async-trait = { workspace = true } aws-config = "1.8.7" aws-credential-types = "1.2.7" chrono = { workspace = true } -clap = { version = "4.5.47", features = ["derive", "cargo"] } +clap = { version = "4.5.47", features = ["cargo", "derive"] } datafusion = { workspace = true, features = [ "avro", "compression", @@ -55,6 +55,7 @@ datafusion = { workspace = true, features = [ "sql", "unicode_expressions", ] } +datafusion-common = { workspace = true } dirs = "6.0.0" env_logger = { workspace = true } futures = { workspace = true } @@ -65,7 +66,7 @@ parking_lot = { workspace = true } parquet = { workspace = true, default-features = false } regex = { workspace = true } rustyline = "17.0" -tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] } +tokio = { workspace = true, features = ["macros", "parking_lot", "rt", "rt-multi-thread", "signal", "sync"] } url = { workspace = true } [dev-dependencies] diff --git a/datafusion-cli/src/helper.rs b/datafusion-cli/src/helper.rs index 64c34c4737369..219637b3460e6 100644 --- a/datafusion-cli/src/helper.rs +++ b/datafusion-cli/src/helper.rs @@ -24,6 +24,7 @@ use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter}; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; +use datafusion_common::config::Dialect; use rustyline::completion::{Completer, FilenameCompleter, Pair}; use rustyline::error::ReadlineError; @@ -34,12 +35,12 @@ use rustyline::{Context, Helper, Result}; pub struct CliHelper { completer: FilenameCompleter, - dialect: String, + dialect: Dialect, highlighter: Box, } impl CliHelper { - pub fn new(dialect: &str, color: bool) -> Self { + pub fn new(dialect: &Dialect, color: bool) -> Self { let highlighter: Box = if !color { Box::new(NoSyntaxHighlighter {}) } else { @@ -47,20 +48,20 @@ impl CliHelper { }; Self { completer: FilenameCompleter::new(), - dialect: dialect.into(), + dialect: *dialect, highlighter, } } - pub fn set_dialect(&mut self, dialect: &str) { - if dialect != self.dialect { - self.dialect = dialect.to_string(); + pub fn set_dialect(&mut self, dialect: &Dialect) { + if *dialect != self.dialect { + self.dialect = *dialect; } } fn validate_input(&self, 
input: &str) -> Result { if let Some(sql) = input.strip_suffix(';') { - let dialect = match dialect_from_str(&self.dialect) { + let dialect = match dialect_from_str(self.dialect) { Some(dialect) => dialect, None => { return Ok(ValidationResult::Invalid(Some(format!( @@ -97,7 +98,7 @@ impl CliHelper { impl Default for CliHelper { fn default() -> Self { - Self::new("generic", false) + Self::new(&Dialect::Generic, false) } } @@ -289,7 +290,7 @@ mod tests { ); // valid in postgresql dialect - validator.set_dialect("postgresql"); + validator.set_dialect(&Dialect::PostgreSQL); let result = readline_direct(Cursor::new(r"select 1 # 2;".as_bytes()), &validator)?; assert!(matches!(result, ValidationResult::Valid(None))); diff --git a/datafusion-cli/src/highlighter.rs b/datafusion-cli/src/highlighter.rs index 7a886b94740bd..f4e57a2e3593a 100644 --- a/datafusion-cli/src/highlighter.rs +++ b/datafusion-cli/src/highlighter.rs @@ -27,6 +27,7 @@ use datafusion::sql::sqlparser::{ keywords::Keyword, tokenizer::{Token, Tokenizer}, }; +use datafusion_common::config; use rustyline::highlight::{CmdKind, Highlighter}; /// The syntax highlighter. @@ -36,7 +37,7 @@ pub struct SyntaxHighlighter { } impl SyntaxHighlighter { - pub fn new(dialect: &str) -> Self { + pub fn new(dialect: &config::Dialect) -> Self { let dialect = dialect_from_str(dialect).unwrap_or(Box::new(GenericDialect {})); Self { dialect } } @@ -93,13 +94,14 @@ impl Color { #[cfg(test)] mod tests { + use super::config::Dialect; use super::SyntaxHighlighter; use rustyline::highlight::Highlighter; #[test] fn highlighter_valid() { let s = "SElect col_a from tab_1;"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!( "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;", @@ -110,7 +112,7 @@ mod tests { #[test] fn highlighter_valid_with_new_line() { let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!( "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;", @@ -121,7 +123,7 @@ mod tests { #[test] fn highlighter_invalid() { let s = "SElect col_a from tab_1 WHERE col_b = ';"; - let highlighter = SyntaxHighlighter::new("generic"); + let highlighter = SyntaxHighlighter::new(&Dialect::Generic); let out = highlighter.highlight(s, s.len()); assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out); } diff --git a/datafusion-examples/examples/remote_catalog.rs b/datafusion-examples/examples/remote_catalog.rs index 70c0963545e08..74575554ec0af 100644 --- a/datafusion-examples/examples/remote_catalog.rs +++ b/datafusion-examples/examples/remote_catalog.rs @@ -75,8 +75,8 @@ async fn main() -> Result<()> { let state = ctx.state(); // First, parse the SQL (but don't plan it / resolve any table references) - let dialect = state.config().options().sql_parser.dialect.as_str(); - let statement = state.sql_to_statement(sql, dialect)?; + let dialect = state.config().options().sql_parser.dialect; + let statement = state.sql_to_statement(sql, &dialect)?; // Find all `TableReferences` in the parsed queries. 
These correspond to the // tables referred to by the query (in this case diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 9bde637f43794..126935a1de90b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -258,7 +258,7 @@ config_namespace! { /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. - pub dialect: String, default = "generic".to_string() + pub dialect: Dialect, default = Dialect::Generic // no need to lowercase because `sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but @@ -292,6 +292,94 @@ config_namespace! { } } +/// This is the SQL dialect used by DataFusion's parser. +/// This mirrors [sqlparser::dialect::Dialect](https://docs.rs/sqlparser/latest/sqlparser/dialect/trait.Dialect.html) +/// trait in order to offer an easier API and avoid adding the `sqlparser` dependency +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub enum Dialect { + #[default] + Generic, + MySQL, + PostgreSQL, + Hive, + SQLite, + Snowflake, + Redshift, + MsSQL, + ClickHouse, + BigQuery, + Ansi, + DuckDB, + Databricks, +} + +impl AsRef for Dialect { + fn as_ref(&self) -> &str { + match self { + Self::Generic => "generic", + Self::MySQL => "mysql", + Self::PostgreSQL => "postgresql", + Self::Hive => "hive", + Self::SQLite => "sqlite", + Self::Snowflake => "snowflake", + Self::Redshift => "redshift", + Self::MsSQL => "mssql", + Self::ClickHouse => "clickhouse", + Self::BigQuery => "bigquery", + Self::Ansi => "ansi", + Self::DuckDB => "duckdb", + Self::Databricks => "databricks", + } + } +} + +impl FromStr for Dialect { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + let value = match s.to_ascii_lowercase().as_str() { + "generic" => Self::Generic, + "mysql" => Self::MySQL, + "postgresql" | "postgres" => Self::PostgreSQL, + "hive" => Self::Hive, + "sqlite" => Self::SQLite, + "snowflake" => Self::Snowflake, + "redshift" => Self::Redshift, + "mssql" => Self::MsSQL, + "clickhouse" => Self::ClickHouse, + "bigquery" => Self::BigQuery, + "ansi" => Self::Ansi, + "duckdb" => Self::DuckDB, + "databricks" => Self::Databricks, + other => { + let error_message = format!( + "Invalid Dialect: {other}. 
Expected one of: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB, Databricks" + ); + return Err(DataFusionError::Configuration(error_message)); + } + }; + Ok(value) + } +} + +impl ConfigField for Dialect { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, _: &str, value: &str) -> Result<()> { + *self = Self::from_str(value)?; + Ok(()) + } +} + +impl Display for Dialect { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let str = self.as_ref(); + write!(f, "{str}") + } +} + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub enum SpillCompression { Zstd, diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 3be8668b2b8c4..83563099cad67 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -30,7 +30,7 @@ use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; use datafusion::prelude::DataFrame; -use datafusion_common::ScalarValue; +use datafusion_common::{config::Dialect, ScalarValue}; use datafusion_expr::Expr::Literal; use datafusion_expr::{cast, col, lit, not, try_cast, when}; use datafusion_functions::expr_fn::{ @@ -288,7 +288,10 @@ fn benchmark_with_param_values_many_columns( } // SELECT max(attr0), ..., max(attrN) FROM t1. let query = format!("SELECT {aggregates} FROM t1"); - let statement = ctx.state().sql_to_statement(&query, "Generic").unwrap(); + let statement = ctx + .state() + .sql_to_statement(&query, &Dialect::Generic) + .unwrap(); let plan = rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() }); b.iter(|| { diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index b04004dd495c8..6749ddd7ab8d5 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -30,15 +30,14 @@ use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; +use arrow::datatypes::DataType; use datafusion_catalog::information_schema::{ InformationSchemaProvider, INFORMATION_SCHEMA, }; - -use arrow::datatypes::DataType; use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; -use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; +use datafusion_common::config::{ConfigExtension, ConfigOptions, Dialect, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ @@ -374,7 +373,7 @@ impl SessionState { pub fn sql_to_statement( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { let dialect = dialect_from_str(dialect).ok_or_else(|| { plan_datafusion_err!( @@ -411,7 +410,7 @@ impl SessionState { pub fn sql_to_expr( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { self.sql_to_expr_with_alias(sql, dialect).map(|x| x.expr) } @@ -423,7 +422,7 @@ impl SessionState { pub fn sql_to_expr_with_alias( &self, sql: &str, - dialect: &str, + dialect: &Dialect, ) -> datafusion_common::Result { let dialect = 
dialect_from_str(dialect).ok_or_else(|| { plan_datafusion_err!( @@ -527,8 +526,8 @@ impl SessionState { &self, sql: &str, ) -> datafusion_common::Result { - let dialect = self.config.options().sql_parser.dialect.as_str(); - let statement = self.sql_to_statement(sql, dialect)?; + let dialect = self.config.options().sql_parser.dialect; + let statement = self.sql_to_statement(sql, &dialect)?; let plan = self.statement_to_plan(statement).await?; Ok(plan) } @@ -542,9 +541,9 @@ impl SessionState { sql: &str, df_schema: &DFSchema, ) -> datafusion_common::Result { - let dialect = self.config.options().sql_parser.dialect.as_str(); + let dialect = self.config.options().sql_parser.dialect; - let sql_expr = self.sql_to_expr_with_alias(sql, dialect)?; + let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?; let provider = SessionContextProvider { state: self, @@ -2034,6 +2033,7 @@ mod tests { use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion_catalog::MemoryCatalogProviderList; + use datafusion_common::config::Dialect; use datafusion_common::DFSchema; use datafusion_common::Result; use datafusion_execution::config::SessionConfig; @@ -2059,8 +2059,8 @@ mod tests { let sql = "[1,2,3]"; let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); let df_schema = DFSchema::try_from(schema)?; - let dialect = state.config.options().sql_parser.dialect.as_str(); - let sql_expr = state.sql_to_expr(sql, dialect)?; + let dialect = state.config.options().sql_parser.dialect; + let sql_expr = state.sql_to_expr(sql, &dialect)?; let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); query.sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) @@ -2218,7 +2218,8 @@ mod tests { } let state = &context_provider.state; - let statement = state.sql_to_statement("select count(*) from t", "mysql")?; + let statement = + state.sql_to_statement("select count(*) from t", &Dialect::MySQL)?; let plan = SqlToRel::new(&context_provider).statement_to_plan(statement)?; state.create_physical_plan(&plan).await } diff --git a/datafusion/core/tests/user_defined/insert_operation.rs b/datafusion/core/tests/user_defined/insert_operation.rs index c8a4279a42110..e0a3e98604ae4 100644 --- a/datafusion/core/tests/user_defined/insert_operation.rs +++ b/datafusion/core/tests/user_defined/insert_operation.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use std::{any::Any, sync::Arc}; +use std::{any::Any, str::FromStr, sync::Arc}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use async_trait::async_trait; @@ -24,6 +24,7 @@ use datafusion::{ prelude::{SessionConfig, SessionContext}, }; use datafusion_catalog::{Session, TableProvider}; +use datafusion_common::config::Dialect; use datafusion_expr::{dml::InsertOp, Expr, TableType}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; use datafusion_physical_plan::execution_plan::SchedulingType; @@ -63,7 +64,7 @@ async fn assert_insert_op(ctx: &SessionContext, sql: &str, insert_op: InsertOp) fn session_ctx_with_dialect(dialect: impl Into) -> SessionContext { let mut config = SessionConfig::new(); let options = config.options_mut(); - options.sql_parser.dialect = dialect.into(); + options.sql_parser.dialect = Dialect::from_str(&dialect.into()).unwrap(); SessionContext::new_with_config(config) } diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index e93659872565b..0b9da1b5a86ae 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -86,6 +86,14 @@ See [issue #17601] for more details. [issue #17601]: https://github.com/apache/datafusion/issues/17601 +### `SessionState`'s `sql_to_statement` method takes `Dialect` rather than a `str` + +The `dialect` parameter of `sql_to_statement` method defined in `datafusion::execution::session_state::SessionState` +has changed from `&str` to `&Dialect`. +`Dialect` is an enum defined in the `datafusion-common` +crate under the `config` module that provides type safety +and better validation for SQL dialect selection + ## DataFusion `50.0.0` ### ListingTable automatically detects Hive Partitioned tables From ea83c2644eb559e55401ce2f7f975032e8d7845d Mon Sep 17 00:00:00 2001 From: Pepijn Van Eeckhoudt Date: Thu, 16 Oct 2025 19:21:48 +0200 Subject: [PATCH 013/109] #17982 Make `nvl` a thin wrapper for `coalesce` (#17991) ## Which issue does this PR close? - Closes #17982 ## Rationale for this change By making `NVLFunc` a wrapper for `CoalesceFunc` with a more restrictive signature the implementation automatically benefits from any optimisation work related to `coalesce`. ## What changes are included in this PR? - Make `NVLFunc` a thin wrapper of `CoalesceFunc`. This seemed like the simplest way to reuse the coalesce logic, but keep the stricter signature of `nvl`. - Add `ScalarUDF::conditional_arguments` as a more precise complement to `ScalarUDF::short_circuits`. By letting each function expose which arguments are eager and which are lazy, we provide more precise information to the optimizer which may enable better optimisation. ## Are these changes tested? Assumed to be covered by sql logic tests. Unit tests for the custom implementation were removed since those are no longer relevant. ## Are there any user-facing changes? The rewriting of `nvl` to `case when ... then ... else ... end` is visible in the physical query plan. 
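For illustration only (this is not code from the diff below), here is a minimal standalone sketch of the eager/lazy split that the new hook reports for a coalesce-like function such as `nvl`. The real hook is the `ScalarUDFImpl::conditional_arguments` method added in this PR (which also takes `&self`); the free function here is a simplified stand-in:

```rust
use datafusion_expr::Expr;

// Simplified sketch: the first argument is always evaluated (eager), while
// every later argument only runs when all earlier ones were NULL, so it is
// reported as lazy. Returning `None` (e.g. for an empty argument list) means
// all arguments are treated as eagerly evaluated.
fn conditional_arguments<'a>(args: &'a [Expr]) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> {
    let (first, rest) = args.split_first()?;
    Some((vec![first], rest.iter().collect()))
}
```

The intent, per the description above, is that the optimizer may keep treating the eager arguments normally (e.g. for common subexpression elimination) and only has to be conservative about the lazily evaluated ones.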
--------- Co-authored-by: Andrew Lamb --- datafusion/expr/src/udf.rs | 55 +++- datafusion/functions/src/core/coalesce.rs | 11 +- datafusion/functions/src/core/nvl.rs | 240 +++--------------- .../optimizer/src/common_subexpr_eliminate.rs | 6 +- datafusion/sqllogictest/test_files/nvl.slt | 35 +++ .../test_files/string/string_view.slt | 2 +- .../source/user-guide/sql/scalar_functions.md | 2 +- 7 files changed, 141 insertions(+), 210 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index d522158f7b6b7..de81ec5f0bacf 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -252,7 +252,21 @@ impl ScalarUDF { Ok(result) } - /// Get the circuits of inner implementation + /// Determines which of the arguments passed to this function are evaluated eagerly + /// and which may be evaluated lazily. + /// + /// See [ScalarUDFImpl::conditional_arguments] for more information. + pub fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.inner.conditional_arguments(args) + } + + /// Returns true if some of this `exprs` subexpressions may not be evaluated + /// and thus any side effects (like divide by zero) may not be encountered. + /// + /// See [ScalarUDFImpl::short_circuits] for more information. pub fn short_circuits(&self) -> bool { self.inner.short_circuits() } @@ -656,10 +670,42 @@ pub trait ScalarUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// /// Setting this to true prevents certain optimizations such as common /// subexpression elimination + /// + /// When overriding this function to return `true`, [ScalarUDFImpl::conditional_arguments] can also be + /// overridden to report more accurately which arguments are eagerly evaluated and which ones + /// lazily. fn short_circuits(&self) -> bool { false } + /// Determines which of the arguments passed to this function are evaluated eagerly + /// and which may be evaluated lazily. + /// + /// If this function returns `None`, all arguments are eagerly evaluated. + /// Returning `None` is a micro optimization that saves a needless `Vec` + /// allocation. + /// + /// If the function returns `Some`, returns (`eager`, `lazy`) where `eager` + /// are the arguments that are always evaluated, and `lazy` are the + /// arguments that may be evaluated lazily (i.e. may not be evaluated at all + /// in some cases). + /// + /// Implementations must ensure that the two returned `Vec`s are disjunct, + /// and that each argument from `args` is present in one the two `Vec`s. + /// + /// When overriding this function, [ScalarUDFImpl::short_circuits] must + /// be overridden to return `true`. + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + if self.short_circuits() { + Some((vec![], args.iter().collect())) + } else { + None + } + } + /// Computes the output [`Interval`] for a [`ScalarUDFImpl`], given the input /// intervals. 
/// @@ -845,6 +891,13 @@ impl ScalarUDFImpl for AliasedScalarUDFImpl { self.inner.simplify(args, info) } + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.inner.conditional_arguments(args) + } + fn short_circuits(&self) -> bool { self.inner.short_circuits() } diff --git a/datafusion/functions/src/core/coalesce.rs b/datafusion/functions/src/core/coalesce.rs index 3fba539dd04b4..aab1f445d5590 100644 --- a/datafusion/functions/src/core/coalesce.rs +++ b/datafusion/functions/src/core/coalesce.rs @@ -47,7 +47,7 @@ use std::any::Any; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct CoalesceFunc { - signature: Signature, + pub(super) signature: Signature, } impl Default for CoalesceFunc { @@ -126,6 +126,15 @@ impl ScalarUDFImpl for CoalesceFunc { internal_err!("coalesce should have been simplified to case") } + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + let eager = vec![&args[0]]; + let lazy = args[1..].iter().collect(); + Some((eager, lazy)) + } + fn short_circuits(&self) -> bool { true } diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index c8b34c4b17800..0b9968a88fc95 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -15,21 +15,19 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::Array; -use arrow::compute::is_not_null; -use arrow::compute::kernels::zip::zip; -use arrow::datatypes::DataType; -use datafusion_common::{utils::take_function_args, Result}; +use crate::core::coalesce::CoalesceFunc; +use arrow::datatypes::{DataType, FieldRef}; +use datafusion_common::Result; +use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; use datafusion_expr::{ - ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, - Volatility, + ColumnarValue, Documentation, Expr, ReturnFieldArgs, ScalarFunctionArgs, + ScalarUDFImpl, Signature, Volatility, }; use datafusion_macros::user_doc; -use std::sync::Arc; #[user_doc( doc_section(label = "Conditional Functions"), - description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_.", + description = "Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_ and _expression2_ is not evaluated. 
This function can be used to substitute a default value for NULL values.", syntax_example = "nvl(expression1, expression2)", sql_example = r#"```sql > select nvl(null, 'a'); @@ -57,7 +55,7 @@ use std::sync::Arc; )] #[derive(Debug, PartialEq, Eq, Hash)] pub struct NVLFunc { - signature: Signature, + coalesce: CoalesceFunc, aliases: Vec, } @@ -90,11 +88,13 @@ impl Default for NVLFunc { impl NVLFunc { pub fn new() -> Self { Self { - signature: Signature::uniform( - 2, - SUPPORTED_NVL_TYPES.to_vec(), - Volatility::Immutable, - ), + coalesce: CoalesceFunc { + signature: Signature::uniform( + 2, + SUPPORTED_NVL_TYPES.to_vec(), + Volatility::Immutable, + ), + }, aliases: vec![String::from("ifnull")], } } @@ -110,209 +110,45 @@ impl ScalarUDFImpl for NVLFunc { } fn signature(&self) -> &Signature { - &self.signature + &self.coalesce.signature } fn return_type(&self, arg_types: &[DataType]) -> Result { - Ok(arg_types[0].clone()) + self.coalesce.return_type(arg_types) } - fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { - nvl_func(&args.args) - } - - fn aliases(&self) -> &[String] { - &self.aliases - } - - fn documentation(&self) -> Option<&Documentation> { - self.doc() + fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result { + self.coalesce.return_field_from_args(args) } -} - -fn nvl_func(args: &[ColumnarValue]) -> Result { - let [lhs, rhs] = take_function_args("nvl/ifnull", args)?; - let (lhs_array, rhs_array) = match (lhs, rhs) { - (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => { - (Arc::clone(lhs), rhs.to_array_of_size(lhs.len())?) - } - (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => { - (Arc::clone(lhs), Arc::clone(rhs)) - } - (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => { - (lhs.to_array_of_size(rhs.len())?, Arc::clone(rhs)) - } - (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => { - let mut current_value = lhs; - if lhs.is_null() { - current_value = rhs; - } - return Ok(ColumnarValue::Scalar(current_value.clone())); - } - }; - let to_apply = is_not_null(&lhs_array)?; - let value = zip(&to_apply, &lhs_array, &rhs_array)?; - Ok(ColumnarValue::Array(value)) -} - -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use arrow::array::*; - use super::*; - use datafusion_common::ScalarValue; - - #[test] - fn nvl_int32() -> Result<()> { - let a = Int32Array::from(vec![ - Some(1), - Some(2), - None, - None, - Some(3), - None, - None, - Some(4), - Some(5), - ]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(6i32))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(1), - Some(2), - Some(6), - Some(6), - Some(3), - Some(6), - Some(6), - Some(4), - Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn simplify( + &self, + args: Vec, + info: &dyn SimplifyInfo, + ) -> Result { + self.coalesce.simplify(args, info) } - #[test] - // Ensure that arrays with no nulls can also invoke nvl() correctly - fn nvl_int32_non_nulls() -> Result<()> { - let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(20i32))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(1), - Some(3), - 
Some(10), - Some(7), - Some(8), - Some(1), - Some(2), - Some(4), - Some(5), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + self.coalesce.invoke_with_args(args) } - #[test] - fn nvl_boolean() -> Result<()> { - let a = BooleanArray::from(vec![Some(true), Some(false), None]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(BooleanArray::from(vec![ - Some(true), - Some(false), - Some(false), - ])) as ArrayRef; - - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn conditional_arguments<'a>( + &self, + args: &'a [Expr], + ) -> Option<(Vec<&'a Expr>, Vec<&'a Expr>)> { + self.coalesce.conditional_arguments(args) } - #[test] - fn nvl_string() -> Result<()> { - let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::from("bax")); - - let result = nvl_func(&[a, lit_array])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(StringArray::from(vec![ - Some("foo"), - Some("bar"), - Some("bax"), - Some("baz"), - ])) as ArrayRef; - - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn short_circuits(&self) -> bool { + self.coalesce.short_circuits() } - #[test] - fn nvl_literal_first() -> Result<()> { - let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]); - let a = ColumnarValue::Array(Arc::new(a)); - - let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - - let result = nvl_func(&[lit_array, a])?; - let result = result.into_array(0).expect("Failed to convert to array"); - - let expected = Arc::new(Int32Array::from(vec![ - Some(2), - Some(2), - Some(2), - Some(2), - Some(2), - Some(2), - ])) as ArrayRef; - assert_eq!(expected.as_ref(), result.as_ref()); - Ok(()) + fn aliases(&self) -> &[String] { + &self.aliases } - #[test] - fn nvl_scalar() -> Result<()> { - let a_null = ColumnarValue::Scalar(ScalarValue::Int32(None)); - let b_null = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - - let result_null = nvl_func(&[a_null, b_null])?; - let result_null = result_null - .into_array(1) - .expect("Failed to convert to array"); - - let expected_null = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; - - assert_eq!(expected_null.as_ref(), result_null.as_ref()); - - let a_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); - let b_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); - - let result_nnull = nvl_func(&[a_nnull, b_nnull])?; - let result_nnull = result_nnull - .into_array(1) - .expect("Failed to convert to array"); - - let expected_nnull = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; - assert_eq!(expected_nnull.as_ref(), result_nnull.as_ref()); - - Ok(()) + fn documentation(&self) -> Option<&Documentation> { + self.doc() } } diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index ec1f8f991a8ee..2510068494591 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -652,10 +652,8 @@ impl CSEController for ExprCSEController<'_> { // In case of `ScalarFunction`s we 
don't know which children are surely // executed so start visiting all children conditionally and stop the // recursion with `TreeNodeRecursion::Jump`. - Expr::ScalarFunction(ScalarFunction { func, args }) - if func.short_circuits() => - { - Some((vec![], args.iter().collect())) + Expr::ScalarFunction(ScalarFunction { func, args }) => { + func.conditional_arguments(args) } // In case of `And` and `Or` the first child is surely executed, but we diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt index daab54307cc20..f4225148ab781 100644 --- a/datafusion/sqllogictest/test_files/nvl.slt +++ b/datafusion/sqllogictest/test_files/nvl.slt @@ -148,3 +148,38 @@ query T SELECT NVL(arrow_cast('a', 'Utf8View'), NULL); ---- a + +# nvl is implemented as a case, and short-circuits evaluation +# so the following query should not error +query I +SELECT NVL(1, 1/0); +---- +1 + +# but this one should +query error DataFusion error: Arrow error: Divide by zero error +SELECT NVL(NULL, 1/0); + +# Expect the query plan to show nvl as a case expression +query I +select NVL(int_field, 9999) FROM test; +---- +1 +2 +3 +9999 +4 +9999 + +# Expect the query plan to show nvl as a case expression +query TT +EXPLAIN select NVL(int_field, 9999) FROM test; +---- +logical_plan +01)Projection: CASE WHEN __common_expr_1 IS NOT NULL THEN __common_expr_1 ELSE Int64(9999) END AS nvl(test.int_field,Int64(9999)) +02)--Projection: CAST(test.int_field AS Int64) AS __common_expr_1 +03)----TableScan: test projection=[int_field] +physical_plan +01)ProjectionExec: expr=[CASE WHEN __common_expr_1@0 IS NOT NULL THEN __common_expr_1@0 ELSE 9999 END as nvl(test.int_field,Int64(9999))] +02)--ProjectionExec: expr=[CAST(int_field@0 AS Int64) as __common_expr_1] +03)----DataSourceExec: partitions=1, partition_sizes=[1] diff --git a/datafusion/sqllogictest/test_files/string/string_view.slt b/datafusion/sqllogictest/test_files/string/string_view.slt index fb67daa0b8405..4d30f572ad6fb 100644 --- a/datafusion/sqllogictest/test_files/string/string_view.slt +++ b/datafusion/sqllogictest/test_files/string/string_view.slt @@ -988,7 +988,7 @@ query TT EXPLAIN SELECT NVL(column1_utf8view, 'a') as c2 FROM test; ---- logical_plan -01)Projection: nvl(test.column1_utf8view, Utf8View("a")) AS c2 +01)Projection: CASE WHEN test.column1_utf8view IS NOT NULL THEN test.column1_utf8view ELSE Utf8View("a") END AS c2 02)--TableScan: test projection=[column1_utf8view] ## Ensure no casts for nullif diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 9fcaac7628557..ec2faf8b3d5df 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1056,7 +1056,7 @@ nullif(expression1, expression2) ### `nvl` -Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_. +Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_ and _expression2_ is not evaluated. This function can be used to substitute a default value for NULL values. 
```sql nvl(expression1, expression2) From 7c3b0d0a68d89ba0ac079c7d9adaa3d52ece1c39 Mon Sep 17 00:00:00 2001 From: Jeffrey Vo Date: Fri, 17 Oct 2025 04:23:36 +1100 Subject: [PATCH 014/109] minor: fix incorrect deprecation version & window docs (#18093) --- datafusion/functions-window-common/src/expr.rs | 2 +- datafusion/functions-window-common/src/partition.rs | 2 +- datafusion/functions/src/planner.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-window-common/src/expr.rs b/datafusion/functions-window-common/src/expr.rs index 774cd5182b30b..d72cd412f0175 100644 --- a/datafusion/functions-window-common/src/expr.rs +++ b/datafusion/functions-window-common/src/expr.rs @@ -37,7 +37,7 @@ impl<'a> ExpressionArgs<'a> { /// /// * `input_exprs` - The expressions passed as arguments /// to the user-defined window function. - /// * `input_types` - The data types corresponding to the + /// * `input_fields` - The fields corresponding to the /// arguments to the user-defined window function. /// pub fn new( diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index 61125e596130b..df0a815401177 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -42,7 +42,7 @@ impl<'a> PartitionEvaluatorArgs<'a> { /// /// * `input_exprs` - The expressions passed as arguments /// to the user-defined window function. - /// * `input_types` - The data types corresponding to the + /// * `input_fields` - The fields corresponding to the /// arguments to the user-defined window function. /// * `is_reversed` - Set to `true` if and only if the user-defined /// window function is reversible and is reversed. diff --git a/datafusion/functions/src/planner.rs b/datafusion/functions/src/planner.rs index 7228cdc07e727..ccd167997003e 100644 --- a/datafusion/functions/src/planner.rs +++ b/datafusion/functions/src/planner.rs @@ -25,7 +25,7 @@ use datafusion_expr::{ }; #[deprecated( - since = "0.50.0", + since = "50.0.0", note = "Use UnicodeFunctionPlanner and DateTimeFunctionPlanner instead" )] #[derive(Default, Debug)] From 337378ab81f6c7dab7da9000124c554d3b7ee568 Mon Sep 17 00:00:00 2001 From: Oleks V Date: Thu, 16 Oct 2025 12:36:01 -0700 Subject: [PATCH 015/109] chore: use `NullBuffer::union` for Spark `concat` (#18087) ## Which issue does this PR close? - Closes #. Followup on https://github.com/apache/datafusion/pull/18063#pullrequestreview-3341818221 ## Rationale for this change Use cheaper `NullBuffer::union` to apply null mask instead of iterator approach ## What changes are included in this PR? ## Are these changes tested? ## Are there any user-facing changes? --- .../spark/src/function/string/concat.rs | 141 +++++++----------- 1 file changed, 52 insertions(+), 89 deletions(-) diff --git a/datafusion/spark/src/function/string/concat.rs b/datafusion/spark/src/function/string/concat.rs index 0e981e7c37224..0dcc58d5bb8ed 100644 --- a/datafusion/spark/src/function/string/concat.rs +++ b/datafusion/spark/src/function/string/concat.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow::array::{Array, ArrayBuilder}; +use arrow::array::Array; +use arrow::buffer::NullBuffer; use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ @@ -31,6 +32,10 @@ use std::sync::Arc; /// /// Concatenates multiple input strings into a single string. 
/// Returns NULL if any input is NULL. +/// +/// Differences with DataFusion concat: +/// - Support 0 arguments +/// - Return NULL if any input is NULL #[derive(Debug, PartialEq, Eq, Hash)] pub struct SparkConcat { signature: Signature, @@ -80,6 +85,16 @@ impl ScalarUDFImpl for SparkConcat { } } +/// Represents the null state for Spark concat +enum NullMaskResolution { + /// Return NULL as the result (e.g., scalar inputs with at least one NULL) + ReturnNull, + /// No null mask needed (e.g., all scalar inputs are non-NULL) + NoMask, + /// Null mask to apply for arrays + Apply(NullBuffer), +} + /// Concatenates strings, returning NULL if any input is NULL /// This is a Spark-specific wrapper around DataFusion's concat that returns NULL /// if any argument is NULL (Spark behavior), whereas DataFusion's concat ignores NULLs. @@ -103,7 +118,7 @@ fn spark_concat(args: ScalarFunctionArgs) -> Result { let null_mask = compute_null_mask(&arg_values, number_rows)?; // If all scalars and any is NULL, return NULL immediately - if null_mask.is_none() { + if matches!(null_mask, NullMaskResolution::ReturnNull) { return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))); } @@ -122,13 +137,11 @@ fn spark_concat(args: ScalarFunctionArgs) -> Result { apply_null_mask(result, null_mask) } -/// Compute NULL mask for the arguments -/// Returns None if all scalars and any is NULL, or a Vector of -/// boolean representing the null mask for incoming arrays +/// Compute NULL mask for the arguments using NullBuffer::union fn compute_null_mask( args: &[ColumnarValue], number_rows: usize, -) -> Result>> { +) -> Result { // Check if all arguments are scalars let all_scalars = args .iter() @@ -139,15 +152,14 @@ fn compute_null_mask( for arg in args { if let ColumnarValue::Scalar(scalar) = arg { if scalar.is_null() { - // Return None to indicate all values should be NULL - return Ok(None); + return Ok(NullMaskResolution::ReturnNull); } } } // No NULLs in scalars - Ok(Some(vec![])) + Ok(NullMaskResolution::NoMask) } else { - // For arrays, compute NULL mask for each row + // For arrays, compute NULL mask for each row using NullBuffer::union let array_len = args .iter() .find_map(|arg| match arg { @@ -166,99 +178,50 @@ fn compute_null_mask( .collect(); let arrays = arrays?; - // Compute NULL mask - let mut null_mask = vec![false; array_len]; - for array in &arrays { - for (i, null_flag) in null_mask.iter_mut().enumerate().take(array_len) { - if array.is_null(i) { - *null_flag = true; - } - } - } + // Use NullBuffer::union to combine all null buffers + let combined_nulls = arrays + .iter() + .map(|arr| arr.nulls()) + .fold(None, |acc, nulls| NullBuffer::union(acc.as_ref(), nulls)); - Ok(Some(null_mask)) + match combined_nulls { + Some(nulls) => Ok(NullMaskResolution::Apply(nulls)), + None => Ok(NullMaskResolution::NoMask), + } } } -/// Apply NULL mask to the result +/// Apply NULL mask to the result using NullBuffer::union fn apply_null_mask( result: ColumnarValue, - null_mask: Option>, + null_mask: NullMaskResolution, ) -> Result { match (result, null_mask) { - // Scalar with NULL mask means return NULL - (ColumnarValue::Scalar(_), None) => { + // Scalar with ReturnNull mask means return NULL + (ColumnarValue::Scalar(_), NullMaskResolution::ReturnNull) => { Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))) } - // Scalar without NULL mask, return as-is - (scalar @ ColumnarValue::Scalar(_), Some(mask)) if mask.is_empty() => Ok(scalar), - // Array with NULL mask - (ColumnarValue::Array(array), Some(null_mask)) if 
!null_mask.is_empty() => { - let array_len = array.len(); - let return_type = array.data_type(); + // Scalar without mask, return as-is + (scalar @ ColumnarValue::Scalar(_), NullMaskResolution::NoMask) => Ok(scalar), + // Array with NULL mask - use NullBuffer::union to combine nulls + (ColumnarValue::Array(array), NullMaskResolution::Apply(null_mask)) => { + // Combine the result's existing nulls with our computed null mask + let combined_nulls = NullBuffer::union(array.nulls(), Some(&null_mask)); - let mut builder: Box = match return_type { - DataType::Utf8 => { - let string_array = array - .as_any() - .downcast_ref::() - .unwrap(); - let mut builder = - arrow::array::StringBuilder::with_capacity(array_len, 0); - for (i, &is_null) in null_mask.iter().enumerate().take(array_len) { - if is_null || string_array.is_null(i) { - builder.append_null(); - } else { - builder.append_value(string_array.value(i)); - } - } - Box::new(builder) - } - DataType::LargeUtf8 => { - let string_array = array - .as_any() - .downcast_ref::() - .unwrap(); - let mut builder = - arrow::array::LargeStringBuilder::with_capacity(array_len, 0); - for (i, &is_null) in null_mask.iter().enumerate().take(array_len) { - if is_null || string_array.is_null(i) { - builder.append_null(); - } else { - builder.append_value(string_array.value(i)); - } - } - Box::new(builder) - } - DataType::Utf8View => { - let string_array = array - .as_any() - .downcast_ref::() - .unwrap(); - let mut builder = - arrow::array::StringViewBuilder::with_capacity(array_len); - for (i, &is_null) in null_mask.iter().enumerate().take(array_len) { - if is_null || string_array.is_null(i) { - builder.append_null(); - } else { - builder.append_value(string_array.value(i)); - } - } - Box::new(builder) - } - _ => { - return datafusion_common::exec_err!( - "Unsupported return type for concat: {:?}", - return_type - ); - } - }; + // Create new array with combined nulls + let new_array = array + .into_data() + .into_builder() + .nulls(combined_nulls) + .build()?; - Ok(ColumnarValue::Array(builder.finish())) + Ok(ColumnarValue::Array(Arc::new(arrow::array::make_array( + new_array, + )))) } // Array without NULL mask, return as-is - (array @ ColumnarValue::Array(_), _) => Ok(array), - // Shouldn't happen + (array @ ColumnarValue::Array(_), NullMaskResolution::NoMask) => Ok(array), + // Edge cases that shouldn't happen in practice (scalar, _) => Ok(scalar), } } From cadf42955146fd91cf594e0375eb932752457f55 Mon Sep 17 00:00:00 2001 From: Khanh Duong Date: Fri, 17 Oct 2025 09:56:36 +0900 Subject: [PATCH 016/109] feat: support `null_treatment`, `distinct`, and `filter` for window functions in proto (#18024) ## Which issue does this PR close? - Closes #17417. ## Rationale for this change - Support `null_treatment`, `distinct`, and `filter` for window function in proto. - Support `null_treatment` for aggregate udf in proto. ## What changes are included in this PR? - [x] Add `null_treatment`, `distinct`, `filter` fields to `WindowExprNode` message and handle them in `to/from_proto.rs`. - [x] Add `null_treatment` field to `AggregateUDFExprNode` message and handle them in `to/from_proto.rs`. - [ ] Docs update: I'm not sure where to add docs as declared in the issue description. ## Are these changes tested? - Add tests to `roundtrip_window` for respectnulls, ignorenulls, distinct, filter. - Add tests to `roundtrip_aggregate_udf` for respectnulls, ignorenulls. ## Are there any user-facing changes? 
N/A --------- Co-authored-by: Jeffrey Vo --- datafusion/proto/proto/datafusion.proto | 11 +- datafusion/proto/src/generated/pbjson.rs | 145 ++++++++++++++++++ datafusion/proto/src/generated/prost.rs | 36 ++++- .../proto/src/logical_plan/from_proto.rs | 87 +++++++---- datafusion/proto/src/logical_plan/to_proto.rs | 33 +++- .../tests/cases/roundtrip_logical_plan.rs | 93 +++++++++-- 6 files changed, 359 insertions(+), 46 deletions(-) diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index ee9ac0e7902d3..11103472ae2ae 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -518,6 +518,7 @@ message AggregateUDFExprNode { LogicalExprNode filter = 3; repeated SortExprNode order_by = 4; optional bytes fun_definition = 6; + optional NullTreatment null_treatment = 7; } message ScalarUDFExprNode { @@ -538,6 +539,9 @@ message WindowExprNode { // repeated LogicalExprNode filter = 7; WindowFrame window_frame = 8; optional bytes fun_definition = 10; + optional NullTreatment null_treatment = 11; + bool distinct = 12; + LogicalExprNode filter = 13; } message BetweenNode { @@ -622,6 +626,11 @@ message WindowFrameBound { datafusion_common.ScalarValue bound_value = 2; } +enum NullTreatment { + RESPECT_NULLS = 0; + IGNORE_NULLS = 1; +} + /////////////////////////////////////////////////////////////////////////////////////////////////// // Arrow Data Types /////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1365,4 +1374,4 @@ message SortMergeJoinExecNode { JoinFilter filter = 5; repeated SortExprNode sort_options = 6; datafusion_common.NullEquality null_equality = 7; -} \ No newline at end of file +} diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 29967d812000f..b34da2c312de0 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -596,6 +596,9 @@ impl serde::Serialize for AggregateUdfExprNode { if self.fun_definition.is_some() { len += 1; } + if self.null_treatment.is_some() { + len += 1; + } let mut struct_ser = serializer.serialize_struct("datafusion.AggregateUDFExprNode", len)?; if !self.fun_name.is_empty() { struct_ser.serialize_field("funName", &self.fun_name)?; @@ -617,6 +620,11 @@ impl serde::Serialize for AggregateUdfExprNode { #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } + if let Some(v) = self.null_treatment.as_ref() { + let v = NullTreatment::try_from(*v) + .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; + struct_ser.serialize_field("nullTreatment", &v)?; + } struct_ser.end() } } @@ -636,6 +644,8 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { "orderBy", "fun_definition", "funDefinition", + "null_treatment", + "nullTreatment", ]; #[allow(clippy::enum_variant_names)] @@ -646,6 +656,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { Filter, OrderBy, FunDefinition, + NullTreatment, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -673,6 +684,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { "filter" => Ok(GeneratedField::Filter), "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), + "nullTreatment" | "null_treatment" => 
Ok(GeneratedField::NullTreatment), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -698,6 +710,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { let mut filter__ = None; let mut order_by__ = None; let mut fun_definition__ = None; + let mut null_treatment__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::FunName => { @@ -738,6 +751,12 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } + GeneratedField::NullTreatment => { + if null_treatment__.is_some() { + return Err(serde::de::Error::duplicate_field("nullTreatment")); + } + null_treatment__ = map_.next_value::<::std::option::Option>()?.map(|x| x as i32); + } } } Ok(AggregateUdfExprNode { @@ -747,6 +766,7 @@ impl<'de> serde::Deserialize<'de> for AggregateUdfExprNode { filter: filter__, order_by: order_by__.unwrap_or_default(), fun_definition: fun_definition__, + null_treatment: null_treatment__, }) } } @@ -13284,6 +13304,77 @@ impl<'de> serde::Deserialize<'de> for Not { deserializer.deserialize_struct("datafusion.Not", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for NullTreatment { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + let variant = match self { + Self::RespectNulls => "RESPECT_NULLS", + Self::IgnoreNulls => "IGNORE_NULLS", + }; + serializer.serialize_str(variant) + } +} +impl<'de> serde::Deserialize<'de> for NullTreatment { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "RESPECT_NULLS", + "IGNORE_NULLS", + ]; + + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = NullTreatment; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", &FIELDS) + } + + fn visit_i64(self, v: i64) -> std::result::Result + where + E: serde::de::Error, + { + i32::try_from(v) + .ok() + .and_then(|x| x.try_into().ok()) + .ok_or_else(|| { + serde::de::Error::invalid_value(serde::de::Unexpected::Signed(v), &self) + }) + } + + fn visit_u64(self, v: u64) -> std::result::Result + where + E: serde::de::Error, + { + i32::try_from(v) + .ok() + .and_then(|x| x.try_into().ok()) + .ok_or_else(|| { + serde::de::Error::invalid_value(serde::de::Unexpected::Unsigned(v), &self) + }) + } + + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "RESPECT_NULLS" => Ok(NullTreatment::RespectNulls), + "IGNORE_NULLS" => Ok(NullTreatment::IgnoreNulls), + _ => Err(serde::de::Error::unknown_variant(value, FIELDS)), + } + } + } + deserializer.deserialize_any(GeneratedVisitor) + } +} impl serde::Serialize for OptimizedLogicalPlanType { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result @@ -23514,6 +23605,15 @@ impl serde::Serialize for WindowExprNode { if self.fun_definition.is_some() { len += 1; } + if self.null_treatment.is_some() { + len += 1; + } + if self.distinct { + len += 1; + } + if self.filter.is_some() { + len += 1; + } if self.window_function.is_some() { len += 1; } @@ -23535,6 +23635,17 @@ impl serde::Serialize for WindowExprNode { #[allow(clippy::needless_borrows_for_generic_args)] struct_ser.serialize_field("funDefinition", pbjson::private::base64::encode(&v).as_str())?; } + if let Some(v) = 
self.null_treatment.as_ref() { + let v = NullTreatment::try_from(*v) + .map_err(|_| serde::ser::Error::custom(format!("Invalid variant {}", *v)))?; + struct_ser.serialize_field("nullTreatment", &v)?; + } + if self.distinct { + struct_ser.serialize_field("distinct", &self.distinct)?; + } + if let Some(v) = self.filter.as_ref() { + struct_ser.serialize_field("filter", v)?; + } if let Some(v) = self.window_function.as_ref() { match v { window_expr_node::WindowFunction::Udaf(v) => { @@ -23564,6 +23675,10 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "windowFrame", "fun_definition", "funDefinition", + "null_treatment", + "nullTreatment", + "distinct", + "filter", "udaf", "udwf", ]; @@ -23575,6 +23690,9 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { OrderBy, WindowFrame, FunDefinition, + NullTreatment, + Distinct, + Filter, Udaf, Udwf, } @@ -23603,6 +23721,9 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { "orderBy" | "order_by" => Ok(GeneratedField::OrderBy), "windowFrame" | "window_frame" => Ok(GeneratedField::WindowFrame), "funDefinition" | "fun_definition" => Ok(GeneratedField::FunDefinition), + "nullTreatment" | "null_treatment" => Ok(GeneratedField::NullTreatment), + "distinct" => Ok(GeneratedField::Distinct), + "filter" => Ok(GeneratedField::Filter), "udaf" => Ok(GeneratedField::Udaf), "udwf" => Ok(GeneratedField::Udwf), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -23629,6 +23750,9 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { let mut order_by__ = None; let mut window_frame__ = None; let mut fun_definition__ = None; + let mut null_treatment__ = None; + let mut distinct__ = None; + let mut filter__ = None; let mut window_function__ = None; while let Some(k) = map_.next_key()? { match k { @@ -23664,6 +23788,24 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { map_.next_value::<::std::option::Option<::pbjson::private::BytesDeserialize<_>>>()?.map(|x| x.0) ; } + GeneratedField::NullTreatment => { + if null_treatment__.is_some() { + return Err(serde::de::Error::duplicate_field("nullTreatment")); + } + null_treatment__ = map_.next_value::<::std::option::Option>()?.map(|x| x as i32); + } + GeneratedField::Distinct => { + if distinct__.is_some() { + return Err(serde::de::Error::duplicate_field("distinct")); + } + distinct__ = Some(map_.next_value()?); + } + GeneratedField::Filter => { + if filter__.is_some() { + return Err(serde::de::Error::duplicate_field("filter")); + } + filter__ = map_.next_value()?; + } GeneratedField::Udaf => { if window_function__.is_some() { return Err(serde::de::Error::duplicate_field("udaf")); @@ -23684,6 +23826,9 @@ impl<'de> serde::Deserialize<'de> for WindowExprNode { order_by: order_by__.unwrap_or_default(), window_frame: window_frame__, fun_definition: fun_definition__, + null_treatment: null_treatment__, + distinct: distinct__.unwrap_or_default(), + filter: filter__, window_function: window_function__, }) } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index d3b5f566e98b7..2e1c482db65c4 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -605,7 +605,7 @@ pub mod logical_expr_node { TryCast(::prost::alloc::boxed::Box), /// window expressions #[prost(message, tag = "18")] - WindowExpr(super::WindowExprNode), + WindowExpr(::prost::alloc::boxed::Box), /// AggregateUDF expressions #[prost(message, tag = "19")] AggregateUdfExpr(::prost::alloc::boxed::Box), @@ -795,6 +795,8 @@ pub struct 
AggregateUdfExprNode { pub order_by: ::prost::alloc::vec::Vec, #[prost(bytes = "vec", optional, tag = "6")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, + #[prost(enumeration = "NullTreatment", optional, tag = "7")] + pub null_treatment: ::core::option::Option, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScalarUdfExprNode { @@ -818,6 +820,12 @@ pub struct WindowExprNode { pub window_frame: ::core::option::Option, #[prost(bytes = "vec", optional, tag = "10")] pub fun_definition: ::core::option::Option<::prost::alloc::vec::Vec>, + #[prost(enumeration = "NullTreatment", optional, tag = "11")] + pub null_treatment: ::core::option::Option, + #[prost(bool, tag = "12")] + pub distinct: bool, + #[prost(message, optional, boxed, tag = "13")] + pub filter: ::core::option::Option<::prost::alloc::boxed::Box>, #[prost(oneof = "window_expr_node::WindowFunction", tags = "3, 9")] pub window_function: ::core::option::Option, } @@ -2129,6 +2137,32 @@ impl WindowFrameBoundType { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] +pub enum NullTreatment { + RespectNulls = 0, + IgnoreNulls = 1, +} +impl NullTreatment { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. + pub fn as_str_name(&self) -> &'static str { + match self { + Self::RespectNulls => "RESPECT_NULLS", + Self::IgnoreNulls => "IGNORE_NULLS", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "RESPECT_NULLS" => Some(Self::RespectNulls), + "IGNORE_NULLS" => Some(Self::IgnoreNulls), + _ => None, + } + } +} +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] +#[repr(i32)] pub enum DateUnit { Day = 0, DateMillisecond = 1, diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index cbfa15183b5c1..ec6415adc4c9b 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -23,7 +23,7 @@ use datafusion_common::{ RecursionUnnestOption, Result, ScalarValue, TableReference, UnnestOptions, }; use datafusion_expr::dml::InsertOp; -use datafusion_expr::expr::{Alias, Placeholder, Sort}; +use datafusion_expr::expr::{Alias, NullTreatment, Placeholder, Sort}; use datafusion_expr::expr::{Unnest, WildcardOptions}; use datafusion_expr::{ expr::{self, InList, WindowFunction}, @@ -243,6 +243,15 @@ impl From for WriteOp { } } +impl From for NullTreatment { + fn from(t: protobuf::NullTreatment) -> Self { + match t { + protobuf::NullTreatment::RespectNulls => NullTreatment::RespectNulls, + protobuf::NullTreatment::IgnoreNulls => NullTreatment::IgnoreNulls, + } + } +} + pub fn parse_expr( proto: &protobuf::LogicalExprNode, registry: &dyn FunctionRegistry, @@ -301,9 +310,21 @@ pub fn parse_expr( exec_datafusion_err!("missing window frame during deserialization") })?; - // TODO: support null treatment, distinct, and filter in proto. 
- // See https://github.com/apache/datafusion/issues/17417 - match window_function { + let null_treatment = match expr.null_treatment { + Some(null_treatment) => { + let null_treatment = protobuf::NullTreatment::try_from(null_treatment) + .map_err(|_| { + proto_error(format!( + "Received a WindowExprNode message with unknown NullTreatment {}", + null_treatment + )) + })?; + Some(NullTreatment::from(null_treatment)) + } + None => None, + }; + + let agg_fn = match window_function { window_expr_node::WindowFunction::Udaf(udaf_name) => { let udaf_function = match &expr.fun_definition { Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, @@ -311,17 +332,7 @@ pub fn parse_expr( .udaf(udaf_name) .or_else(|_| codec.try_decode_udaf(udaf_name, &[]))?, }; - - let args = parse_exprs(&expr.exprs, registry, codec)?; - Expr::from(WindowFunction::new( - expr::WindowFunctionDefinition::AggregateUDF(udaf_function), - args, - )) - .partition_by(partition_by) - .order_by(order_by) - .window_frame(window_frame) - .build() - .map_err(Error::DataFusionError) + expr::WindowFunctionDefinition::AggregateUDF(udaf_function) } window_expr_node::WindowFunction::Udwf(udwf_name) => { let udwf_function = match &expr.fun_definition { @@ -330,19 +341,28 @@ pub fn parse_expr( .udwf(udwf_name) .or_else(|_| codec.try_decode_udwf(udwf_name, &[]))?, }; - - let args = parse_exprs(&expr.exprs, registry, codec)?; - Expr::from(WindowFunction::new( - expr::WindowFunctionDefinition::WindowUDF(udwf_function), - args, - )) - .partition_by(partition_by) - .order_by(order_by) - .window_frame(window_frame) - .build() - .map_err(Error::DataFusionError) + expr::WindowFunctionDefinition::WindowUDF(udwf_function) } + }; + + let args = parse_exprs(&expr.exprs, registry, codec)?; + let mut builder = Expr::from(WindowFunction::new(agg_fn, args)) + .partition_by(partition_by) + .order_by(order_by) + .window_frame(window_frame) + .null_treatment(null_treatment); + + if expr.distinct { + builder = builder.distinct(); + }; + + if let Some(filter) = + parse_optional_expr(expr.filter.as_deref(), registry, codec)? 
+ { + builder = builder.filter(filter); } + + builder.build().map_err(Error::DataFusionError) } ExprType::Alias(alias) => Ok(Expr::Alias(Alias::new( parse_required_expr(alias.expr.as_deref(), registry, "expr", codec)?, @@ -571,6 +591,19 @@ pub fn parse_expr( .udaf(&pb.fun_name) .or_else(|_| codec.try_decode_udaf(&pb.fun_name, &[]))?, }; + let null_treatment = match pb.null_treatment { + Some(null_treatment) => { + let null_treatment = protobuf::NullTreatment::try_from(null_treatment) + .map_err(|_| { + proto_error(format!( + "Received an AggregateUdfExprNode message with unknown NullTreatment {}", + null_treatment + )) + })?; + Some(NullTreatment::from(null_treatment)) + } + None => None, + }; Ok(Expr::AggregateFunction(expr::AggregateFunction::new_udf( agg_fn, @@ -578,7 +611,7 @@ pub fn parse_expr( pb.distinct, parse_optional_expr(pb.filter.as_deref(), registry, codec)?.map(Box::new), parse_sorts(&pb.order_by, registry, codec)?, - None, + null_treatment, ))) } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 1be3300008c79..6238c2f1cdded 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -25,7 +25,7 @@ use datafusion_common::{NullEquality, TableReference, UnnestOptions}; use datafusion_expr::dml::InsertOp; use datafusion_expr::expr::{ self, AggregateFunctionParams, Alias, Between, BinaryExpr, Cast, GroupingSet, InList, - Like, Placeholder, ScalarFunction, Unnest, + Like, NullTreatment, Placeholder, ScalarFunction, Unnest, }; use datafusion_expr::WriteOp; use datafusion_expr::{ @@ -314,11 +314,9 @@ pub fn serialize_expr( ref partition_by, ref order_by, ref window_frame, - // TODO: support null treatment, distinct, and filter in proto. 
- // See https://github.com/apache/datafusion/issues/17417 - null_treatment: _, - distinct: _, - filter: _, + ref null_treatment, + ref distinct, + ref filter, }, } = window_fun.as_ref(); let mut buf = Vec::new(); @@ -342,16 +340,24 @@ pub fn serialize_expr( let window_frame: Option = Some(window_frame.try_into()?); + let window_expr = protobuf::WindowExprNode { exprs: serialize_exprs(args, codec)?, window_function: Some(window_function), partition_by, order_by, window_frame, + distinct: *distinct, + filter: match filter { + Some(e) => Some(Box::new(serialize_expr(e.as_ref(), codec)?)), + None => None, + }, + null_treatment: null_treatment + .map(|nt| protobuf::NullTreatment::from(nt).into()), fun_definition, }; protobuf::LogicalExprNode { - expr_type: Some(ExprType::WindowExpr(window_expr)), + expr_type: Some(ExprType::WindowExpr(Box::new(window_expr))), } } Expr::AggregateFunction(expr::AggregateFunction { @@ -362,7 +368,7 @@ pub fn serialize_expr( ref distinct, ref filter, ref order_by, - null_treatment: _, + ref null_treatment, }, }) => { let mut buf = Vec::new(); @@ -379,6 +385,8 @@ pub fn serialize_expr( }, order_by: serialize_sorts(order_by, codec)?, fun_definition: (!buf.is_empty()).then_some(buf), + null_treatment: null_treatment + .map(|nt| protobuf::NullTreatment::from(nt).into()), }, ))), } @@ -722,3 +730,12 @@ impl From<&WriteOp> for protobuf::dml_node::Type { } } } + +impl From for protobuf::NullTreatment { + fn from(t: NullTreatment) -> Self { + match t { + NullTreatment::RespectNulls => protobuf::NullTreatment::RespectNulls, + NullTreatment::IgnoreNulls => protobuf::NullTreatment::IgnoreNulls, + } + } +} diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index c5d4b49092d91..3d51038eba72c 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -73,8 +73,8 @@ use datafusion_common::{ }; use datafusion_expr::dml::CopyTo; use datafusion_expr::expr::{ - self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, ScalarFunction, - Unnest, WildcardOptions, + self, Between, BinaryExpr, Case, Cast, GroupingSet, InList, Like, NullTreatment, + ScalarFunction, Unnest, WildcardOptions, }; use datafusion_expr::logical_plan::{Extension, UserDefinedLogicalNodeCore}; use datafusion_expr::{ @@ -2190,7 +2190,11 @@ fn roundtrip_aggregate_udf() { Arc::new(vec![DataType::Float64, DataType::UInt32]), ); - let test_expr = Expr::AggregateFunction(expr::AggregateFunction::new_udf( + let ctx = SessionContext::new(); + ctx.register_udaf(dummy_agg.clone()); + + // null_treatment absent + let test_expr1 = Expr::AggregateFunction(expr::AggregateFunction::new_udf( Arc::new(dummy_agg.clone()), vec![lit(1.0_f64)], false, @@ -2199,10 +2203,29 @@ fn roundtrip_aggregate_udf() { None, )); - let ctx = SessionContext::new(); - ctx.register_udaf(dummy_agg); + // null_treatment respect nulls + let test_expr2 = Expr::AggregateFunction(expr::AggregateFunction::new_udf( + Arc::new(dummy_agg.clone()), + vec![lit(1.0_f64)], + true, + Some(Box::new(lit(true))), + vec![], + Some(NullTreatment::RespectNulls), + )); - roundtrip_expr_test(test_expr, ctx); + // null_treatment ignore nulls + let test_expr3 = Expr::AggregateFunction(expr::AggregateFunction::new_udf( + Arc::new(dummy_agg), + vec![lit(1.0_f64)], + true, + Some(Box::new(lit(true))), + vec![], + Some(NullTreatment::IgnoreNulls), + )); + + roundtrip_expr_test(test_expr1, ctx.clone()); + 
roundtrip_expr_test(test_expr2, ctx.clone()); + roundtrip_expr_test(test_expr3, ctx); } fn dummy_udf() -> ScalarUDF { @@ -2566,8 +2589,10 @@ fn roundtrip_window() { .window_frame(row_number_frame.clone()) .build() .unwrap(); + ctx.register_udwf(dummy_window_udf); - let text_expr7 = Expr::from(expr::WindowFunction::new( + // 7. test with average udaf + let test_expr7 = Expr::from(expr::WindowFunction::new( WindowFunctionDefinition::AggregateUDF(avg_udaf()), vec![col("col1")], )) @@ -2575,7 +2600,53 @@ fn roundtrip_window() { .build() .unwrap(); - ctx.register_udwf(dummy_window_udf); + // 8. test with respect nulls + let test_expr8 = Expr::from(expr::WindowFunction::new( + WindowFunctionDefinition::WindowUDF(rank_udwf()), + vec![], + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, false)]) + .window_frame(WindowFrame::new(Some(false))) + .null_treatment(NullTreatment::RespectNulls) + .build() + .unwrap(); + + // 9. test with ignore nulls + let test_expr9 = Expr::from(expr::WindowFunction::new( + WindowFunctionDefinition::WindowUDF(rank_udwf()), + vec![], + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, false)]) + .window_frame(WindowFrame::new(Some(false))) + .null_treatment(NullTreatment::IgnoreNulls) + .build() + .unwrap(); + + // 10. test with distinct is `true` + let test_expr10 = Expr::from(expr::WindowFunction::new( + WindowFunctionDefinition::WindowUDF(rank_udwf()), + vec![], + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, false)]) + .window_frame(WindowFrame::new(Some(false))) + .distinct() + .build() + .unwrap(); + + // 11. test with filter + let test_expr11 = Expr::from(expr::WindowFunction::new( + WindowFunctionDefinition::WindowUDF(rank_udwf()), + vec![], + )) + .partition_by(vec![col("col1")]) + .order_by(vec![col("col2").sort(true, false)]) + .window_frame(WindowFrame::new(Some(false))) + .filter(col("col1").eq(lit(1))) + .build() + .unwrap(); roundtrip_expr_test(test_expr1, ctx.clone()); roundtrip_expr_test(test_expr2, ctx.clone()); @@ -2583,7 +2654,11 @@ fn roundtrip_window() { roundtrip_expr_test(test_expr4, ctx.clone()); roundtrip_expr_test(test_expr5, ctx.clone()); roundtrip_expr_test(test_expr6, ctx.clone()); - roundtrip_expr_test(text_expr7, ctx); + roundtrip_expr_test(test_expr7, ctx.clone()); + roundtrip_expr_test(test_expr8, ctx.clone()); + roundtrip_expr_test(test_expr9, ctx.clone()); + roundtrip_expr_test(test_expr10, ctx.clone()); + roundtrip_expr_test(test_expr11, ctx); } #[tokio::test] From c84e3cf5a5a9f4f4b2a0f44a03a90ff0b9461df7 Mon Sep 17 00:00:00 2001 From: Adrian Garcia Badaracco <1755071+adriangb@users.noreply.github.com> Date: Thu, 16 Oct 2025 23:05:54 -0500 Subject: [PATCH 017/109] feat: Add percentile_cont aggregate function (#17988) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Adds exact `percentile_cont` aggregate function as the counterpart to the existing `approx_percentile_cont` function. ## What changes were made? 
### New Implementation - Created `percentile_cont.rs` with full implementation - `PercentileCont` struct implementing `AggregateUDFImpl` - `PercentileContAccumulator` for standard aggregation - `DistinctPercentileContAccumulator` for DISTINCT mode - `PercentileContGroupsAccumulator` for efficient grouped aggregation - `calculate_percentile` function with linear interpolation ### Features - **Exact calculation**: Stores all values in memory for precise results - **WITHIN GROUP syntax**: Supports `WITHIN GROUP (ORDER BY ...)` - **Interpolation**: Uses linear interpolation between values - **All numeric types**: Works with integers, floats, and decimals - **Ordered-set aggregate**: Properly marked as `is_ordered_set_aggregate()` - **GROUP BY support**: Efficient grouped aggregation via GroupsAccumulator ### Tests Added comprehensive tests in `aggregate.slt`: - Error conditions validation - Basic percentile calculations (0.0, 0.25, 0.5, 0.75, 1.0) - Comparison with `median` function - Ascending and descending order - GROUP BY aggregation - NULL handling - Edge cases (empty sets, single values) - Float interpolation - Various numeric data types ## Example Usage ```sql -- Basic usage with WITHIN GROUP syntax SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) FROM table_name; -- With GROUP BY SELECT category, percentile_cont(0.95) WITHIN GROUP (ORDER BY value) FROM sales GROUP BY category; -- Compare with median (percentile_cont(0.5) == median) SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY price) FROM products; ``` ## Performance Considerations Like `median`, this function stores all values in memory before computing results. For large datasets or when approximation is acceptable, use `approx_percentile_cont` instead. ## Related Issues Closes #6714 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude --- datafusion-testing | 2 +- .../src/approx_percentile_cont.rs | 65 +- datafusion/functions-aggregate/src/lib.rs | 4 + .../src/percentile_cont.rs | 814 ++++++++++++++++++ datafusion/functions-aggregate/src/utils.rs | 72 ++ .../sqllogictest/test_files/aggregate.slt | 342 +++++++- .../user-guide/sql/aggregate_functions.md | 45 + 7 files changed, 1294 insertions(+), 50 deletions(-) create mode 100644 datafusion/functions-aggregate/src/percentile_cont.rs create mode 100644 datafusion/functions-aggregate/src/utils.rs diff --git a/datafusion-testing b/datafusion-testing index eccb0e4a42634..905df5f65cc9d 160000 --- a/datafusion-testing +++ b/datafusion-testing @@ -1 +1 @@ -Subproject commit eccb0e4a426344ef3faf534cd60e02e9c3afd3ac +Subproject commit 905df5f65cc9d0851719c21f5a4dd5cd77621f19 diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 0deb09184b3f4..668280314e8d7 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -20,7 +20,7 @@ use std::fmt::{Debug, Formatter}; use std::mem::size_of_val; use std::sync::Arc; -use arrow::array::{Array, RecordBatch}; +use arrow::array::Array; use arrow::compute::{filter, is_not_null}; use arrow::datatypes::FieldRef; use arrow::{ @@ -28,19 +28,19 @@ use arrow::{ ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }, - datatypes::{DataType, Field, Schema}, + datatypes::{DataType, Field}, }; use datafusion_common::{ - downcast_value, internal_err, 
not_impl_datafusion_err, not_impl_err, plan_err, - Result, ScalarValue, + downcast_value, internal_err, not_impl_err, plan_err, DataFusionError, Result, + ScalarValue, }; use datafusion_expr::expr::{AggregateFunction, Sort}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::utils::format_state_name; use datafusion_expr::{ - Accumulator, AggregateUDFImpl, ColumnarValue, Documentation, Expr, Signature, - TypeSignature, Volatility, + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, }; use datafusion_functions_aggregate_common::tdigest::{ TDigest, TryIntoF64, DEFAULT_MAX_SIZE, @@ -48,6 +48,8 @@ use datafusion_functions_aggregate_common::tdigest::{ use datafusion_macros::user_doc; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use crate::utils::{get_scalar_value, validate_percentile_expr}; + create_func!(ApproxPercentileCont, approx_percentile_cont_udaf); /// Computes the approximate percentile continuous of a set of numbers @@ -164,7 +166,8 @@ impl ApproxPercentileCont { &self, args: AccumulatorArgs, ) -> Result { - let percentile = validate_input_percentile_expr(&args.exprs[1])?; + let percentile = + validate_percentile_expr(&args.exprs[1], "APPROX_PERCENTILE_CONT")?; let is_descending = args .order_bys @@ -214,45 +217,15 @@ impl ApproxPercentileCont { } } -fn get_scalar_value(expr: &Arc) -> Result { - let empty_schema = Arc::new(Schema::empty()); - let batch = RecordBatch::new_empty(Arc::clone(&empty_schema)); - if let ColumnarValue::Scalar(s) = expr.evaluate(&batch)? { - Ok(s) - } else { - internal_err!("Didn't expect ColumnarValue::Array") - } -} - -fn validate_input_percentile_expr(expr: &Arc) -> Result { - let percentile = match get_scalar_value(expr) - .map_err(|_| not_impl_datafusion_err!("Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? { - ScalarValue::Float32(Some(value)) => { - value as f64 - } - ScalarValue::Float64(Some(value)) => { - value - } - sv => { - return not_impl_err!( - "Percentile value for 'APPROX_PERCENTILE_CONT' must be Float32 or Float64 literal (got data type {})", - sv.data_type() - ) - } - }; - - // Ensure the percentile is between 0 and 1. - if !(0.0..=1.0).contains(&percentile) { - return plan_err!( - "Percentile value must be between 0.0 and 1.0 inclusive, {percentile} is invalid" - ); - } - Ok(percentile) -} - fn validate_input_max_size_expr(expr: &Arc) -> Result { - let max_size = match get_scalar_value(expr) - .map_err(|_| not_impl_datafusion_err!("Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal, got: {expr}"))? 
{ + let scalar_value = get_scalar_value(expr).map_err(|_e| { + DataFusionError::Plan( + "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal" + .to_string(), + ) + })?; + + let max_size = match scalar_value { ScalarValue::UInt8(Some(q)) => q as usize, ScalarValue::UInt16(Some(q)) => q as usize, ScalarValue::UInt32(Some(q)) => q as usize, @@ -262,7 +235,7 @@ fn validate_input_max_size_expr(expr: &Arc) -> Result { ScalarValue::Int16(Some(q)) if q > 0 => q as usize, ScalarValue::Int8(Some(q)) if q > 0 => q as usize, sv => { - return not_impl_err!( + return plan_err!( "Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal (got data type {}).", sv.data_type() ) diff --git a/datafusion/functions-aggregate/src/lib.rs b/datafusion/functions-aggregate/src/lib.rs index 4f282301ce5bd..b56b2b118e73b 100644 --- a/datafusion/functions-aggregate/src/lib.rs +++ b/datafusion/functions-aggregate/src/lib.rs @@ -81,6 +81,7 @@ pub mod hyperloglog; pub mod median; pub mod min_max; pub mod nth_value; +pub mod percentile_cont; pub mod regr; pub mod stddev; pub mod string_agg; @@ -88,6 +89,7 @@ pub mod sum; pub mod variance; pub mod planner; +mod utils; use crate::approx_percentile_cont::approx_percentile_cont_udaf; use crate::approx_percentile_cont_with_weight::approx_percentile_cont_with_weight_udaf; @@ -123,6 +125,7 @@ pub mod expr_fn { pub use super::min_max::max; pub use super::min_max::min; pub use super::nth_value::nth_value; + pub use super::percentile_cont::percentile_cont; pub use super::regr::regr_avgx; pub use super::regr::regr_avgy; pub use super::regr::regr_count; @@ -171,6 +174,7 @@ pub fn all_default_aggregate_functions() -> Vec> { approx_distinct::approx_distinct_udaf(), approx_percentile_cont_udaf(), approx_percentile_cont_with_weight_udaf(), + percentile_cont::percentile_cont_udaf(), string_agg::string_agg_udaf(), bit_and_or_xor::bit_and_udaf(), bit_and_or_xor::bit_or_udaf(), diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs new file mode 100644 index 0000000000000..8e9e9a3144d48 --- /dev/null +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -0,0 +1,814 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::fmt::{Debug, Formatter}; +use std::mem::{size_of, size_of_val}; +use std::sync::Arc; + +use arrow::array::{ + ArrowNumericType, BooleanArray, ListArray, PrimitiveArray, PrimitiveBuilder, +}; +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; +use arrow::{ + array::{Array, ArrayRef, AsArray}, + datatypes::{ + ArrowNativeType, DataType, Decimal128Type, Decimal256Type, Decimal32Type, + Decimal64Type, Field, FieldRef, Float16Type, Float32Type, Float64Type, + }, +}; + +use arrow::array::ArrowNativeTypeOp; + +use datafusion_common::{ + internal_datafusion_err, internal_err, plan_err, DataFusionError, HashSet, Result, + ScalarValue, +}; +use datafusion_expr::expr::{AggregateFunction, Sort}; +use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; +use datafusion_expr::type_coercion::aggregates::NUMERICS; +use datafusion_expr::utils::format_state_name; +use datafusion_expr::{ + Accumulator, AggregateUDFImpl, Documentation, Expr, Signature, TypeSignature, + Volatility, +}; +use datafusion_expr::{EmitTo, GroupsAccumulator}; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate; +use datafusion_functions_aggregate_common::aggregate::groups_accumulator::nulls::filtered_null_mask; +use datafusion_functions_aggregate_common::utils::Hashable; +use datafusion_macros::user_doc; + +use crate::utils::validate_percentile_expr; + +/// Precision multiplier for linear interpolation calculations. +/// +/// This value of 1,000,000 was chosen to balance precision with overflow safety: +/// - Provides 6 decimal places of precision for the fractional component +/// - Small enough to avoid overflow when multiplied with typical numeric values +/// - Sufficient precision for most statistical applications +/// +/// The interpolation formula: `lower + (upper - lower) * fraction` +/// is computed as: `lower + ((upper - lower) * (fraction * PRECISION)) / PRECISION` +/// to avoid floating-point operations on integer types while maintaining precision. +const INTERPOLATION_PRECISION: usize = 1_000_000; + +create_func!(PercentileCont, percentile_cont_udaf); + +/// Computes the exact percentile continuous of a set of numbers +pub fn percentile_cont(order_by: Sort, percentile: Expr) -> Expr { + let expr = order_by.expr.clone(); + let args = vec![expr, percentile]; + + Expr::AggregateFunction(AggregateFunction::new_udf( + percentile_cont_udaf(), + args, + false, + None, + vec![order_by], + None, + )) +} + +#[user_doc( + doc_section(label = "General Functions"), + description = "Returns the exact percentile of input values, interpolating between values if needed.", + syntax_example = "percentile_cont(percentile) WITHIN GROUP (ORDER BY expression)", + sql_example = r#"```sql +> SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) FROM table_name; ++----------------------------------------------------------+ +| percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) | ++----------------------------------------------------------+ +| 45.5 | ++----------------------------------------------------------+ +``` + +An alternate syntax is also supported: +```sql +> SELECT percentile_cont(column_name, 0.75) FROM table_name; ++---------------------------------------+ +| percentile_cont(column_name, 0.75) | ++---------------------------------------+ +| 45.5 | ++---------------------------------------+ +```"#, + standard_argument(name = "expression", prefix = "The"), + argument( + name = "percentile", + description = "Percentile to compute. 
Must be a float value between 0 and 1 (inclusive)." + ) +)] +/// PERCENTILE_CONT aggregate expression. This uses an exact calculation and stores all values +/// in memory before computing the result. If an approximation is sufficient then +/// APPROX_PERCENTILE_CONT provides a much more efficient solution. +/// +/// If using the distinct variation, the memory usage will be similarly high if the +/// cardinality is high as it stores all distinct values in memory before computing the +/// result, but if cardinality is low then memory usage will also be lower. +#[derive(PartialEq, Eq, Hash)] +pub struct PercentileCont { + signature: Signature, + aliases: Vec, +} + +impl Debug for PercentileCont { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + f.debug_struct("PercentileCont") + .field("name", &self.name()) + .field("signature", &self.signature) + .finish() + } +} + +impl Default for PercentileCont { + fn default() -> Self { + Self::new() + } +} + +impl PercentileCont { + pub fn new() -> Self { + let mut variants = Vec::with_capacity(NUMERICS.len()); + // Accept any numeric value paired with a float64 percentile + for num in NUMERICS { + variants.push(TypeSignature::Exact(vec![num.clone(), DataType::Float64])); + } + Self { + signature: Signature::one_of(variants, Volatility::Immutable), + aliases: vec![String::from("quantile_cont")], + } + } + + fn create_accumulator(&self, args: AccumulatorArgs) -> Result> { + let percentile = validate_percentile_expr(&args.exprs[1], "PERCENTILE_CONT")?; + + let is_descending = args + .order_bys + .first() + .map(|sort_expr| sort_expr.options.descending) + .unwrap_or(false); + + let percentile = if is_descending { + 1.0 - percentile + } else { + percentile + }; + + macro_rules! helper { + ($t:ty, $dt:expr) => { + if args.is_distinct { + Ok(Box::new(DistinctPercentileContAccumulator::<$t> { + data_type: $dt.clone(), + distinct_values: HashSet::new(), + percentile, + })) + } else { + Ok(Box::new(PercentileContAccumulator::<$t> { + data_type: $dt.clone(), + all_values: vec![], + percentile, + })) + } + }; + } + + let input_dt = args.exprs[0].data_type(args.schema)?; + match input_dt { + // For integer types, use Float64 internally since percentile_cont returns Float64 + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => helper!(Float64Type, DataType::Float64), + DataType::Float16 => helper!(Float16Type, input_dt), + DataType::Float32 => helper!(Float32Type, input_dt), + DataType::Float64 => helper!(Float64Type, input_dt), + DataType::Decimal32(_, _) => helper!(Decimal32Type, input_dt), + DataType::Decimal64(_, _) => helper!(Decimal64Type, input_dt), + DataType::Decimal128(_, _) => helper!(Decimal128Type, input_dt), + DataType::Decimal256(_, _) => helper!(Decimal256Type, input_dt), + _ => Err(DataFusionError::NotImplemented(format!( + "PercentileContAccumulator not supported for {} with {}", + args.name, input_dt, + ))), + } + } +} + +impl AggregateUDFImpl for PercentileCont { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn name(&self) -> &str { + "percentile_cont" + } + + fn aliases(&self) -> &[String] { + &self.aliases + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + if !arg_types[0].is_numeric() { + return plan_err!("percentile_cont requires numeric input types"); + } + // PERCENTILE_CONT performs linear interpolation and should return a float type 
+ // For integer inputs, return Float64 (matching PostgreSQL/DuckDB behavior) + // For float inputs, preserve the float type + match &arg_types[0] { + DataType::Float16 | DataType::Float32 | DataType::Float64 => { + Ok(arg_types[0].clone()) + } + DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) => Ok(arg_types[0].clone()), + DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 => Ok(DataType::Float64), + // Shouldn't happen due to signature check, but just in case + dt => plan_err!( + "percentile_cont does not support input type {}, must be numeric", + dt + ), + } + } + + fn state_fields(&self, args: StateFieldsArgs) -> Result> { + //Intermediate state is a list of the elements we have collected so far + let input_type = args.input_fields[0].data_type().clone(); + // For integer types, we store as Float64 internally + let storage_type = match &input_type { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => DataType::Float64, + _ => input_type, + }; + + let field = Field::new_list_field(storage_type, true); + let state_name = if args.is_distinct { + "distinct_percentile_cont" + } else { + "percentile_cont" + }; + + Ok(vec![Field::new( + format_state_name(args.name, state_name), + DataType::List(Arc::new(field)), + true, + ) + .into()]) + } + + fn accumulator(&self, acc_args: AccumulatorArgs) -> Result> { + self.create_accumulator(acc_args) + } + + fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool { + !args.is_distinct + } + + fn create_groups_accumulator( + &self, + args: AccumulatorArgs, + ) -> Result> { + let num_args = args.exprs.len(); + if num_args != 2 { + return internal_err!( + "percentile_cont should have 2 args, but found num args:{}", + args.exprs.len() + ); + } + + let percentile = validate_percentile_expr(&args.exprs[1], "PERCENTILE_CONT")?; + + let is_descending = args + .order_bys + .first() + .map(|sort_expr| sort_expr.options.descending) + .unwrap_or(false); + + let percentile = if is_descending { + 1.0 - percentile + } else { + percentile + }; + + macro_rules! 
helper { + ($t:ty, $dt:expr) => { + Ok(Box::new(PercentileContGroupsAccumulator::<$t>::new( + $dt, percentile, + ))) + }; + } + + let input_dt = args.exprs[0].data_type(args.schema)?; + match input_dt { + // For integer types, use Float64 internally since percentile_cont returns Float64 + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 => helper!(Float64Type, DataType::Float64), + DataType::Float16 => helper!(Float16Type, input_dt), + DataType::Float32 => helper!(Float32Type, input_dt), + DataType::Float64 => helper!(Float64Type, input_dt), + DataType::Decimal32(_, _) => helper!(Decimal32Type, input_dt), + DataType::Decimal64(_, _) => helper!(Decimal64Type, input_dt), + DataType::Decimal128(_, _) => helper!(Decimal128Type, input_dt), + DataType::Decimal256(_, _) => helper!(Decimal256Type, input_dt), + _ => Err(DataFusionError::NotImplemented(format!( + "PercentileContGroupsAccumulator not supported for {} with {}", + args.name, input_dt, + ))), + } + } + + fn supports_null_handling_clause(&self) -> bool { + false + } + + fn is_ordered_set_aggregate(&self) -> bool { + true + } + + fn documentation(&self) -> Option<&Documentation> { + self.doc() + } +} + +/// The percentile_cont accumulator accumulates the raw input values +/// as native types. +/// +/// The intermediate state is represented as a List of scalar values updated by +/// `merge_batch` and a `Vec` of native values that are converted to scalar values +/// in the final evaluation step so that we avoid expensive conversions and +/// allocations during `update_batch`. +struct PercentileContAccumulator { + data_type: DataType, + all_values: Vec, + percentile: f64, +} + +impl Debug for PercentileContAccumulator { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "PercentileContAccumulator({}, percentile={})", + self.data_type, self.percentile + ) + } +} + +impl Accumulator for PercentileContAccumulator { + fn state(&mut self) -> Result> { + // Convert `all_values` to `ListArray` and return a single List ScalarValue + + // Build offsets + let offsets = + OffsetBuffer::new(ScalarBuffer::from(vec![0, self.all_values.len() as i32])); + + // Build inner array + let values_array = PrimitiveArray::::new( + ScalarBuffer::from(std::mem::take(&mut self.all_values)), + None, + ) + .with_data_type(self.data_type.clone()); + + // Build the result list array + let list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + Arc::new(values_array), + None, + ); + + Ok(vec![ScalarValue::List(Arc::new(list_array))]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + // Cast to target type if needed (e.g., integer to Float64) + let values = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let values = values.as_primitive::(); + self.all_values.reserve(values.len() - values.null_count()); + self.all_values.extend(values.iter().flatten()); + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let array = states[0].as_list::(); + for v in array.iter().flatten() { + self.update_batch(&[v])? 
+ } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let d = std::mem::take(&mut self.all_values); + let value = calculate_percentile::(d, self.percentile); + ScalarValue::new_primitive::(value, &self.data_type) + } + + fn size(&self) -> usize { + size_of_val(self) + self.all_values.capacity() * size_of::() + } +} + +/// The percentile_cont groups accumulator accumulates the raw input values +/// +/// For calculating the exact percentile of groups, we need to store all values +/// of groups before final evaluation. +/// So values in each group will be stored in a `Vec`, and the total group values +/// will be actually organized as a `Vec>`. +/// +#[derive(Debug)] +struct PercentileContGroupsAccumulator { + data_type: DataType, + group_values: Vec>, + percentile: f64, +} + +impl PercentileContGroupsAccumulator { + pub fn new(data_type: DataType, percentile: f64) -> Self { + Self { + data_type, + group_values: Vec::new(), + percentile, + } + } +} + +impl GroupsAccumulator + for PercentileContGroupsAccumulator +{ + fn update_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + // For ordered-set aggregates, we only care about the ORDER BY column (first element) + // The percentile parameter is already stored in self.percentile + + // Cast to target type if needed (e.g., integer to Float64) + let values_array = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let values = values_array.as_primitive::(); + + // Push the `not nulls + not filtered` row into its group + self.group_values.resize(total_num_groups, Vec::new()); + accumulate( + group_indices, + values, + opt_filter, + |group_index, new_value| { + self.group_values[group_index].push(new_value); + }, + ); + + Ok(()) + } + + fn merge_batch( + &mut self, + values: &[ArrayRef], + group_indices: &[usize], + // Since aggregate filter should be applied in partial stage, in final stage there should be no filter + _opt_filter: Option<&BooleanArray>, + total_num_groups: usize, + ) -> Result<()> { + assert_eq!(values.len(), 1, "one argument to merge_batch"); + + let input_group_values = values[0].as_list::(); + + // Ensure group values big enough + self.group_values.resize(total_num_groups, Vec::new()); + + // Extend values to related groups + group_indices + .iter() + .zip(input_group_values.iter()) + .for_each(|(&group_index, values_opt)| { + if let Some(values) = values_opt { + let values = values.as_primitive::(); + self.group_values[group_index].extend(values.values().iter()); + } + }); + + Ok(()) + } + + fn state(&mut self, emit_to: EmitTo) -> Result> { + // Emit values + let emit_group_values = emit_to.take_needed(&mut self.group_values); + + // Build offsets + let mut offsets = Vec::with_capacity(self.group_values.len() + 1); + offsets.push(0); + let mut cur_len = 0_i32; + for group_value in &emit_group_values { + cur_len += group_value.len() as i32; + offsets.push(cur_len); + } + let offsets = OffsetBuffer::new(ScalarBuffer::from(offsets)); + + // Build inner array + let flatten_group_values = + emit_group_values.into_iter().flatten().collect::>(); + let group_values_array = + PrimitiveArray::::new(ScalarBuffer::from(flatten_group_values), None) + .with_data_type(self.data_type.clone()); + + // Build the result list array + let result_list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + 
Arc::new(group_values_array), + None, + ); + + Ok(vec![Arc::new(result_list_array)]) + } + + fn evaluate(&mut self, emit_to: EmitTo) -> Result { + // Emit values + let emit_group_values = emit_to.take_needed(&mut self.group_values); + + // Calculate percentile for each group + let mut evaluate_result_builder = + PrimitiveBuilder::::new().with_data_type(self.data_type.clone()); + for values in emit_group_values { + let value = calculate_percentile::(values, self.percentile); + evaluate_result_builder.append_option(value); + } + + Ok(Arc::new(evaluate_result_builder.finish())) + } + + fn convert_to_state( + &self, + values: &[ArrayRef], + opt_filter: Option<&BooleanArray>, + ) -> Result> { + assert_eq!(values.len(), 1, "one argument to merge_batch"); + + // Cast to target type if needed (e.g., integer to Float64) + let values_array = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let input_array = values_array.as_primitive::(); + + // Directly convert the input array to states, each row will be + // seen as a respective group. + // For detail, the `input_array` will be converted to a `ListArray`. + // And if row is `not null + not filtered`, it will be converted to a list + // with only one element; otherwise, this row in `ListArray` will be set + // to null. + + // Reuse values buffer in `input_array` to build `values` in `ListArray` + let values = PrimitiveArray::::new(input_array.values().clone(), None) + .with_data_type(self.data_type.clone()); + + // `offsets` in `ListArray`, each row as a list element + let offset_end = i32::try_from(input_array.len()).map_err(|e| { + internal_datafusion_err!( + "cast array_len to i32 failed in convert_to_state of group percentile_cont, err:{e:?}" + ) + })?; + let offsets = (0..=offset_end).collect::>(); + // Safety: The offsets vector is constructed as a sequential range from 0 to input_array.len(), + // which guarantees all OffsetBuffer invariants: + // 1. Offsets are monotonically increasing (each element is prev + 1) + // 2. No offset exceeds the values array length (max offset = input_array.len()) + // 3. First offset is 0 and last offset equals the total length + // Therefore new_unchecked is safe to use here. + let offsets = unsafe { OffsetBuffer::new_unchecked(ScalarBuffer::from(offsets)) }; + + // `nulls` for converted `ListArray` + let nulls = filtered_null_mask(opt_filter, input_array); + + let converted_list_array = ListArray::new( + Arc::new(Field::new_list_field(self.data_type.clone(), true)), + offsets, + Arc::new(values), + nulls, + ); + + Ok(vec![Arc::new(converted_list_array)]) + } + + fn supports_convert_to_state(&self) -> bool { + true + } + + fn size(&self) -> usize { + self.group_values + .iter() + .map(|values| values.capacity() * size_of::()) + .sum::() + // account for size of self.group_values too + + self.group_values.capacity() * size_of::>() + } +} + +/// The distinct percentile_cont accumulator accumulates the raw input values +/// using a HashSet to eliminate duplicates. +/// +/// The intermediate state is represented as a List of scalar values updated by +/// `merge_batch` and a `Vec` of `ArrayRef` that are converted to scalar values +/// in the final evaluation step so that we avoid expensive conversions and +/// allocations during `update_batch`. 
+struct DistinctPercentileContAccumulator { + data_type: DataType, + distinct_values: HashSet>, + percentile: f64, +} + +impl Debug for DistinctPercentileContAccumulator { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "DistinctPercentileContAccumulator({}, percentile={})", + self.data_type, self.percentile + ) + } +} + +impl Accumulator for DistinctPercentileContAccumulator { + fn state(&mut self) -> Result> { + let all_values = self + .distinct_values + .iter() + .map(|x| ScalarValue::new_primitive::(Some(x.0), &self.data_type)) + .collect::>>()?; + + let arr = ScalarValue::new_list_nullable(&all_values, &self.data_type); + Ok(vec![ScalarValue::List(arr)]) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + if values.is_empty() { + return Ok(()); + } + + // Cast to target type if needed (e.g., integer to Float64) + let values = if values[0].data_type() != &self.data_type { + arrow::compute::cast(&values[0], &self.data_type)? + } else { + Arc::clone(&values[0]) + }; + + let array = values.as_primitive::(); + match array.nulls().filter(|x| x.null_count() > 0) { + Some(n) => { + for idx in n.valid_indices() { + self.distinct_values.insert(Hashable(array.value(idx))); + } + } + None => array.values().iter().for_each(|x| { + self.distinct_values.insert(Hashable(*x)); + }), + } + Ok(()) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + let array = states[0].as_list::(); + for v in array.iter().flatten() { + self.update_batch(&[v])? + } + Ok(()) + } + + fn evaluate(&mut self) -> Result { + let d = std::mem::take(&mut self.distinct_values) + .into_iter() + .map(|v| v.0) + .collect::>(); + let value = calculate_percentile::(d, self.percentile); + ScalarValue::new_primitive::(value, &self.data_type) + } + + fn size(&self) -> usize { + size_of_val(self) + self.distinct_values.capacity() * size_of::() + } +} + +/// Calculate the percentile value for a given set of values. +/// This function performs an exact calculation by sorting all values. +/// +/// The percentile is calculated using linear interpolation between closest ranks. 
+/// For percentile p and n values: +/// - If p * (n-1) is an integer, return the value at that position +/// - Otherwise, interpolate between the two closest values +fn calculate_percentile( + mut values: Vec, + percentile: f64, +) -> Option { + let cmp = |x: &T::Native, y: &T::Native| x.compare(*y); + + let len = values.len(); + if len == 0 { + None + } else if len == 1 { + Some(values[0]) + } else if percentile == 0.0 { + // Get minimum value + Some( + *values + .iter() + .min_by(|a, b| cmp(a, b)) + .expect("we checked for len > 0 a few lines above"), + ) + } else if percentile == 1.0 { + // Get maximum value + Some( + *values + .iter() + .max_by(|a, b| cmp(a, b)) + .expect("we checked for len > 0 a few lines above"), + ) + } else { + // Calculate the index using the formula: p * (n - 1) + let index = percentile * ((len - 1) as f64); + let lower_index = index.floor() as usize; + let upper_index = index.ceil() as usize; + + if lower_index == upper_index { + // Exact index, return the value at that position + let (_, value, _) = values.select_nth_unstable_by(lower_index, cmp); + Some(*value) + } else { + // Need to interpolate between two values + // First, partition at lower_index to get the lower value + let (_, lower_value, _) = values.select_nth_unstable_by(lower_index, cmp); + let lower_value = *lower_value; + + // Then partition at upper_index to get the upper value + let (_, upper_value, _) = values.select_nth_unstable_by(upper_index, cmp); + let upper_value = *upper_value; + + // Linear interpolation using wrapping arithmetic + // We use wrapping operations here (matching the approach in median.rs) because: + // 1. Both values come from the input data, so diff is bounded by the value range + // 2. fraction is between 0 and 1, and INTERPOLATION_PRECISION is small enough + // to prevent overflow when combined with typical numeric ranges + // 3. The result is guaranteed to be between lower_value and upper_value + // 4. For floating-point types, wrapping ops behave the same as standard ops + let fraction = index - (lower_index as f64); + let diff = upper_value.sub_wrapping(lower_value); + let interpolated = lower_value.add_wrapping( + diff.mul_wrapping(T::Native::usize_as( + (fraction * INTERPOLATION_PRECISION as f64) as usize, + )) + .div_wrapping(T::Native::usize_as(INTERPOLATION_PRECISION)), + ); + Some(interpolated) + } + } +} diff --git a/datafusion/functions-aggregate/src/utils.rs b/datafusion/functions-aggregate/src/utils.rs new file mode 100644 index 0000000000000..c058b64f95727 --- /dev/null +++ b/datafusion/functions-aggregate/src/utils.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
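As a quick, standalone illustration of the `p * (n - 1)` indexing and linear interpolation that `calculate_percentile` above implements — a minimal sketch on plain `f64` values, not part of the patch itself: the helper name is hypothetical, it uses a full sort where the patch uses `select_nth_unstable_by`, and it skips the wrapping arithmetic the patch applies to Arrow native types.

```rust
/// Illustrative only: exact percentile with linear interpolation on f64s.
fn percentile_cont_f64(mut values: Vec<f64>, p: f64) -> Option<f64> {
    if values.is_empty() {
        return None;
    }
    values.sort_by(|a, b| a.partial_cmp(b).expect("no NaNs in this sketch"));
    let index = p * (values.len() - 1) as f64;
    let (lo, hi) = (index.floor() as usize, index.ceil() as usize);
    if lo == hi {
        // index landed exactly on a rank: no interpolation needed
        return Some(values[lo]);
    }
    let fraction = index - lo as f64;
    Some(values[lo] + (values[hi] - values[lo]) * fraction)
}

fn main() {
    // Matches expectations in the new aggregate.slt tests below:
    // percentile_cont(0.75) over (1, 2, 3, 4) -> 3.25
    assert_eq!(percentile_cont_f64(vec![1.0, 2.0, 3.0, 4.0], 0.75), Some(3.25));
    // percentile_cont(0.4) over (1, 2, 3, 4, 5) -> 2.6 (up to float rounding)
    let p40 = percentile_cont_f64(vec![1.0, 2.0, 3.0, 4.0, 5.0], 0.4).unwrap();
    assert!((p40 - 2.6).abs() < 1e-9);
}
```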
+
+use std::sync::Arc;
+
+use arrow::array::RecordBatch;
+use arrow::datatypes::Schema;
+use datafusion_common::{internal_err, plan_err, DataFusionError, Result, ScalarValue};
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr_common::physical_expr::PhysicalExpr;
+
+/// Evaluates a physical expression to extract its scalar value.
+///
+/// This is used to extract constant values from expressions (like percentile parameters)
+/// by evaluating them against an empty record batch.
+pub(crate) fn get_scalar_value(expr: &Arc<dyn PhysicalExpr>) -> Result<ScalarValue> {
+    let empty_schema = Arc::new(Schema::empty());
+    let batch = RecordBatch::new_empty(Arc::clone(&empty_schema));
+    if let ColumnarValue::Scalar(s) = expr.evaluate(&batch)? {
+        Ok(s)
+    } else {
+        internal_err!("Didn't expect ColumnarValue::Array")
+    }
+}
+
+/// Validates that a percentile expression is a literal float value between 0.0 and 1.0.
+///
+/// Used by both `percentile_cont` and `approx_percentile_cont` to validate their
+/// percentile parameters.
+pub(crate) fn validate_percentile_expr(
+    expr: &Arc<dyn PhysicalExpr>,
+    fn_name: &str,
+) -> Result<f64> {
+    let scalar_value = get_scalar_value(expr).map_err(|_e| {
+        DataFusionError::Plan(format!(
+            "Percentile value for '{fn_name}' must be a literal"
+        ))
+    })?;
+
+    let percentile = match scalar_value {
+        ScalarValue::Float32(Some(value)) => value as f64,
+        ScalarValue::Float64(Some(value)) => value,
+        sv => {
+            return plan_err!(
+                "Percentile value for '{fn_name}' must be Float32 or Float64 literal (got data type {})",
+                sv.data_type()
+            )
+        }
+    };
+
+    // Ensure the percentile is between 0 and 1.
+    if !(0.0..=1.0).contains(&percentile) {
+        return plan_err!(
+            "Percentile value must be between 0.0 and 1.0 inclusive, {percentile} is invalid"
+        );
+    }
+    Ok(percentile)
+}
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index 9d6c7b11add6d..a5973afc0a93d 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -144,7 +144,7 @@ statement error Failed to coerce arguments to satisfy a call to 'approx_percenti
 SELECT approx_percentile_cont_with_weight(c2, c1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100
 
 # csv_query_approx_percentile_cont_with_histogram_bins
-statement error DataFusion error: This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\)\.
+statement error DataFusion error: Error during planning: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be UInt > 0 literal \(got data type Int64\)\.
SELECT c1, approx_percentile_cont(0.95, -1000) WITHIN GROUP (ORDER BY c3) AS c3_p95 FROM aggregate_test_100 GROUP BY 1 ORDER BY 1 statement error Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function @@ -156,10 +156,10 @@ SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c3) FROM aggre statement error DataFusion error: Error during planning: Failed to coerce arguments to satisfy a call to 'approx_percentile_cont' function: coercion from Float64, Float64, Float64 to the signature OneOf(.*) failed(.|\n)* SELECT approx_percentile_cont(0.95, 111.1) WITHIN GROUP (ORDER BY c12) FROM aggregate_test_100 -statement error DataFusion error: This feature is not implemented: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal +statement error DataFusion error: Error during planning: Percentile value for 'APPROX_PERCENTILE_CONT' must be a literal SELECT approx_percentile_cont(c12) WITHIN GROUP (ORDER BY c12) FROM aggregate_test_100 -statement error DataFusion error: This feature is not implemented: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal +statement error DataFusion error: Error during planning: Tdigest max_size value for 'APPROX_PERCENTILE_CONT' must be a literal SELECT approx_percentile_cont(0.95, c5) WITHIN GROUP (ORDER BY c12) FROM aggregate_test_100 statement error DataFusion error: Error during planning: \[IGNORE | RESPECT\] NULLS are not permitted for approx_percentile_cont @@ -3356,6 +3356,342 @@ c 4 d 4 e 4 +##################### +## percentile_cont tests (exact percentile calculation) +##################### + +# Test error conditions for percentile_cont +statement error DataFusion error: Error during planning: Percentile value must be between 0.0 and 1.0 inclusive +SELECT percentile_cont(1.5) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 + +statement error DataFusion error: Error during planning: Percentile value must be between 0.0 and 1.0 inclusive +SELECT percentile_cont(-0.1) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 + +statement error DataFusion error: Error during planning: Percentile value for 'PERCENTILE_CONT' must be a literal +SELECT percentile_cont(c2) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 + +statement error DataFusion error: Error during planning: \[IGNORE | RESPECT\] NULLS are not permitted for percentile_cont +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) IGNORE NULLS FROM aggregate_test_100 + +statement error DataFusion error: Error during planning: \[IGNORE | RESPECT\] NULLS are not permitted for percentile_cont +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) RESPECT NULLS FROM aggregate_test_100 + +statement error DataFusion error: This feature is not implemented: Only a single ordering expression is permitted in a WITHIN GROUP clause +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c3, c2) FROM aggregate_test_100 + +# Not supported over sliding windows +query error DataFusion error: Error during planning: OVER and WITHIN GROUP clause cannot be used together +SELECT percentile_cont(0.5) +WITHIN GROUP (ORDER BY c3) +OVER (ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) +FROM aggregate_test_100 + +# Test basic percentile_cont with WITHIN GROUP syntax +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +3 + +query R +SELECT percentile_cont(0.0) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +1 + +query R +SELECT percentile_cont(1.0) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +5 + +query R 
+SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +2 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +4 + +# Test that percentile_cont(0.5) equals median +query I +SELECT median(c2) FROM aggregate_test_100 +---- +3 + +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +3 + +# Test with descending order +query R +SELECT percentile_cont(0.95) WITHIN GROUP (ORDER BY c3 DESC) FROM aggregate_test_100 +---- +-101.25 + +query R +SELECT percentile_cont(0.05) WITHIN GROUP (ORDER BY c3 DESC) FROM aggregate_test_100 +---- +118.099998 + +# Test with GROUP BY +query TR +SELECT c1, percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 GROUP BY c1 ORDER BY c1 +---- +a -25 +b 17 +c 1 +d 46.5 +e 64 + +query TR +SELECT c1, percentile_cont(0.95) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 GROUP BY c1 ORDER BY c1 +---- +a 65 +b 68 +c 118 +d 123.299998 +e 112 + +# Test with NULLs +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (NULL), (NULL), (NULL)) as t (v) +---- +2 + +# Test with all NULLs +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (CAST(NULL as INT))) as t (v) +---- +NULL + +# Test with empty set +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (1)) as t (v) WHERE v > 10 +---- +NULL + +# Test with single value +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (42)) as t (v) +---- +42 + +# Test with float values for interpolation +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (1.0), (2.0), (3.0), (4.0)) as t (v) +---- +2.5 + +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (1.0), (2.0), (3.0), (4.0)) as t (v) +---- +1.75 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v) FROM (VALUES (1.0), (2.0), (3.0), (4.0)) as t (v) +---- +3.25 + +# Test with various numeric types +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c7) FROM aggregate_test_100 +---- +134.5 + +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c8) FROM aggregate_test_100 +---- +30634 + +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c11) FROM aggregate_test_100 +---- +0.4906719 + +# Test edge case with two values (tests interpolation) +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (10.0), (20.0)) as t (v) +---- +15 + +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (10.0), (20.0)) as t (v) +---- +12.5 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v) FROM (VALUES (10.0), (20.0)) as t (v) +---- +17.5 + +# Test integer inputs requiring interpolation (should return float) +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4)) as t (v) +---- +2.5 + +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4)) as t (v) +---- +1.75 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4)) as t (v) +---- +3.25 + +# Test with exact percentile values (no interpolation needed) +query R +SELECT percentile_cont(0.0) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +1 + +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +2 + +query R +SELECT percentile_cont(0.5) WITHIN GROUP 
(ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +3 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +4 + +query R +SELECT percentile_cont(1.0) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +5 + +# Test with negative numbers +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY v) FROM (VALUES (-10), (-5), (0), (5), (10)) as t (v) +---- +0 + +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (-10), (-5), (0), (5), (10)) as t (v) +---- +-5 + +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v) FROM (VALUES (-10), (-5), (0), (5), (10)) as t (v) +---- +5 + +# Test comparison: percentile_cont should give exact results +query R +SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 +---- +15.5 + +# Compare with approx_percentile_cont (should be close but may not be exact) +query B +SELECT ABS(percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) - approx_percentile_cont(0.5) WITHIN GROUP (ORDER BY c3)) < 5 FROM aggregate_test_100 +---- +true + +# Test percentile_cont without WITHIN GROUP clause (alternate syntax) +query R +SELECT percentile_cont(c2, 0.5) FROM aggregate_test_100 +---- +3 + +query R +SELECT percentile_cont(c2, 0.0) FROM aggregate_test_100 +---- +1 + +query R +SELECT percentile_cont(c2, 1.0) FROM aggregate_test_100 +---- +5 + +query R +SELECT percentile_cont(c2, 0.25) FROM aggregate_test_100 +---- +2 + +query R +SELECT percentile_cont(c2, 0.75) FROM aggregate_test_100 +---- +4 + +# Verify alternate syntax gives same results as WITHIN GROUP syntax +query B +SELECT percentile_cont(c2, 0.5) = percentile_cont(0.5) WITHIN GROUP (ORDER BY c2) FROM aggregate_test_100 +---- +true + +query B +SELECT percentile_cont(c3, 0.5) = percentile_cont(0.5) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 +---- +true + +# Test alternate syntax with GROUP BY +query TR +SELECT c1, percentile_cont(c3, 0.5) FROM aggregate_test_100 GROUP BY c1 ORDER BY c1 +---- +a -25 +b 17 +c 1 +d 46.5 +e 64 + +# Verify alternate syntax with GROUP BY gives same results as WITHIN GROUP +query TB +SELECT c1, percentile_cont(c3, 0.95) = percentile_cont(0.95) WITHIN GROUP (ORDER BY c3) FROM aggregate_test_100 GROUP BY c1 ORDER BY c1 +---- +a true +b true +c true +d true +e true + +# Test ascending vs descending equivalence: percentile_cont(0.4) ASC should equal percentile_cont(0.6) DESC +# This tests the mathematical property that the pth percentile ascending = (1-p)th percentile descending +# Using a simple controlled dataset to demonstrate the property + +# Show 0.4 ascending +query R +SELECT percentile_cont(0.4) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +2.6 + +# Show 0.6 descending (should be same as 0.4 ascending) +query R +SELECT percentile_cont(0.6) WITHIN GROUP (ORDER BY v DESC) FROM (VALUES (1), (2), (3), (4), (5)) as t (v) +---- +2.6 + +# Show 0.3 ascending +query R +SELECT percentile_cont(0.3) WITHIN GROUP (ORDER BY v) FROM (VALUES (10), (20), (30), (40), (50)) as t (v) +---- +21.99999 + +# Show 0.7 descending (should be same as 0.3 ascending) +query R +SELECT percentile_cont(0.7) WITHIN GROUP (ORDER BY v DESC) FROM (VALUES (10), (20), (30), (40), (50)) as t (v) +---- +22 + +# Show 0.25 ascending on larger dataset +query R +SELECT percentile_cont(0.25) WITHIN GROUP (ORDER BY v) FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) as t (v) +---- +2.75 + +# Show 0.75 descending (should be same 
as 0.25 ascending) +query R +SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY v DESC) FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) as t (v) +---- +2.75 + # array_agg_zero query ? SELECT ARRAY_AGG([]) diff --git a/docs/source/user-guide/sql/aggregate_functions.md b/docs/source/user-guide/sql/aggregate_functions.md index 205962031b1d0..f17e09f2ce9d0 100644 --- a/docs/source/user-guide/sql/aggregate_functions.md +++ b/docs/source/user-guide/sql/aggregate_functions.md @@ -65,6 +65,8 @@ Note: When no rows pass the filter, `COUNT` returns `0` while `SUM`/`AVG`/`MIN`/ - [mean](#mean) - [median](#median) - [min](#min) +- [percentile_cont](#percentile_cont) +- [quantile_cont](#quantile_cont) - [string_agg](#string_agg) - [sum](#sum) - [var](#var) @@ -388,6 +390,49 @@ min(expression) +----------------------+ ``` +### `percentile_cont` + +Returns the exact percentile of input values, interpolating between values if needed. + +```sql +percentile_cont(percentile) WITHIN GROUP (ORDER BY expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive). + +#### Example + +```sql +> SELECT percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) FROM table_name; ++----------------------------------------------------------+ +| percentile_cont(0.75) WITHIN GROUP (ORDER BY column_name) | ++----------------------------------------------------------+ +| 45.5 | ++----------------------------------------------------------+ +``` + +An alternate syntax is also supported: + +```sql +> SELECT percentile_cont(column_name, 0.75) FROM table_name; ++---------------------------------------+ +| percentile_cont(column_name, 0.75) | ++---------------------------------------+ +| 45.5 | ++---------------------------------------+ +``` + +#### Aliases + +- quantile_cont + +### `quantile_cont` + +_Alias of [percentile_cont](#percentile_cont)._ + ### `string_agg` Concatenates the values of string expressions and places separator values between them. If ordering is required, strings are concatenated in the specified order. This aggregation function can only mix DISTINCT and ORDER BY if the ordering expression is exactly the same as the first argument expression. From 621a24978a7a9c6d2b27973d1853dbc8776a56b5 Mon Sep 17 00:00:00 2001 From: Jeffrey Vo Date: Fri, 17 Oct 2025 17:09:46 +1100 Subject: [PATCH 018/109] fix: Re-bump latest datafusion-testing module so extended tests succeed (#18110) Looks like #17988 accidentally reverted the bump from #18096 --- datafusion-testing | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-testing b/datafusion-testing index 905df5f65cc9d..eccb0e4a42634 160000 --- a/datafusion-testing +++ b/datafusion-testing @@ -1 +1 @@ -Subproject commit 905df5f65cc9d0851719c21f5a4dd5cd77621f19 +Subproject commit eccb0e4a426344ef3faf534cd60e02e9c3afd3ac From ffe64e3103b037750aa927057a17984f8bf0bf7d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 17 Oct 2025 20:04:54 +1100 Subject: [PATCH 019/109] chore(deps): bump taiki-e/install-action from 2.62.31 to 2.62.33 (#18113) Bumps [taiki-e/install-action](https://github.com/taiki-e/install-action) from 2.62.31 to 2.62.33.
Release notes

Sourced from taiki-e/install-action's releases.

2.62.33

  • Update mise@latest to 2025.10.10.

2.62.32

  • Update syft@latest to 1.34.2.

  • Update vacuum@latest to 0.18.7.

Changelog

Sourced from taiki-e/install-action's changelog.

Changelog

All notable changes to this project will be documented in this file.

This project adheres to Semantic Versioning.

[Unreleased]

[2.62.33] - 2025-10-17

  • Update mise@latest to 2025.10.10.

[2.62.32] - 2025-10-16

  • Update syft@latest to 1.34.2.

  • Update vacuum@latest to 0.18.7.

[2.62.31] - 2025-10-16

  • Update protoc@latest to 3.33.0.

  • Update uv@latest to 0.9.3.

  • Update syft@latest to 1.34.1.

  • Update mise@latest to 2025.10.9.

  • Update cargo-shear@latest to 1.6.0.

[2.62.30] - 2025-10-15

  • Update vacuum@latest to 0.18.6.

  • Update zizmor@latest to 1.15.2.

[2.62.29] - 2025-10-14

  • Update zizmor@latest to 1.15.1.

  • Update cargo-nextest@latest to 0.9.106.

  • Update mise@latest to 2025.10.8.

  • Update ubi@latest to 0.8.1.

... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=taiki-e/install-action&package-manager=github_actions&previous-version=2.62.31&new-version=2.62.33)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@dependabot rebase` will rebase this PR - `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@dependabot merge` will merge this PR after your CI passes on it - `@dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@dependabot cancel merge` will cancel a previously requested merge and block automerging - `@dependabot reopen` will reopen this PR if it is closed - `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/audit.yml | 2 +-
 .github/workflows/rust.yml  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml
index 00bfa1e1b285f..98e6c35ada3b4 100644
--- a/.github/workflows/audit.yml
+++ b/.github/workflows/audit.yml
@@ -42,7 +42,7 @@ jobs:
     steps:
       - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
       - name: Install cargo-audit
-        uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31
+        uses: taiki-e/install-action@e43a5023a747770bfcb71ae048541a681714b951 # v2.62.33
         with:
           tool: cargo-audit
       - name: Run audit check
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 9fa033fce646f..09be2f2ad9e4a 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -412,7 +412,7 @@ jobs:
           sudo apt-get update -qq
           sudo apt-get install -y -qq clang
       - name: Setup wasm-pack
-        uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31
+        uses: taiki-e/install-action@e43a5023a747770bfcb71ae048541a681714b951 # v2.62.33
         with:
           tool: wasm-pack
       - name: Run tests with headless mode
@@ -739,7 +739,7 @@ jobs:
       - name: Setup Rust toolchain
         uses: ./.github/actions/setup-builder
       - name: Install cargo-msrv
-        uses: taiki-e/install-action@0005e0116e92d8489d8d96fbff83f061c79ba95a # v2.62.31
+        uses: taiki-e/install-action@e43a5023a747770bfcb71ae048541a681714b951 # v2.62.33
         with:
           tool: cargo-msrv

From 0ae9fdcb0da0cfaf180848115705fe3a9b7de343 Mon Sep 17 00:00:00 2001
From: Enrico La Sala
Date: Fri, 17 Oct 2025 11:29:46 +0200
Subject: [PATCH 020/109] Adding hiop as known user (#18114)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Which issue does this PR close?

- Doesn't close an issue.

## Rationale for this change

Hi, we are hiop, a Serverless Data Logistic Platform. We use DataFusion as a core part of our backend engine, and it plays a crucial role in our data infrastructure.
Our team members are passionate about the project and actively try to contribute to its development (@dariocurr).

We’d love to have Hiop listed among the Known Users to show our support and help the DataFusion community continue to grow.

## What changes are included in this PR?

Just adding hiop as a known user

## Are these changes tested?

## Are there any user-facing changes?

---
 docs/source/user-guide/introduction.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/user-guide/introduction.md b/docs/source/user-guide/introduction.md
index dc4825dc06dfb..778562d55ffcb 100644
--- a/docs/source/user-guide/introduction.md
+++ b/docs/source/user-guide/introduction.md
@@ -109,6 +109,7 @@ Here are some active projects using DataFusion:
 - [Funnel](https://funnel.io/) Data Platform powering Marketing Intelligence applications.
 - [GlareDB](https://github.com/GlareDB/glaredb) Fast SQL database for querying and analyzing distributed data.
- [GreptimeDB] Open Source & Cloud Native Distributed Time Series Database +- [hiop](https://hiop.io) Serverless Data Logistic Platform - [HoraeDB] Distributed Time-Series Database - [Iceberg-rust](https://github.com/apache/iceberg-rust) Rust implementation of Apache Iceberg - [InfluxDB] Time Series Database From a9ecd683060ae019fe198a09911b46dee384d9d5 Mon Sep 17 00:00:00 2001 From: Jeffrey Vo Date: Fri, 17 Oct 2025 21:37:42 +1100 Subject: [PATCH 021/109] chore: remove unnecessary `skip_failed_rules` config in slt (#18117) ## Which issue does this PR close? - Closes #3695 - Closes #3797 ## Rationale for this change Was looking at above issues and I don't believe we skip the failed rules for any tests anymore (default for the config is also `false`), apart from this cleanup, so filing this PR so we can close the issues. Seems we only do in this `window.slt` test after this fix: https://github.com/apache/datafusion/blob/621a24978a7a9c6d2b27973d1853dbc8776a56b5/datafusion/sqllogictest/test_files/window.slt#L2587-L2611 Which seems intentional. ## What changes are included in this PR? Remove unnecessary `skip_failed_rules` config. ## Are these changes tested? Existing tests. ## Are there any user-facing changes? No. --- datafusion/sqllogictest/test_files/timestamps.slt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index 1a7ff41d64a66..38b599260de19 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -643,11 +643,7 @@ select date '1994-01-01' - interval '1' day as date; ---- 1993-12-31 - # cast_string_to_time() -statement ok -set datafusion.optimizer.skip_failed_rules = false - query DDDD select time '08:09:10.123456789' as time_nano, @@ -668,10 +664,6 @@ SELECT TIME '24:01:02' as time; query error Arrow error: Parser error: Invalid timezone "ZZ": failed to parse timezone SELECT TIMESTAMP '2023-12-05T21:58:10.45ZZ'; -statement ok -set datafusion.optimizer.skip_failed_rules = true - - # cast_to_timestamp_twice query P select to_timestamp(a) from (select to_timestamp(1) as a) A; From fe955058ac779dbf00e2e04ebd721aa2951a6537 Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Fri, 17 Oct 2025 11:38:30 +0100 Subject: [PATCH 022/109] move repartition to insta (#18106) Related https://github.com/apache/datafusion/pull/16324 https://github.com/apache/datafusion/pull/16617 almost there! --- .../physical-plan/src/repartition/mod.rs | 60 ++++++++----------- 1 file changed, 26 insertions(+), 34 deletions(-) diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index a5bf68a63c387..dafde268ba737 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -1782,16 +1782,12 @@ mod test { /// `$PLAN`: the plan to optimized /// macro_rules! 
assert_plan { - ($EXPECTED_PLAN_LINES: expr, $PLAN: expr) => { + ($PLAN: expr, @ $EXPECTED: expr) => { let formatted = crate::displayable($PLAN).indent(true).to_string(); - let actual: Vec<&str> = formatted.trim().lines().collect(); - let expected_plan_lines: Vec<&str> = $EXPECTED_PLAN_LINES - .iter().map(|s| *s).collect(); - - assert_eq!( - expected_plan_lines, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" + insta::assert_snapshot!( + formatted, + @$EXPECTED ); }; } @@ -1808,13 +1804,12 @@ mod test { .with_preserve_order(); // Repartition should preserve order - let expected_plan = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c0@0 ASC", - " UnionExec", - " DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC", - " DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC", - ]; - assert_plan!(expected_plan, &exec); + assert_plan!(&exec, @r" + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c0@0 ASC + UnionExec + DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC + DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC + "); Ok(()) } @@ -1824,16 +1819,15 @@ mod test { let sort_exprs = sort_exprs(&schema); let source = sorted_memory_exec(&schema, sort_exprs); // output is sorted, but has only a single partition, so no need to sort - let exec = RepartitionExec::try_new(source, Partitioning::RoundRobinBatch(10)) - .unwrap() + let exec = RepartitionExec::try_new(source, Partitioning::RoundRobinBatch(10))? .with_preserve_order(); // Repartition should not preserve order - let expected_plan = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC", - ]; - assert_plan!(expected_plan, &exec); + assert_plan!(&exec, @r" + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC + "); + Ok(()) } @@ -1848,13 +1842,12 @@ mod test { .with_preserve_order(); // Repartition should not preserve order, as there is no order to preserve - let expected_plan = [ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", - " DataSourceExec: partitions=1, partition_sizes=[0]", - " DataSourceExec: partitions=1, partition_sizes=[0]", - ]; - assert_plan!(expected_plan, &exec); + assert_plan!(&exec, @r" + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + UnionExec + DataSourceExec: partitions=1, partition_sizes=[0] + DataSourceExec: partitions=1, partition_sizes=[0] + "); Ok(()) } @@ -1869,11 +1862,10 @@ mod test { .unwrap(); // Repartition should not preserve order - let expected_plan = [ - "RepartitionExec: partitioning=RoundRobinBatch(20), input_partitions=1", - " DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC", - ]; - assert_plan!(expected_plan, exec.as_ref()); + assert_plan!(exec.as_ref(), @r" + RepartitionExec: partitioning=RoundRobinBatch(20), input_partitions=1 + DataSourceExec: partitions=1, partition_sizes=[0], output_ordering=c0@0 ASC + "); Ok(()) } From 3272ebe9989fb7fca2bbf9954658e197ed6a58cb Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 17 Oct 2025 06:43:54 -0400 Subject: [PATCH 023/109] refactor: move ListingTable over to the catalog-listing-table crate (#18080) 
## Which issue does this PR close?

- This addresses part of https://github.com/apache/datafusion/issues/17713
- Closes https://github.com/apache/datafusion/issues/14462

## Rationale for this change

In order to remove the `datafusion` core crate from `proto` as a dependency, we need to access `ListingTable`, but it is within the `core` crate. There already exists a `datafusion-catalog-listing` crate, which is bare and appears to be the place this should live.

## What changes are included in this PR?

Move `ListingTable` and some of its dependent structs over to the `datafusion-catalog-listing` crate.

There is one dependency I wasn't able to remove from the `core` crate, which is inferring the listing table configuration options. That is because that inference method downcasts `Session` to `SessionState`. If a downstream user ever attempts to implement `Session` themselves, these methods would also not work. Because it would cause a circular dependency, we also cannot lift the method we need out of `SessionState` into `Session`. Instead, I took the approach of splitting the two methods that require `SessionState` off into an extension trait for the listing table config.

From the git diff this appears to be a large change (+1637/-1519); however, the *vast* majority of that is copying the code from one file into another. I have added a comment on the significant change.

## Are these changes tested?

Existing unit tests show no regression. This is just a code refactor.

## Are there any user-facing changes?

Users may need to update their `use` paths.

---
 Cargo.lock                                    |    3 +
 .../examples/custom_file_casts.rs             |    2 +-
 .../examples/json_shredding.rs                |    2 +-
 datafusion/catalog-listing/Cargo.toml         |    6 +
 datafusion/catalog-listing/src/config.rs      |  360 ++++
 datafusion/catalog-listing/src/mod.rs         |    7 +
 datafusion/catalog-listing/src/options.rs     |  411 +++++
 datafusion/catalog-listing/src/table.rs       |  788 ++++++++
 .../core/src/datasource/dynamic_file.rs       |    1 +
 datafusion/core/src/datasource/listing/mod.rs |    3 +-
 .../core/src/datasource/listing/table.rs      | 1596 +----------------
 datafusion/core/tests/catalog/memory.rs       |    2 +-
 .../core/tests/parquet/schema_adapter.rs      |    4 +-
 13 files changed, 1657 insertions(+), 1528 deletions(-)
 create mode 100644 datafusion/catalog-listing/src/config.rs
 create mode 100644 datafusion/catalog-listing/src/options.rs
 create mode 100644 datafusion/catalog-listing/src/table.rs

diff --git a/Cargo.lock b/Cargo.lock
index 7b09121595d67..0392c8147ad2c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1913,12 +1913,15 @@ dependencies = [
  "datafusion-catalog",
  "datafusion-common",
  "datafusion-datasource",
+ "datafusion-datasource-parquet",
  "datafusion-execution",
  "datafusion-expr",
  "datafusion-physical-expr",
+ "datafusion-physical-expr-adapter",
  "datafusion-physical-expr-common",
  "datafusion-physical-plan",
  "futures",
+ "itertools 0.14.0",
  "log",
  "object_store",
  "tokio",
diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_file_casts.rs
index 65ca096820640..4d97ecd91dc64 100644
--- a/datafusion-examples/examples/custom_file_casts.rs
+++ b/datafusion-examples/examples/custom_file_casts.rs
@@ -25,7 +25,7 @@ use datafusion::common::not_impl_err;
 use datafusion::common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion::common::{Result, ScalarValue};
 use datafusion::datasource::listing::{
-    ListingTable, ListingTableConfig, ListingTableUrl,
+    ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl,
 };
 use
datafusion::execution::context::SessionContext; use datafusion::execution::object_store::ObjectStoreUrl; diff --git a/datafusion-examples/examples/json_shredding.rs b/datafusion-examples/examples/json_shredding.rs index c7d0146a001f7..a2e83bc9510ab 100644 --- a/datafusion-examples/examples/json_shredding.rs +++ b/datafusion-examples/examples/json_shredding.rs @@ -27,7 +27,7 @@ use datafusion::common::tree_node::{ }; use datafusion::common::{assert_contains, exec_datafusion_err, Result}; use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableUrl, + ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::execution::context::SessionContext; use datafusion::execution::object_store::ObjectStoreUrl; diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 69f952ae98407..4eaeed675a206 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -39,14 +39,17 @@ datafusion-datasource = { workspace = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-physical-expr = { workspace = true } +datafusion-physical-expr-adapter = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } futures = { workspace = true } +itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } tokio = { workspace = true } [dev-dependencies] +datafusion-datasource-parquet = { workspace = true } [lints] workspace = true @@ -54,3 +57,6 @@ workspace = true [lib] name = "datafusion_catalog_listing" path = "src/mod.rs" + +[package.metadata.cargo-machete] +ignored = ["datafusion-datasource-parquet"] diff --git a/datafusion/catalog-listing/src/config.rs b/datafusion/catalog-listing/src/config.rs new file mode 100644 index 0000000000000..90f44de4fdbc8 --- /dev/null +++ b/datafusion/catalog-listing/src/config.rs @@ -0,0 +1,360 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::options::ListingOptions; +use arrow::datatypes::{DataType, Schema, SchemaRef}; +use datafusion_catalog::Session; +use datafusion_common::{config_err, internal_err}; +use datafusion_datasource::file_compression_type::FileCompressionType; +use datafusion_datasource::schema_adapter::SchemaAdapterFactory; +use datafusion_datasource::ListingTableUrl; +use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; +use std::str::FromStr; +use std::sync::Arc; + +/// Indicates the source of the schema for a [`crate::ListingTable`] +// PartialEq required for assert_eq! 
in tests +#[derive(Debug, Clone, Copy, PartialEq, Default)] +pub enum SchemaSource { + /// Schema is not yet set (initial state) + #[default] + Unset, + /// Schema was inferred from first table_path + Inferred, + /// Schema was specified explicitly via with_schema + Specified, +} + +/// Configuration for creating a [`crate::ListingTable`] +/// +/// # Schema Evolution Support +/// +/// This configuration supports schema evolution through the optional +/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: +/// +/// - **Type coercion requirements**: When you need custom logic for converting between +/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) +/// - **Column mapping**: You need to map columns with a legacy name to a new name +/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. +/// +/// If not specified, a [`datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory`] +/// will be used, which handles basic schema compatibility cases. +/// +#[derive(Debug, Clone, Default)] +pub struct ListingTableConfig { + /// Paths on the `ObjectStore` for creating [`crate::ListingTable`]. + /// They should share the same schema and object store. + pub table_paths: Vec, + /// Optional `SchemaRef` for the to be created [`crate::ListingTable`]. + /// + /// See details on [`ListingTableConfig::with_schema`] + pub file_schema: Option, + /// Optional [`ListingOptions`] for the to be created [`crate::ListingTable`]. + /// + /// See details on [`ListingTableConfig::with_listing_options`] + pub options: Option, + /// Tracks the source of the schema information + pub(crate) schema_source: SchemaSource, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + pub(crate) schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + pub(crate) expr_adapter_factory: Option>, +} + +impl ListingTableConfig { + /// Creates new [`ListingTableConfig`] for reading the specified URL + pub fn new(table_path: ListingTableUrl) -> Self { + Self { + table_paths: vec![table_path], + ..Default::default() + } + } + + /// Creates new [`ListingTableConfig`] with multiple table paths. + /// + /// See `ListingTableConfigExt::infer_options` for details on what happens with multiple paths + pub fn new_with_multi_paths(table_paths: Vec) -> Self { + Self { + table_paths, + ..Default::default() + } + } + + /// Returns the source of the schema for this configuration + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + /// Set the `schema` for the overall [`crate::ListingTable`] + /// + /// [`crate::ListingTable`] will automatically coerce, when possible, the schema + /// for individual files to match this schema. + /// + /// If a schema is not provided, it is inferred using + /// [`Self::infer_schema`]. + /// + /// If the schema is provided, it must contain only the fields in the file + /// without the table partitioning columns. 
+ /// + /// # Example: Specifying Table Schema + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// let schema = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int64, false), + /// Field::new("name", DataType::Utf8, true), + /// ])); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) // Set options first + /// .with_schema(schema); // Then set schema + /// ``` + pub fn with_schema(self, schema: SchemaRef) -> Self { + // Note: We preserve existing options state, but downstream code may expect + // options to be set. Consider calling with_listing_options() or infer_options() + // before operations that require options to be present. + debug_assert!( + self.options.is_some() || cfg!(test), + "ListingTableConfig::with_schema called without options set. \ + Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." + ); + + Self { + file_schema: Some(schema), + schema_source: SchemaSource::Specified, + ..self + } + } + + /// Add `listing_options` to [`ListingTableConfig`] + /// + /// If not provided, format and other options are inferred via + /// `ListingTableConfigExt::infer_options`. + /// + /// # Example: Configuring Parquet Files with Custom Options + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension(".parquet") + /// .with_collect_stat(true); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(options); // Configure file format and options + /// ``` + pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { + // Note: This method properly sets options, but be aware that downstream + // methods like infer_schema() and try_new() require both schema and options + // to be set to function correctly. + debug_assert!( + !self.table_paths.is_empty() || cfg!(test), + "ListingTableConfig::with_listing_options called without table_paths set. \ + Consider calling new() or new_with_multi_paths() first to establish table paths." 
+ ); + + Self { + options: Some(listing_options), + ..self + } + } + + /// Returns a tuple of `(file_extension, optional compression_extension)` + /// + /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))` + /// For example a path ending with blah.test.csv returns `("csv", None)` + pub fn infer_file_extension_and_compression_type( + path: &str, + ) -> datafusion_common::Result<(String, Option)> { + let mut exts = path.rsplit('.'); + + let split = exts.next().unwrap_or(""); + + let file_compression_type = FileCompressionType::from_str(split) + .unwrap_or(FileCompressionType::UNCOMPRESSED); + + if file_compression_type.is_compressed() { + let split2 = exts.next().unwrap_or(""); + Ok((split2.to_string(), Some(split.to_string()))) + } else { + Ok((split.to_string(), None)) + } + } + + /// Infer the [`SchemaRef`] based on `table_path`s. + /// + /// This method infers the table schema using the first `table_path`. + /// See [`ListingOptions::infer_schema`] for more details + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_schema( + self, + state: &dyn Session, + ) -> datafusion_common::Result { + match self.options { + Some(options) => { + let ListingTableConfig { + table_paths, + file_schema, + options: _, + schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + } = self; + + let (schema, new_schema_source) = match file_schema { + Some(schema) => (schema, schema_source), // Keep existing source if schema exists + None => { + if let Some(url) = table_paths.first() { + ( + options.infer_schema(state, url).await?, + SchemaSource::Inferred, + ) + } else { + (Arc::new(Schema::empty()), SchemaSource::Inferred) + } + } + }; + + Ok(Self { + table_paths, + file_schema: Some(schema), + options: Some(options), + schema_source: new_schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + }) + } + None => internal_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Infer the partition columns from `table_paths`. + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_partitions_from_path( + self, + state: &dyn Session, + ) -> datafusion_common::Result { + match self.options { + Some(options) => { + let Some(url) = self.table_paths.first() else { + return config_err!("No table path found"); + }; + let partitions = options + .infer_partitions(state, url) + .await? + .into_iter() + .map(|col_name| { + ( + col_name, + DataType::Dictionary( + Box::new(DataType::UInt16), + Box::new(DataType::Utf8), + ), + ) + }) + .collect::>(); + let options = options.with_table_partition_cols(partitions); + Ok(Self { + table_paths: self.table_paths, + file_schema: self.file_schema, + options: Some(options), + schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, + }) + } + None => config_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Set the [`SchemaAdapterFactory`] for the [`crate::ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// If not provided, a default schema adapter factory will be used. 
+ /// + /// # Example: Custom Schema Adapter for Type Coercion + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # + /// # #[derive(Debug)] + /// # struct MySchemaAdapterFactory; + /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { + /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { + /// # unimplemented!() + /// # } + /// # } + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) + /// .with_schema(table_schema) + /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); + /// ``` + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this configuration + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Set the [`PhysicalExprAdapterFactory`] for the [`crate::ListingTable`] + /// + /// The expression adapter factory is used to create physical expression adapters that can + /// handle schema evolution and type conversions when evaluating expressions + /// with different schemas than the table schema. + /// + /// If not provided, a default physical expression adapter factory will be used unless a custom + /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. + /// + /// See for details on this transition. + pub fn with_expr_adapter_factory( + self, + expr_adapter_factory: Arc, + ) -> Self { + Self { + expr_adapter_factory: Some(expr_adapter_factory), + ..self + } + } +} diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 1322577b207ab..90d04b46b8067 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -24,4 +24,11 @@ // https://github.com/apache/datafusion/issues/11143 #![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] +mod config; pub mod helpers; +mod options; +mod table; + +pub use config::{ListingTableConfig, SchemaSource}; +pub use options::ListingOptions; +pub use table::ListingTable; diff --git a/datafusion/catalog-listing/src/options.rs b/datafusion/catalog-listing/src/options.rs new file mode 100644 index 0000000000000..3cbf3573e9519 --- /dev/null +++ b/datafusion/catalog-listing/src/options.rs @@ -0,0 +1,411 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::datatypes::{DataType, SchemaRef}; +use datafusion_catalog::Session; +use datafusion_common::plan_err; +use datafusion_datasource::file_format::FileFormat; +use datafusion_datasource::ListingTableUrl; +use datafusion_execution::config::SessionConfig; +use datafusion_expr::SortExpr; +use futures::StreamExt; +use futures::{future, TryStreamExt}; +use itertools::Itertools; +use std::sync::Arc; + +/// Options for creating a [`crate::ListingTable`] +#[derive(Clone, Debug)] +pub struct ListingOptions { + /// A suffix on which files should be filtered (leave empty to + /// keep all files on the path) + pub file_extension: String, + /// The file format + pub format: Arc, + /// The expected partition column names in the folder structure. + /// See [Self::with_table_partition_cols] for details + pub table_partition_cols: Vec<(String, DataType)>, + /// Set true to try to guess statistics from the files. + /// This can add a lot of overhead as it will usually require files + /// to be opened and at least partially parsed. + pub collect_stat: bool, + /// Group files to avoid that the number of partitions exceeds + /// this limit + pub target_partitions: usize, + /// Optional pre-known sort order(s). Must be `SortExpr`s. + /// + /// DataFusion may take advantage of this ordering to omit sorts + /// or use more efficient algorithms. Currently sortedness must be + /// provided if it is known by some external mechanism, but may in + /// the future be automatically determined, for example using + /// parquet metadata. + /// + /// See + /// + /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) + /// where each ordering consists of an individual lexicographic + /// ordering (encapsulated by a `Vec`). If there aren't + /// multiple equivalent orderings, the outer `Vec` will have a + /// single element. + pub file_sort_order: Vec>, +} + +impl ListingOptions { + /// Creates an options instance with the given format + /// Default values: + /// - use default file extension filter + /// - no input partition to discover + /// - one target partition + /// - do not collect statistics + pub fn new(format: Arc) -> Self { + Self { + file_extension: format.get_ext(), + format, + table_partition_cols: vec![], + collect_stat: false, + target_partitions: 1, + file_sort_order: vec![], + } + } + + /// Set options from [`SessionConfig`] and returns self. + /// + /// Currently this sets `target_partitions` and `collect_stat` + /// but if more options are added in the future that need to be coordinated + /// they will be synchronized through this method. + pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { + self = self.with_target_partitions(config.target_partitions()); + self = self.with_collect_stat(config.collect_statistics()); + self + } + + /// Set file extension on [`ListingOptions`] and returns self. 
+ /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension(".parquet"); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension(mut self, file_extension: impl Into) -> Self { + self.file_extension = file_extension.into(); + self + } + + /// Optionally set file extension on [`ListingOptions`] and returns self. + /// + /// If `file_extension` is `None`, the file extension will not be changed + /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let extension = Some(".parquet"); + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension_opt(extension); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension_opt(mut self, file_extension: Option) -> Self + where + S: Into, + { + if let Some(file_extension) = file_extension { + self.file_extension = file_extension.into(); + } + self + } + + /// Set `table partition columns` on [`ListingOptions`] and returns self. + /// + /// "partition columns," used to support [Hive Partitioning], are + /// columns added to the data that is read, based on the folder + /// structure where the data resides. + /// + /// For example, give the following files in your filesystem: + /// + /// ```text + /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet + /// ``` + /// + /// A [`crate::ListingTable`] created at `/mnt/nyctaxi/` with partition + /// columns "year" and "month" will include new `year` and `month` + /// columns while reading the files. The `year` column would have + /// value `2022` and the `month` column would have value `01` for + /// the rows read from + /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet` + /// + ///# Notes + /// + /// - If only one level (e.g. `year` in the example above) is + /// specified, the other levels are ignored but the files are + /// still read. + /// + /// - Files that don't follow this partitioning scheme will be + /// ignored. + /// + /// - Since the columns have the same value for all rows read from + /// each individual file (such as dates), they are typically + /// dictionary encoded for efficiency. You may use + /// [`wrap_partition_type_in_dict`] to request a + /// dictionary-encoded type. + /// + /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. 
+ /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::DataType; + /// # use datafusion_expr::col; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// // listing options for files with paths such as `/mnt/data/col_a=x/col_b=y/data.parquet` + /// // `col_a` and `col_b` will be included in the data read from those files + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// + /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// ``` + /// + /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html + /// [`wrap_partition_type_in_dict`]: datafusion_datasource::file_scan_config::wrap_partition_type_in_dict + pub fn with_table_partition_cols( + mut self, + table_partition_cols: Vec<(String, DataType)>, + ) -> Self { + self.table_partition_cols = table_partition_cols; + self + } + + /// Set stat collection on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_collect_stat(true); + /// + /// assert_eq!(listing_options.collect_stat, true); + /// ``` + pub fn with_collect_stat(mut self, collect_stat: bool) -> Self { + self.collect_stat = collect_stat; + self + } + + /// Set number of target partitions on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_target_partitions(8); + /// + /// assert_eq!(listing_options.target_partitions, 8); + /// ``` + pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { + self.target_partitions = target_partitions; + self + } + + /// Set file sort order on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion_expr::col; + /// # use datafusion_catalog_listing::ListingOptions; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// + /// // Tell datafusion that the files are sorted by column "a" + /// let file_sort_order = vec![vec![ + /// col("a").sort(true, true) + /// ]]; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_sort_order(file_sort_order.clone()); + /// + /// assert_eq!(listing_options.file_sort_order, file_sort_order); + /// ``` + pub fn with_file_sort_order(mut self, file_sort_order: Vec>) -> Self { + self.file_sort_order = file_sort_order; + self + } + + /// Infer the schema of the files at the given path on the provided object store. + /// + /// If the table_path contains one or more files (i.e. it is a directory / + /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`] + /// + /// Note: The inferred schema does not include any partitioning columns. 
+ /// + /// This method is called as part of creating a [`crate::ListingTable`]. + pub async fn infer_schema<'a>( + &'a self, + state: &dyn Session, + table_path: &'a ListingTableUrl, + ) -> datafusion_common::Result { + let store = state.runtime_env().object_store(table_path)?; + + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? + // Empty files cannot affect schema but may throw when trying to read for it + .try_filter(|object_meta| future::ready(object_meta.size > 0)) + .try_collect() + .await?; + + let schema = self.format.infer_schema(state, &store, &files).await?; + + Ok(schema) + } + + /// Infers the partition columns stored in `LOCATION` and compares + /// them with the columns provided in `PARTITIONED BY` to help prevent + /// accidental corrupts of partitioned tables. + /// + /// Allows specifying partial partitions. + pub async fn validate_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> datafusion_common::Result<()> { + if self.table_partition_cols.is_empty() { + return Ok(()); + } + + if !table_path.is_collection() { + return plan_err!( + "Can't create a partitioned table backed by a single file, \ + perhaps the URL is missing a trailing slash?" + ); + } + + let inferred = self.infer_partitions(state, table_path).await?; + + // no partitioned files found on disk + if inferred.is_empty() { + return Ok(()); + } + + let table_partition_names = self + .table_partition_cols + .iter() + .map(|(col_name, _)| col_name.clone()) + .collect_vec(); + + if inferred.len() < table_partition_names.len() { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + + // match prefix to allow creating tables with partial partitions + for (idx, col) in table_partition_names.iter().enumerate() { + if &inferred[idx] != col { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + } + + Ok(()) + } + + /// Infer the partitioning at the given path on the provided object store. + /// For performance reasons, it doesn't read all the files on disk + /// and therefore may fail to detect invalid partitioning. + pub async fn infer_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> datafusion_common::Result> { + let store = state.runtime_env().object_store(table_path)?; + + // only use 10 files for inference + // This can fail to detect inconsistent partition keys + // A DFS traversal approach of the store can help here + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? 
+ .take(10) + .try_collect() + .await?; + + let stripped_path_parts = files.iter().map(|file| { + table_path + .strip_prefix(&file.location) + .unwrap() + .collect_vec() + }); + + let partition_keys = stripped_path_parts + .map(|path_parts| { + path_parts + .into_iter() + .rev() + .skip(1) // get parents only; skip the file itself + .rev() + // Partitions are expected to follow the format "column_name=value", so we + // should ignore any path part that cannot be parsed into the expected format + .filter(|s| s.contains('=')) + .map(|s| s.split('=').take(1).collect()) + .collect_vec() + }) + .collect_vec(); + + match partition_keys.into_iter().all_equal_value() { + Ok(v) => Ok(v), + Err(None) => Ok(vec![]), + Err(Some(diff)) => { + let mut sorted_diff = [diff.0, diff.1]; + sorted_diff.sort(); + plan_err!("Found mixed partition values on disk {:?}", sorted_diff) + } + } + } +} diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs new file mode 100644 index 0000000000000..e9ac1bf097a22 --- /dev/null +++ b/datafusion/catalog-listing/src/table.rs @@ -0,0 +1,788 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::config::SchemaSource; +use crate::helpers::{expr_applicable_for_cols, pruned_partition_list}; +use crate::{ListingOptions, ListingTableConfig}; +use arrow::datatypes::{Field, Schema, SchemaBuilder, SchemaRef}; +use async_trait::async_trait; +use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; +use datafusion_common::stats::Precision; +use datafusion_common::{ + internal_datafusion_err, plan_err, project_schema, Constraints, DataFusionError, + SchemaExt, Statistics, +}; +use datafusion_datasource::file::FileSource; +use datafusion_datasource::file_groups::FileGroup; +use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; +use datafusion_datasource::file_sink_config::FileSinkConfig; +use datafusion_datasource::schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, +}; +use datafusion_datasource::{ + compute_all_files_statistics, ListingTableUrl, PartitionedFile, +}; +use datafusion_execution::cache::cache_manager::FileStatisticsCache; +use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; +use datafusion_expr::dml::InsertOp; +use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::{Expr, TableProviderFilterPushDown, TableType}; +use datafusion_physical_expr::create_lex_ordering; +use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; +use datafusion_physical_expr_common::sort_expr::LexOrdering; +use datafusion_physical_plan::empty::EmptyExec; +use datafusion_physical_plan::ExecutionPlan; +use futures::{future, stream, Stream, StreamExt, TryStreamExt}; +use object_store::ObjectStore; +use std::any::Any; +use std::collections::HashMap; +use std::sync::Arc; + +/// Built in [`TableProvider`] that reads data from one or more files as a single table. +/// +/// The files are read using an [`ObjectStore`] instance, for example from +/// local files or objects from AWS S3. +/// +/// # Features: +/// * Reading multiple files as a single table +/// * Hive style partitioning (e.g., directories named `date=2024-06-01`) +/// * Merges schemas from files with compatible but not identical schemas (see [`ListingTableConfig::file_schema`]) +/// * `limit`, `filter` and `projection` pushdown for formats that support it (e.g., +/// Parquet) +/// * Statistics collection and pruning based on file metadata +/// * Pre-existing sort order (see [`ListingOptions::file_sort_order`]) +/// * Metadata caching to speed up repeated queries (see [`FileMetadataCache`]) +/// * Statistics caching (see [`FileStatisticsCache`]) +/// +/// [`FileMetadataCache`]: datafusion_execution::cache::cache_manager::FileMetadataCache +/// +/// # Reading Directories and Hive Style Partitioning +/// +/// For example, given the `table1` directory (or object store prefix) +/// +/// ```text +/// table1 +/// ├── file1.parquet +/// └── file2.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as +/// a single table, merging the schemas if the files have compatible but not +/// identical schemas. +/// +/// Given the `table2` directory (or object store prefix) +/// +/// ```text +/// table2 +/// ├── date=2024-06-01 +/// │ ├── file3.parquet +/// │ └── file4.parquet +/// └── date=2024-06-02 +/// └── file5.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and +/// `file5.parquet` as a single table, again merging schemas if necessary. +/// +/// Given the hive style partitioning structure (e.g,. 
directories named +/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date` +/// column when reading the table: +/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01` +/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`. +/// +/// If the query has a predicate like `WHERE date = '2024-06-01'` +/// only the corresponding directory will be read. +/// +/// # See Also +/// +/// 1. [`ListingTableConfig`]: Configuration options +/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable` +/// +/// [`DataSourceExec`]: datafusion_datasource::source::DataSourceExec +/// +/// # Caching Metadata +/// +/// Some formats, such as Parquet, use the `FileMetadataCache` to cache file +/// metadata that is needed to execute but expensive to read, such as row +/// groups and statistics. The cache is scoped to the `SessionContext` and can +/// be configured via the [runtime config options]. +/// +/// [runtime config options]: https://datafusion.apache.org/user-guide/configs.html#runtime-configuration-settings +/// +/// # Example: Read a directory of parquet files using a [`ListingTable`] +/// +/// ```no_run +/// # use datafusion_common::Result; +/// # use std::sync::Arc; +/// # use datafusion_catalog::TableProvider; +/// # use datafusion_catalog_listing::{ListingOptions, ListingTable, ListingTableConfig}; +/// # use datafusion_datasource::ListingTableUrl; +/// # use datafusion_datasource_parquet::file_format::ParquetFormat;/// # +/// # use datafusion_catalog::Session; +/// async fn get_listing_table(session: &dyn Session) -> Result> { +/// let table_path = "/path/to/parquet"; +/// +/// // Parse the path +/// let table_path = ListingTableUrl::parse(table_path)?; +/// +/// // Create default parquet options +/// let file_format = ParquetFormat::new(); +/// let listing_options = ListingOptions::new(Arc::new(file_format)) +/// .with_file_extension(".parquet"); +/// +/// // Resolve the schema +/// let resolved_schema = listing_options +/// .infer_schema(session, &table_path) +/// .await?; +/// +/// let config = ListingTableConfig::new(table_path) +/// .with_listing_options(listing_options) +/// .with_schema(resolved_schema); +/// +/// // Create a new TableProvider +/// let provider = Arc::new(ListingTable::try_new(config)?); +/// +/// # Ok(provider) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct ListingTable { + table_paths: Vec, + /// `file_schema` contains only the columns physically stored in the data files themselves. + /// - Represents the actual fields found in files like Parquet, CSV, etc. 
+ /// - Used when reading the raw data from files + file_schema: SchemaRef, + /// `table_schema` combines `file_schema` + partition columns + /// - Partition columns are derived from directory paths (not stored in files) + /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet` + table_schema: SchemaRef, + /// Indicates how the schema was derived (inferred or explicitly specified) + schema_source: SchemaSource, + /// Options used to configure the listing table such as the file format + /// and partitioning information + options: ListingOptions, + /// The SQL definition for this table, if any + definition: Option, + /// Cache for collected file statistics + collected_statistics: FileStatisticsCache, + /// Constraints applied to this table + constraints: Constraints, + /// Column default expressions for columns that are not physically present in the data files + column_defaults: HashMap, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, +} + +impl ListingTable { + /// Create new [`ListingTable`] + /// + /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] + pub fn try_new(config: ListingTableConfig) -> datafusion_common::Result { + // Extract schema_source before moving other parts of the config + let schema_source = config.schema_source(); + + let file_schema = config + .file_schema + .ok_or_else(|| internal_datafusion_err!("No schema provided."))?; + + let options = config + .options + .ok_or_else(|| internal_datafusion_err!("No ListingOptions provided"))?; + + // Add the partition columns to the file schema + let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned()); + for (part_col_name, part_col_type) in &options.table_partition_cols { + builder.push(Field::new(part_col_name, part_col_type.clone(), false)); + } + + let table_schema = Arc::new( + builder + .finish() + .with_metadata(file_schema.metadata().clone()), + ); + + let table = Self { + table_paths: config.table_paths, + file_schema, + table_schema, + schema_source, + options, + definition: None, + collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), + constraints: Constraints::default(), + column_defaults: HashMap::new(), + schema_adapter_factory: config.schema_adapter_factory, + expr_adapter_factory: config.expr_adapter_factory, + }; + + Ok(table) + } + + /// Assign constraints + pub fn with_constraints(mut self, constraints: Constraints) -> Self { + self.constraints = constraints; + self + } + + /// Assign column defaults + pub fn with_column_defaults( + mut self, + column_defaults: HashMap, + ) -> Self { + self.column_defaults = column_defaults; + self + } + + /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. + /// + /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics + /// multiple times in the same session. + /// + /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. 
+ pub fn with_cache(mut self, cache: Option) -> Self { + self.collected_statistics = + cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); + self + } + + /// Specify the SQL definition for this table, if any + pub fn with_definition(mut self, definition: Option) -> Self { + self.definition = definition; + self + } + + /// Get paths ref + pub fn table_paths(&self) -> &Vec { + &self.table_paths + } + + /// Get options ref + pub fn options(&self) -> &ListingOptions { + &self.options + } + + /// Get the schema source + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + + /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// # Example: Adding Schema Evolution Support + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion_catalog_listing::{ListingTable, ListingTableConfig, ListingOptions}; + /// # use datafusion_datasource::ListingTableUrl; + /// # use datafusion_datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion_datasource_parquet::file_format::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); + /// # let table = ListingTable::try_new(config).unwrap(); + /// let table_with_evolution = table + /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); + /// ``` + /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this table + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Creates a schema adapter for mapping between file and table schemas + /// + /// Uses the configured schema adapter factory if available, otherwise falls back + /// to the default implementation. + fn create_schema_adapter(&self) -> Box { + let table_schema = self.schema(); + match &self.schema_adapter_factory { + Some(factory) => { + factory.create_with_projected_schema(Arc::clone(&table_schema)) + } + None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), + } + } + + /// Creates a file source and applies schema adapter factory if available + fn create_file_source_with_schema_adapter( + &self, + ) -> datafusion_common::Result> { + let mut source = self.options.format.file_source(); + // Apply schema adapter to source if available + // + // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. + // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. 
+ if let Some(factory) = &self.schema_adapter_factory { + source = source.with_schema_adapter_factory(Arc::clone(factory))?; + } + Ok(source) + } + + /// If file_sort_order is specified, creates the appropriate physical expressions + pub fn try_create_output_ordering( + &self, + execution_props: &ExecutionProps, + ) -> datafusion_common::Result> { + create_lex_ordering( + &self.table_schema, + &self.options.file_sort_order, + execution_props, + ) + } +} + +// Expressions can be used for partition pruning if they can be evaluated using +// only the partition columns and there are partition columns. +fn can_be_evaluated_for_partition_pruning( + partition_column_names: &[&str], + expr: &Expr, +) -> bool { + !partition_column_names.is_empty() + && expr_applicable_for_cols(partition_column_names, expr) +} + +#[async_trait] +impl TableProvider for ListingTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.table_schema) + } + + fn constraints(&self) -> Option<&Constraints> { + Some(&self.constraints) + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> datafusion_common::Result> { + let options = ScanArgs::default() + .with_projection(projection.map(|p| p.as_slice())) + .with_filters(Some(filters)) + .with_limit(limit); + Ok(self.scan_with_args(state, options).await?.into_inner()) + } + + async fn scan_with_args<'a>( + &self, + state: &dyn Session, + args: ScanArgs<'a>, + ) -> datafusion_common::Result { + let projection = args.projection().map(|p| p.to_vec()); + let filters = args.filters().map(|f| f.to_vec()).unwrap_or_default(); + let limit = args.limit(); + + // extract types of partition columns + let table_partition_cols = self + .options + .table_partition_cols + .iter() + .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .collect::>>()?; + + let table_partition_col_names = table_partition_cols + .iter() + .map(|field| field.name().as_str()) + .collect::>(); + + // If the filters can be resolved using only partition cols, there is no need to + // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated + let (partition_filters, filters): (Vec<_>, Vec<_>) = + filters.iter().cloned().partition(|filter| { + can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter) + }); + + // We should not limit the number of partitioned files to scan if there are filters and limit + // at the same time. This is because the limit should be applied after the filters are applied. 
+ let statistic_file_limit = if filters.is_empty() { limit } else { None }; + + let (mut partitioned_file_lists, statistics) = self + .list_files_for_scan(state, &partition_filters, statistic_file_limit) + .await?; + + // if no files need to be read, return an `EmptyExec` + if partitioned_file_lists.is_empty() { + let projected_schema = project_schema(&self.schema(), projection.as_ref())?; + return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema)))); + } + + let output_ordering = self.try_create_output_ordering(state.execution_props())?; + match state + .config_options() + .execution + .split_file_groups_by_statistics + .then(|| { + output_ordering.first().map(|output_ordering| { + FileScanConfig::split_groups_by_statistics_with_target_partitions( + &self.table_schema, + &partitioned_file_lists, + output_ordering, + self.options.target_partitions, + ) + }) + }) + .flatten() + { + Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"), + Some(Ok(new_groups)) => { + if new_groups.len() <= self.options.target_partitions { + partitioned_file_lists = new_groups; + } else { + log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") + } + } + None => {} // no ordering required + }; + + let Some(object_store_url) = + self.table_paths.first().map(ListingTableUrl::object_store) + else { + return Ok(ScanResult::new(Arc::new(EmptyExec::new(Arc::new( + Schema::empty(), + ))))); + }; + + let file_source = self.create_file_source_with_schema_adapter()?; + + // create the execution plan + let plan = self + .options + .format + .create_physical_plan( + state, + FileScanConfigBuilder::new( + object_store_url, + Arc::clone(&self.file_schema), + file_source, + ) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection(projection) + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_table_partition_cols(table_partition_cols) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .build(), + ) + .await?; + + Ok(ScanResult::new(plan)) + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> datafusion_common::Result> { + let partition_column_names = self + .options + .table_partition_cols + .iter() + .map(|col| col.0.as_str()) + .collect::>(); + filters + .iter() + .map(|filter| { + if can_be_evaluated_for_partition_pruning(&partition_column_names, filter) + { + // if filter can be handled by partition pruning, it is exact + return Ok(TableProviderFilterPushDown::Exact); + } + + Ok(TableProviderFilterPushDown::Inexact) + }) + .collect() + } + + fn get_table_definition(&self) -> Option<&str> { + self.definition.as_deref() + } + + async fn insert_into( + &self, + state: &dyn Session, + input: Arc, + insert_op: InsertOp, + ) -> datafusion_common::Result> { + // Check that the schema of the plan matches the schema of this table. + self.schema() + .logically_equivalent_names_and_types(&input.schema())?; + + let table_path = &self.table_paths()[0]; + if !table_path.is_collection() { + return plan_err!( + "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \ + To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE" + ); + } + + // Get the object store for the table path. 
+ let store = state.runtime_env().object_store(table_path)?; + + let file_list_stream = pruned_partition_list( + state, + store.as_ref(), + table_path, + &[], + &self.options.file_extension, + &self.options.table_partition_cols, + ) + .await?; + + let file_group = file_list_stream.try_collect::>().await?.into(); + let keep_partition_by_columns = + state.config_options().execution.keep_partition_by_columns; + + // Sink related option, apart from format + let config = FileSinkConfig { + original_url: String::default(), + object_store_url: self.table_paths()[0].object_store(), + table_paths: self.table_paths().clone(), + file_group, + output_schema: self.schema(), + table_partition_cols: self.options.table_partition_cols.clone(), + insert_op, + keep_partition_by_columns, + file_extension: self.options().format.get_ext(), + }; + + let orderings = self.try_create_output_ordering(state.execution_props())?; + // It is sufficient to pass only one of the equivalent orderings: + let order_requirements = orderings.into_iter().next().map(Into::into); + + self.options() + .format + .create_writer_physical_plan(input, state, config, order_requirements) + .await + } + + fn get_column_default(&self, column: &str) -> Option<&Expr> { + self.column_defaults.get(column) + } +} + +impl ListingTable { + /// Get the list of files for a scan as well as the file level statistics. + /// The list is grouped to let the execution plan know how the files should + /// be distributed to different threads / executors. + pub async fn list_files_for_scan<'a>( + &'a self, + ctx: &'a dyn Session, + filters: &'a [Expr], + limit: Option, + ) -> datafusion_common::Result<(Vec, Statistics)> { + let store = if let Some(url) = self.table_paths.first() { + ctx.runtime_env().object_store(url)? + } else { + return Ok((vec![], Statistics::new_unknown(&self.file_schema))); + }; + // list files (with partitions) + let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { + pruned_partition_list( + ctx, + store.as_ref(), + table_path, + filters, + &self.options.file_extension, + &self.options.table_partition_cols, + ) + })) + .await?; + let meta_fetch_concurrency = + ctx.config_options().execution.meta_fetch_concurrency; + let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency); + // collect the statistics if required by the config + let files = file_list + .map(|part_file| async { + let part_file = part_file?; + let statistics = if self.options.collect_stat { + self.do_collect_statistics(ctx, &store, &part_file).await? 
+ } else { + Arc::new(Statistics::new_unknown(&self.file_schema)) + }; + Ok(part_file.with_statistics(statistics)) + }) + .boxed() + .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency); + + let (file_group, inexact_stats) = + get_files_with_limit(files, limit, self.options.collect_stat).await?; + + let file_groups = file_group.split_files(self.options.target_partitions); + let (mut file_groups, mut stats) = compute_all_files_statistics( + file_groups, + self.schema(), + self.options.collect_stat, + inexact_stats, + )?; + + let schema_adapter = self.create_schema_adapter(); + let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; + + stats.column_statistics = + schema_mapper.map_column_statistics(&stats.column_statistics)?; + file_groups.iter_mut().try_for_each(|file_group| { + if let Some(stat) = file_group.statistics_mut() { + stat.column_statistics = + schema_mapper.map_column_statistics(&stat.column_statistics)?; + } + Ok::<_, DataFusionError>(()) + })?; + Ok((file_groups, stats)) + } + + /// Collects statistics for a given partitioned file. + /// + /// This method first checks if the statistics for the given file are already cached. + /// If they are, it returns the cached statistics. + /// If they are not, it infers the statistics from the file and stores them in the cache. + async fn do_collect_statistics( + &self, + ctx: &dyn Session, + store: &Arc, + part_file: &PartitionedFile, + ) -> datafusion_common::Result> { + match self + .collected_statistics + .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) + { + Some(statistics) => Ok(statistics), + None => { + let statistics = self + .options + .format + .infer_stats( + ctx, + store, + Arc::clone(&self.file_schema), + &part_file.object_meta, + ) + .await?; + let statistics = Arc::new(statistics); + self.collected_statistics.put_with_extra( + &part_file.object_meta.location, + Arc::clone(&statistics), + &part_file.object_meta, + ); + Ok(statistics) + } + } + } +} + +/// Processes a stream of partitioned files and returns a `FileGroup` containing the files. +/// +/// This function collects files from the provided stream until either: +/// 1. The stream is exhausted +/// 2. The accumulated number of rows exceeds the provided `limit` (if specified) +/// +/// # Arguments +/// * `files` - A stream of `Result` items to process +/// * `limit` - An optional row count limit. If provided, the function will stop collecting files +/// once the accumulated number of rows exceeds this limit +/// * `collect_stats` - Whether to collect and accumulate statistics from the files +/// +/// # Returns +/// A `Result` containing a `FileGroup` with the collected files +/// and a boolean indicating whether the statistics are inexact. +/// +/// # Note +/// The function will continue processing files if statistics are not available or if the +/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated +/// but files will still be collected. +async fn get_files_with_limit( + files: impl Stream>, + limit: Option, + collect_stats: bool, +) -> datafusion_common::Result<(FileGroup, bool)> { + let mut file_group = FileGroup::default(); + // Fusing the stream allows us to call next safely even once it is finished. 
+ let mut all_files = Box::pin(files.fuse()); + enum ProcessingState { + ReadingFiles, + ReachedLimit, + } + + let mut state = ProcessingState::ReadingFiles; + let mut num_rows = Precision::Absent; + + while let Some(file_result) = all_files.next().await { + // Early exit if we've already reached our limit + if matches!(state, ProcessingState::ReachedLimit) { + break; + } + + let file = file_result?; + + // Update file statistics regardless of state + if collect_stats { + if let Some(file_stats) = &file.statistics { + num_rows = if file_group.is_empty() { + // For the first file, just take its row count + file_stats.num_rows + } else { + // For subsequent files, accumulate the counts + num_rows.add(&file_stats.num_rows) + }; + } + } + + // Always add the file to our group + file_group.push(file); + + // Check if we've hit the limit (if one was specified) + if let Some(limit) = limit { + if let Precision::Exact(row_count) = num_rows { + if row_count > limit { + state = ProcessingState::ReachedLimit; + } + } + } + } + // If we still have files in the stream, it means that the limit kicked + // in, and the statistic could have been different had we processed the + // files in a different order. + let inexact_stats = all_files.next().await.is_some(); + Ok((file_group, inexact_stats)) +} diff --git a/datafusion/core/src/datasource/dynamic_file.rs b/datafusion/core/src/datasource/dynamic_file.rs index b30d53e586911..256a11ba693b5 100644 --- a/datafusion/core/src/datasource/dynamic_file.rs +++ b/datafusion/core/src/datasource/dynamic_file.rs @@ -20,6 +20,7 @@ use std::sync::Arc; +use crate::datasource::listing::ListingTableConfigExt; use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; use crate::datasource::TableProvider; use crate::error::Result; diff --git a/datafusion/core/src/datasource/listing/mod.rs b/datafusion/core/src/datasource/listing/mod.rs index a58db55bccb61..c206566a65941 100644 --- a/datafusion/core/src/datasource/listing/mod.rs +++ b/datafusion/core/src/datasource/listing/mod.rs @@ -20,7 +20,8 @@ mod table; pub use datafusion_catalog_listing::helpers; +pub use datafusion_catalog_listing::{ListingOptions, ListingTable, ListingTableConfig}; pub use datafusion_datasource::{ FileRange, ListingTableUrl, PartitionedFile, PartitionedFileStream, }; -pub use table::{ListingOptions, ListingTable, ListingTableConfig}; +pub use table::ListingTableConfigExt; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 4ffb6d41864f3..3333b70676203 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -15,226 +15,42 @@ // specific language governing permissions and limitations // under the License. -//! The table implementation. 
- -use super::{ - helpers::{expr_applicable_for_cols, pruned_partition_list}, - ListingTableUrl, PartitionedFile, -}; -use crate::{ - datasource::file_format::{file_compression_type::FileCompressionType, FileFormat}, - datasource::physical_plan::FileSinkConfig, - execution::context::SessionState, -}; -use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; -use arrow_schema::Schema; +use crate::execution::SessionState; use async_trait::async_trait; -use datafusion_catalog::{ScanArgs, ScanResult, Session, TableProvider}; -use datafusion_common::{ - config_datafusion_err, config_err, internal_datafusion_err, internal_err, plan_err, - project_schema, stats::Precision, Constraints, DataFusionError, Result, SchemaExt, -}; -use datafusion_datasource::{ - compute_all_files_statistics, - file::FileSource, - file_groups::FileGroup, - file_scan_config::{FileScanConfig, FileScanConfigBuilder}, - schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory}, -}; -use datafusion_execution::{ - cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, - config::SessionConfig, -}; -use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::{ - dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, -}; -use datafusion_physical_expr::create_lex_ordering; -use datafusion_physical_expr_adapter::PhysicalExprAdapterFactory; -use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; -use futures::{future, stream, Stream, StreamExt, TryStreamExt}; -use itertools::Itertools; -use object_store::ObjectStore; -use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; - -/// Indicates the source of the schema for a [`ListingTable`] -// PartialEq required for assert_eq! in tests -#[derive(Debug, Clone, Copy, PartialEq, Default)] -pub enum SchemaSource { - /// Schema is not yet set (initial state) - #[default] - Unset, - /// Schema was inferred from first table_path - Inferred, - /// Schema was specified explicitly via with_schema - Specified, -} +use datafusion_catalog_listing::{ListingOptions, ListingTableConfig}; +use datafusion_common::{config_datafusion_err, internal_datafusion_err}; +use datafusion_session::Session; +use futures::StreamExt; +use std::collections::HashMap; -/// Configuration for creating a [`ListingTable`] -/// -/// # Schema Evolution Support -/// -/// This configuration supports schema evolution through the optional -/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: +/// Extension trait for [`ListingTableConfig`] that supports inferring schemas /// -/// - **Type coercion requirements**: When you need custom logic for converting between -/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) -/// - **Column mapping**: You need to map columns with a legacy name to a new name -/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. -/// -/// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles -/// basic schema compatibility cases. -/// -#[derive(Debug, Clone, Default)] -pub struct ListingTableConfig { - /// Paths on the `ObjectStore` for creating `ListingTable`. - /// They should share the same schema and object store. - pub table_paths: Vec, - /// Optional `SchemaRef` for the to be created `ListingTable`. 
- /// - /// See details on [`ListingTableConfig::with_schema`] - pub file_schema: Option, - /// Optional [`ListingOptions`] for the to be created [`ListingTable`]. - /// - /// See details on [`ListingTableConfig::with_listing_options`] - pub options: Option, - /// Tracks the source of the schema information - schema_source: SchemaSource, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - schema_adapter_factory: Option>, - /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters - expr_adapter_factory: Option>, -} - -impl ListingTableConfig { - /// Creates new [`ListingTableConfig`] for reading the specified URL - pub fn new(table_path: ListingTableUrl) -> Self { - Self { - table_paths: vec![table_path], - ..Default::default() - } - } - - /// Creates new [`ListingTableConfig`] with multiple table paths. - /// - /// See [`Self::infer_options`] for details on what happens with multiple paths - pub fn new_with_multi_paths(table_paths: Vec) -> Self { - Self { - table_paths, - ..Default::default() - } - } - - /// Returns the source of the schema for this configuration - pub fn schema_source(&self) -> SchemaSource { - self.schema_source - } - /// Set the `schema` for the overall [`ListingTable`] - /// - /// [`ListingTable`] will automatically coerce, when possible, the schema - /// for individual files to match this schema. - /// - /// If a schema is not provided, it is inferred using - /// [`Self::infer_schema`]. - /// - /// If the schema is provided, it must contain only the fields in the file - /// without the table partitioning columns. - /// - /// # Example: Specifying Table Schema - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{Schema, Field, DataType}; - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// let schema = Arc::new(Schema::new(vec![ - /// Field::new("id", DataType::Int64, false), - /// Field::new("name", DataType::Utf8, true), - /// ])); - /// - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(listing_options) // Set options first - /// .with_schema(schema); // Then set schema - /// ``` - pub fn with_schema(self, schema: SchemaRef) -> Self { - // Note: We preserve existing options state, but downstream code may expect - // options to be set. Consider calling with_listing_options() or infer_options() - // before operations that require options to be present. - debug_assert!( - self.options.is_some() || cfg!(test), - "ListingTableConfig::with_schema called without options set. \ - Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." - ); - - Self { - file_schema: Some(schema), - schema_source: SchemaSource::Specified, - ..self - } - } - - /// Add `listing_options` to [`ListingTableConfig`] - /// - /// If not provided, format and other options are inferred via - /// [`Self::infer_options`]. 
- /// - /// # Example: Configuring Parquet Files with Custom Options - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) - /// .with_file_extension(".parquet") - /// .with_collect_stat(true); - /// - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(options); // Configure file format and options - /// ``` - pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { - // Note: This method properly sets options, but be aware that downstream - // methods like infer_schema() and try_new() require both schema and options - // to be set to function correctly. - debug_assert!( - !self.table_paths.is_empty() || cfg!(test), - "ListingTableConfig::with_listing_options called without table_paths set. \ - Consider calling new() or new_with_multi_paths() first to establish table paths." - ); - - Self { - options: Some(listing_options), - ..self - } - } - - /// Returns a tuple of `(file_extension, optional compression_extension)` - /// - /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))` - /// For example a path ending with blah.test.csv returns `("csv", None)` - fn infer_file_extension_and_compression_type( - path: &str, - ) -> Result<(String, Option)> { - let mut exts = path.rsplit('.'); - - let split = exts.next().unwrap_or(""); - - let file_compression_type = FileCompressionType::from_str(split) - .unwrap_or(FileCompressionType::UNCOMPRESSED); - - if file_compression_type.is_compressed() { - let split2 = exts.next().unwrap_or(""); - Ok((split2.to_string(), Some(split.to_string()))) - } else { - Ok((split.to_string(), None)) - } - } - +/// This trait exists because the following inference methods only +/// work for [`SessionState`] implementations of [`Session`]. +/// See [`ListingTableConfig`] for the remaining inference methods. +#[async_trait] +pub trait ListingTableConfigExt { /// Infer `ListingOptions` based on `table_path` and file suffix. /// /// The format is inferred based on the first `table_path`. - pub async fn infer_options(self, state: &dyn Session) -> Result { + async fn infer_options( + self, + state: &dyn Session, + ) -> datafusion_common::Result; + + /// Convenience method to call both [`Self::infer_options`] and [`ListingTableConfig::infer_schema`] + async fn infer( + self, + state: &dyn Session, + ) -> datafusion_common::Result; +} + +#[async_trait] +impl ListingTableConfigExt for ListingTableConfig { + async fn infer_options( + self, + state: &dyn Session, + ) -> datafusion_common::Result { let store = if let Some(url) = self.table_paths.first() { state.runtime_env().object_store(url)? } else { @@ -281,1299 +97,19 @@ impl ListingTableConfig { .with_target_partitions(state.config().target_partitions()) .with_collect_stat(state.config().collect_statistics()); - Ok(Self { - table_paths: self.table_paths, - file_schema: self.file_schema, - options: Some(listing_options), - schema_source: self.schema_source, - schema_adapter_factory: self.schema_adapter_factory, - expr_adapter_factory: self.expr_adapter_factory, - }) - } - - /// Infer the [`SchemaRef`] based on `table_path`s. - /// - /// This method infers the table schema using the first `table_path`. 
- /// See [`ListingOptions::infer_schema`] for more details - /// - /// # Errors - /// * if `self.options` is not set. See [`Self::with_listing_options`] - pub async fn infer_schema(self, state: &dyn Session) -> Result { - match self.options { - Some(options) => { - let ListingTableConfig { - table_paths, - file_schema, - options: _, - schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, - } = self; - - let (schema, new_schema_source) = match file_schema { - Some(schema) => (schema, schema_source), // Keep existing source if schema exists - None => { - if let Some(url) = table_paths.first() { - ( - options.infer_schema(state, url).await?, - SchemaSource::Inferred, - ) - } else { - (Arc::new(Schema::empty()), SchemaSource::Inferred) - } - } - }; - - Ok(Self { - table_paths, - file_schema: Some(schema), - options: Some(options), - schema_source: new_schema_source, - schema_adapter_factory, - expr_adapter_factory: physical_expr_adapter_factory, - }) - } - None => internal_err!("No `ListingOptions` set for inferring schema"), - } + Ok(self.with_listing_options(listing_options)) } - /// Convenience method to call both [`Self::infer_options`] and [`Self::infer_schema`] - pub async fn infer(self, state: &dyn Session) -> Result { + async fn infer(self, state: &dyn Session) -> datafusion_common::Result { self.infer_options(state).await?.infer_schema(state).await } - - /// Infer the partition columns from `table_paths`. - /// - /// # Errors - /// * if `self.options` is not set. See [`Self::with_listing_options`] - pub async fn infer_partitions_from_path(self, state: &dyn Session) -> Result { - match self.options { - Some(options) => { - let Some(url) = self.table_paths.first() else { - return config_err!("No table path found"); - }; - let partitions = options - .infer_partitions(state, url) - .await? - .into_iter() - .map(|col_name| { - ( - col_name, - DataType::Dictionary( - Box::new(DataType::UInt16), - Box::new(DataType::Utf8), - ), - ) - }) - .collect::>(); - let options = options.with_table_partition_cols(partitions); - Ok(Self { - table_paths: self.table_paths, - file_schema: self.file_schema, - options: Some(options), - schema_source: self.schema_source, - schema_adapter_factory: self.schema_adapter_factory, - expr_adapter_factory: self.expr_adapter_factory, - }) - } - None => config_err!("No `ListingOptions` set for inferring schema"), - } - } - - /// Set the [`SchemaAdapterFactory`] for the [`ListingTable`] - /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. - /// - /// If not provided, a default schema adapter factory will be used. 
- /// - /// # Example: Custom Schema Adapter for Type Coercion - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # - /// # #[derive(Debug)] - /// # struct MySchemaAdapterFactory; - /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { - /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { - /// # unimplemented!() - /// # } - /// # } - /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(listing_options) - /// .with_schema(table_schema) - /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); - /// ``` - pub fn with_schema_adapter_factory( - self, - schema_adapter_factory: Arc, - ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this configuration - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() - } - - /// Set the [`PhysicalExprAdapterFactory`] for the [`ListingTable`] - /// - /// The expression adapter factory is used to create physical expression adapters that can - /// handle schema evolution and type conversions when evaluating expressions - /// with different schemas than the table schema. - /// - /// If not provided, a default physical expression adapter factory will be used unless a custom - /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. - /// - /// See for details on this transition. - pub fn with_expr_adapter_factory( - self, - expr_adapter_factory: Arc, - ) -> Self { - Self { - expr_adapter_factory: Some(expr_adapter_factory), - ..self - } - } -} - -/// Options for creating a [`ListingTable`] -#[derive(Clone, Debug)] -pub struct ListingOptions { - /// A suffix on which files should be filtered (leave empty to - /// keep all files on the path) - pub file_extension: String, - /// The file format - pub format: Arc, - /// The expected partition column names in the folder structure. - /// See [Self::with_table_partition_cols] for details - pub table_partition_cols: Vec<(String, DataType)>, - /// Set true to try to guess statistics from the files. - /// This can add a lot of overhead as it will usually require files - /// to be opened and at least partially parsed. - pub collect_stat: bool, - /// Group files to avoid that the number of partitions exceeds - /// this limit - pub target_partitions: usize, - /// Optional pre-known sort order(s). Must be `SortExpr`s. - /// - /// DataFusion may take advantage of this ordering to omit sorts - /// or use more efficient algorithms. Currently sortedness must be - /// provided if it is known by some external mechanism, but may in - /// the future be automatically determined, for example using - /// parquet metadata. 
- /// - /// See - /// - /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) - /// where each ordering consists of an individual lexicographic - /// ordering (encapsulated by a `Vec`). If there aren't - /// multiple equivalent orderings, the outer `Vec` will have a - /// single element. - pub file_sort_order: Vec>, -} - -impl ListingOptions { - /// Creates an options instance with the given format - /// Default values: - /// - use default file extension filter - /// - no input partition to discover - /// - one target partition - /// - do not collect statistics - pub fn new(format: Arc) -> Self { - Self { - file_extension: format.get_ext(), - format, - table_partition_cols: vec![], - collect_stat: false, - target_partitions: 1, - file_sort_order: vec![], - } - } - - /// Set options from [`SessionConfig`] and returns self. - /// - /// Currently this sets `target_partitions` and `collect_stat` - /// but if more options are added in the future that need to be coordinated - /// they will be synchronized through this method. - pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { - self = self.with_target_partitions(config.target_partitions()); - self = self.with_collect_stat(config.collect_statistics()); - self - } - - /// Set file extension on [`ListingOptions`] and returns self. - /// - /// # Example - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::SessionContext; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension(".parquet"); - /// - /// assert_eq!(listing_options.file_extension, ".parquet"); - /// ``` - pub fn with_file_extension(mut self, file_extension: impl Into) -> Self { - self.file_extension = file_extension.into(); - self - } - - /// Optionally set file extension on [`ListingOptions`] and returns self. - /// - /// If `file_extension` is `None`, the file extension will not be changed - /// - /// # Example - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::SessionContext; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// let extension = Some(".parquet"); - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension_opt(extension); - /// - /// assert_eq!(listing_options.file_extension, ".parquet"); - /// ``` - pub fn with_file_extension_opt(mut self, file_extension: Option) -> Self - where - S: Into, - { - if let Some(file_extension) = file_extension { - self.file_extension = file_extension.into(); - } - self - } - - /// Set `table partition columns` on [`ListingOptions`] and returns self. - /// - /// "partition columns," used to support [Hive Partitioning], are - /// columns added to the data that is read, based on the folder - /// structure where the data resides. - /// - /// For example, give the following files in your filesystem: - /// - /// ```text - /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet - /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet - /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet - /// ``` - /// - /// A [`ListingTable`] created at `/mnt/nyctaxi/` with partition - /// columns "year" and "month" will include new `year` and `month` - /// columns while reading the files. 
The `year` column would have - /// value `2022` and the `month` column would have value `01` for - /// the rows read from - /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet` - /// - ///# Notes - /// - /// - If only one level (e.g. `year` in the example above) is - /// specified, the other levels are ignored but the files are - /// still read. - /// - /// - Files that don't follow this partitioning scheme will be - /// ignored. - /// - /// - Since the columns have the same value for all rows read from - /// each individual file (such as dates), they are typically - /// dictionary encoded for efficiency. You may use - /// [`wrap_partition_type_in_dict`] to request a - /// dictionary-encoded type. - /// - /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. - /// - /// # Example - /// - /// ``` - /// # use std::sync::Arc; - /// # use arrow::datatypes::DataType; - /// # use datafusion::prelude::col; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// // listing options for files with paths such as `/mnt/data/col_a=x/col_b=y/data.parquet` - /// // `col_a` and `col_b` will be included in the data read from those files - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8), - /// ("col_b".to_string(), DataType::Utf8)]); - /// - /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8), - /// ("col_b".to_string(), DataType::Utf8)]); - /// ``` - /// - /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html - /// [`wrap_partition_type_in_dict`]: crate::datasource::physical_plan::wrap_partition_type_in_dict - pub fn with_table_partition_cols( - mut self, - table_partition_cols: Vec<(String, DataType)>, - ) -> Self { - self.table_partition_cols = table_partition_cols; - self - } - - /// Set stat collection on [`ListingOptions`] and returns self. - /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_collect_stat(true); - /// - /// assert_eq!(listing_options.collect_stat, true); - /// ``` - pub fn with_collect_stat(mut self, collect_stat: bool) -> Self { - self.collect_stat = collect_stat; - self - } - - /// Set number of target partitions on [`ListingOptions`] and returns self. - /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_target_partitions(8); - /// - /// assert_eq!(listing_options.target_partitions, 8); - /// ``` - pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { - self.target_partitions = target_partitions; - self - } - - /// Set file sort order on [`ListingOptions`] and returns self. 
- /// - /// ``` - /// # use std::sync::Arc; - /// # use datafusion::prelude::col; - /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; - /// - /// // Tell datafusion that the files are sorted by column "a" - /// let file_sort_order = vec![vec![ - /// col("a").sort(true, true) - /// ]]; - /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_sort_order(file_sort_order.clone()); - /// - /// assert_eq!(listing_options.file_sort_order, file_sort_order); - /// ``` - pub fn with_file_sort_order(mut self, file_sort_order: Vec>) -> Self { - self.file_sort_order = file_sort_order; - self - } - - /// Infer the schema of the files at the given path on the provided object store. - /// - /// If the table_path contains one or more files (i.e. it is a directory / - /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`] - /// - /// Note: The inferred schema does not include any partitioning columns. - /// - /// This method is called as part of creating a [`ListingTable`]. - pub async fn infer_schema<'a>( - &'a self, - state: &dyn Session, - table_path: &'a ListingTableUrl, - ) -> Result { - let store = state.runtime_env().object_store(table_path)?; - - let files: Vec<_> = table_path - .list_all_files(state, store.as_ref(), &self.file_extension) - .await? - // Empty files cannot affect schema but may throw when trying to read for it - .try_filter(|object_meta| future::ready(object_meta.size > 0)) - .try_collect() - .await?; - - let schema = self.format.infer_schema(state, &store, &files).await?; - - Ok(schema) - } - - /// Infers the partition columns stored in `LOCATION` and compares - /// them with the columns provided in `PARTITIONED BY` to help prevent - /// accidental corrupts of partitioned tables. - /// - /// Allows specifying partial partitions. - pub async fn validate_partitions( - &self, - state: &dyn Session, - table_path: &ListingTableUrl, - ) -> Result<()> { - if self.table_partition_cols.is_empty() { - return Ok(()); - } - - if !table_path.is_collection() { - return plan_err!( - "Can't create a partitioned table backed by a single file, \ - perhaps the URL is missing a trailing slash?" - ); - } - - let inferred = self.infer_partitions(state, table_path).await?; - - // no partitioned files found on disk - if inferred.is_empty() { - return Ok(()); - } - - let table_partition_names = self - .table_partition_cols - .iter() - .map(|(col_name, _)| col_name.clone()) - .collect_vec(); - - if inferred.len() < table_partition_names.len() { - return plan_err!( - "Inferred partitions to be {:?}, but got {:?}", - inferred, - table_partition_names - ); - } - - // match prefix to allow creating tables with partial partitions - for (idx, col) in table_partition_names.iter().enumerate() { - if &inferred[idx] != col { - return plan_err!( - "Inferred partitions to be {:?}, but got {:?}", - inferred, - table_partition_names - ); - } - } - - Ok(()) - } - - /// Infer the partitioning at the given path on the provided object store. - /// For performance reasons, it doesn't read all the files on disk - /// and therefore may fail to detect invalid partitioning. 
- pub(crate) async fn infer_partitions( - &self, - state: &dyn Session, - table_path: &ListingTableUrl, - ) -> Result> { - let store = state.runtime_env().object_store(table_path)?; - - // only use 10 files for inference - // This can fail to detect inconsistent partition keys - // A DFS traversal approach of the store can help here - let files: Vec<_> = table_path - .list_all_files(state, store.as_ref(), &self.file_extension) - .await? - .take(10) - .try_collect() - .await?; - - let stripped_path_parts = files.iter().map(|file| { - table_path - .strip_prefix(&file.location) - .unwrap() - .collect_vec() - }); - - let partition_keys = stripped_path_parts - .map(|path_parts| { - path_parts - .into_iter() - .rev() - .skip(1) // get parents only; skip the file itself - .rev() - // Partitions are expected to follow the format "column_name=value", so we - // should ignore any path part that cannot be parsed into the expected format - .filter(|s| s.contains('=')) - .map(|s| s.split('=').take(1).collect()) - .collect_vec() - }) - .collect_vec(); - - match partition_keys.into_iter().all_equal_value() { - Ok(v) => Ok(v), - Err(None) => Ok(vec![]), - Err(Some(diff)) => { - let mut sorted_diff = [diff.0, diff.1]; - sorted_diff.sort(); - plan_err!("Found mixed partition values on disk {:?}", sorted_diff) - } - } - } -} - -/// Built in [`TableProvider`] that reads data from one or more files as a single table. -/// -/// The files are read using an [`ObjectStore`] instance, for example from -/// local files or objects from AWS S3. -/// -/// # Features: -/// * Reading multiple files as a single table -/// * Hive style partitioning (e.g., directories named `date=2024-06-01`) -/// * Merges schemas from files with compatible but not identical schemas (see [`ListingTableConfig::file_schema`]) -/// * `limit`, `filter` and `projection` pushdown for formats that support it (e.g., -/// Parquet) -/// * Statistics collection and pruning based on file metadata -/// * Pre-existing sort order (see [`ListingOptions::file_sort_order`]) -/// * Metadata caching to speed up repeated queries (see [`FileMetadataCache`]) -/// * Statistics caching (see [`FileStatisticsCache`]) -/// -/// [`FileMetadataCache`]: datafusion_execution::cache::cache_manager::FileMetadataCache -/// -/// # Reading Directories and Hive Style Partitioning -/// -/// For example, given the `table1` directory (or object store prefix) -/// -/// ```text -/// table1 -/// ├── file1.parquet -/// └── file2.parquet -/// ``` -/// -/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as -/// a single table, merging the schemas if the files have compatible but not -/// identical schemas. -/// -/// Given the `table2` directory (or object store prefix) -/// -/// ```text -/// table2 -/// ├── date=2024-06-01 -/// │ ├── file3.parquet -/// │ └── file4.parquet -/// └── date=2024-06-02 -/// └── file5.parquet -/// ``` -/// -/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and -/// `file5.parquet` as a single table, again merging schemas if necessary. -/// -/// Given the hive style partitioning structure (e.g,. directories named -/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date` -/// column when reading the table: -/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01` -/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`. -/// -/// If the query has a predicate like `WHERE date = '2024-06-01'` -/// only the corresponding directory will be read. 
-/// -/// # See Also -/// -/// 1. [`ListingTableConfig`]: Configuration options -/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable` -/// -/// [`DataSourceExec`]: crate::datasource::source::DataSourceExec -/// -/// # Caching Metadata -/// -/// Some formats, such as Parquet, use the `FileMetadataCache` to cache file -/// metadata that is needed to execute but expensive to read, such as row -/// groups and statistics. The cache is scoped to the [`SessionContext`] and can -/// be configured via the [runtime config options]. -/// -/// [`SessionContext`]: crate::prelude::SessionContext -/// [runtime config options]: https://datafusion.apache.org/user-guide/configs.html#runtime-configuration-settings -/// -/// # Example: Read a directory of parquet files using a [`ListingTable`] -/// -/// ```no_run -/// # use datafusion::prelude::SessionContext; -/// # use datafusion::error::Result; -/// # use std::sync::Arc; -/// # use datafusion::datasource::{ -/// # listing::{ -/// # ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, -/// # }, -/// # file_format::parquet::ParquetFormat, -/// # }; -/// # #[tokio::main] -/// # async fn main() -> Result<()> { -/// let ctx = SessionContext::new(); -/// let session_state = ctx.state(); -/// let table_path = "/path/to/parquet"; -/// -/// // Parse the path -/// let table_path = ListingTableUrl::parse(table_path)?; -/// -/// // Create default parquet options -/// let file_format = ParquetFormat::new(); -/// let listing_options = ListingOptions::new(Arc::new(file_format)) -/// .with_file_extension(".parquet"); -/// -/// // Resolve the schema -/// let resolved_schema = listing_options -/// .infer_schema(&session_state, &table_path) -/// .await?; -/// -/// let config = ListingTableConfig::new(table_path) -/// .with_listing_options(listing_options) -/// .with_schema(resolved_schema); -/// -/// // Create a new TableProvider -/// let provider = Arc::new(ListingTable::try_new(config)?); -/// -/// // This provider can now be read as a dataframe: -/// let df = ctx.read_table(provider.clone()); -/// -/// // or registered as a named table: -/// ctx.register_table("my_table", provider); -/// -/// # Ok(()) -/// # } -/// ``` -#[derive(Debug, Clone)] -pub struct ListingTable { - table_paths: Vec, - /// `file_schema` contains only the columns physically stored in the data files themselves. - /// - Represents the actual fields found in files like Parquet, CSV, etc. 
- /// - Used when reading the raw data from files - file_schema: SchemaRef, - /// `table_schema` combines `file_schema` + partition columns - /// - Partition columns are derived from directory paths (not stored in files) - /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet` - table_schema: SchemaRef, - /// Indicates how the schema was derived (inferred or explicitly specified) - schema_source: SchemaSource, - /// Options used to configure the listing table such as the file format - /// and partitioning information - options: ListingOptions, - /// The SQL definition for this table, if any - definition: Option, - /// Cache for collected file statistics - collected_statistics: FileStatisticsCache, - /// Constraints applied to this table - constraints: Constraints, - /// Column default expressions for columns that are not physically present in the data files - column_defaults: HashMap, - /// Optional [`SchemaAdapterFactory`] for creating schema adapters - schema_adapter_factory: Option>, - /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters - expr_adapter_factory: Option>, -} - -impl ListingTable { - /// Create new [`ListingTable`] - /// - /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] - pub fn try_new(config: ListingTableConfig) -> Result { - // Extract schema_source before moving other parts of the config - let schema_source = config.schema_source(); - - let file_schema = config - .file_schema - .ok_or_else(|| internal_datafusion_err!("No schema provided."))?; - - let options = config - .options - .ok_or_else(|| internal_datafusion_err!("No ListingOptions provided"))?; - - // Add the partition columns to the file schema - let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned()); - for (part_col_name, part_col_type) in &options.table_partition_cols { - builder.push(Field::new(part_col_name, part_col_type.clone(), false)); - } - - let table_schema = Arc::new( - builder - .finish() - .with_metadata(file_schema.metadata().clone()), - ); - - let table = Self { - table_paths: config.table_paths, - file_schema, - table_schema, - schema_source, - options, - definition: None, - collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), - constraints: Constraints::default(), - column_defaults: HashMap::new(), - schema_adapter_factory: config.schema_adapter_factory, - expr_adapter_factory: config.expr_adapter_factory, - }; - - Ok(table) - } - - /// Assign constraints - pub fn with_constraints(mut self, constraints: Constraints) -> Self { - self.constraints = constraints; - self - } - - /// Assign column defaults - pub fn with_column_defaults( - mut self, - column_defaults: HashMap, - ) -> Self { - self.column_defaults = column_defaults; - self - } - - /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. - /// - /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics - /// multiple times in the same session. - /// - /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. 
- pub fn with_cache(mut self, cache: Option) -> Self { - self.collected_statistics = - cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); - self - } - - /// Specify the SQL definition for this table, if any - pub fn with_definition(mut self, definition: Option) -> Self { - self.definition = definition; - self - } - - /// Get paths ref - pub fn table_paths(&self) -> &Vec { - &self.table_paths - } - - /// Get options ref - pub fn options(&self) -> &ListingOptions { - &self.options - } - - /// Get the schema source - pub fn schema_source(&self) -> SchemaSource { - self.schema_source - } - - /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] - /// - /// The schema adapter factory is used to create schema adapters that can - /// handle schema evolution and type conversions when reading files with - /// different schemas than the table schema. - /// - /// # Example: Adding Schema Evolution Support - /// ```rust - /// # use std::sync::Arc; - /// # use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingOptions, ListingTableUrl}; - /// # use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; - /// # use datafusion::datasource::file_format::parquet::ParquetFormat; - /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; - /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); - /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); - /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); - /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); - /// # let table = ListingTable::try_new(config).unwrap(); - /// let table_with_evolution = table - /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); - /// ``` - /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. - pub fn with_schema_adapter_factory( - self, - schema_adapter_factory: Arc, - ) -> Self { - Self { - schema_adapter_factory: Some(schema_adapter_factory), - ..self - } - } - - /// Get the [`SchemaAdapterFactory`] for this table - pub fn schema_adapter_factory(&self) -> Option<&Arc> { - self.schema_adapter_factory.as_ref() - } - - /// Creates a schema adapter for mapping between file and table schemas - /// - /// Uses the configured schema adapter factory if available, otherwise falls back - /// to the default implementation. - fn create_schema_adapter(&self) -> Box { - let table_schema = self.schema(); - match &self.schema_adapter_factory { - Some(factory) => { - factory.create_with_projected_schema(Arc::clone(&table_schema)) - } - None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), - } - } - - /// Creates a file source and applies schema adapter factory if available - fn create_file_source_with_schema_adapter(&self) -> Result> { - let mut source = self.options.format.file_source(); - // Apply schema adapter to source if available - // - // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. - // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. 
- if let Some(factory) = &self.schema_adapter_factory { - source = source.with_schema_adapter_factory(Arc::clone(factory))?; - } - Ok(source) - } - - /// If file_sort_order is specified, creates the appropriate physical expressions - fn try_create_output_ordering( - &self, - execution_props: &ExecutionProps, - ) -> Result> { - create_lex_ordering( - &self.table_schema, - &self.options.file_sort_order, - execution_props, - ) - } -} - -// Expressions can be used for partition pruning if they can be evaluated using -// only the partition columns and there are partition columns. -fn can_be_evaluated_for_partition_pruning( - partition_column_names: &[&str], - expr: &Expr, -) -> bool { - !partition_column_names.is_empty() - && expr_applicable_for_cols(partition_column_names, expr) -} - -#[async_trait] -impl TableProvider for ListingTable { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> SchemaRef { - Arc::clone(&self.table_schema) - } - - fn constraints(&self) -> Option<&Constraints> { - Some(&self.constraints) - } - - fn table_type(&self) -> TableType { - TableType::Base - } - - async fn scan( - &self, - state: &dyn Session, - projection: Option<&Vec>, - filters: &[Expr], - limit: Option, - ) -> Result> { - let options = ScanArgs::default() - .with_projection(projection.map(|p| p.as_slice())) - .with_filters(Some(filters)) - .with_limit(limit); - Ok(self.scan_with_args(state, options).await?.into_inner()) - } - - async fn scan_with_args<'a>( - &self, - state: &dyn Session, - args: ScanArgs<'a>, - ) -> Result { - let projection = args.projection().map(|p| p.to_vec()); - let filters = args.filters().map(|f| f.to_vec()).unwrap_or_default(); - let limit = args.limit(); - - // extract types of partition columns - let table_partition_cols = self - .options - .table_partition_cols - .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) - .collect::>>()?; - - let table_partition_col_names = table_partition_cols - .iter() - .map(|field| field.name().as_str()) - .collect::>(); - - // If the filters can be resolved using only partition cols, there is no need to - // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated - let (partition_filters, filters): (Vec<_>, Vec<_>) = - filters.iter().cloned().partition(|filter| { - can_be_evaluated_for_partition_pruning(&table_partition_col_names, filter) - }); - - // We should not limit the number of partitioned files to scan if there are filters and limit - // at the same time. This is because the limit should be applied after the filters are applied. 
- let statistic_file_limit = if filters.is_empty() { limit } else { None }; - - let (mut partitioned_file_lists, statistics) = self - .list_files_for_scan(state, &partition_filters, statistic_file_limit) - .await?; - - // if no files need to be read, return an `EmptyExec` - if partitioned_file_lists.is_empty() { - let projected_schema = project_schema(&self.schema(), projection.as_ref())?; - return Ok(ScanResult::new(Arc::new(EmptyExec::new(projected_schema)))); - } - - let output_ordering = self.try_create_output_ordering(state.execution_props())?; - match state - .config_options() - .execution - .split_file_groups_by_statistics - .then(|| { - output_ordering.first().map(|output_ordering| { - FileScanConfig::split_groups_by_statistics_with_target_partitions( - &self.table_schema, - &partitioned_file_lists, - output_ordering, - self.options.target_partitions, - ) - }) - }) - .flatten() - { - Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"), - Some(Ok(new_groups)) => { - if new_groups.len() <= self.options.target_partitions { - partitioned_file_lists = new_groups; - } else { - log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") - } - } - None => {} // no ordering required - }; - - let Some(object_store_url) = - self.table_paths.first().map(ListingTableUrl::object_store) - else { - return Ok(ScanResult::new(Arc::new(EmptyExec::new(Arc::new( - Schema::empty(), - ))))); - }; - - let file_source = self.create_file_source_with_schema_adapter()?; - - // create the execution plan - let plan = self - .options - .format - .create_physical_plan( - state, - FileScanConfigBuilder::new( - object_store_url, - Arc::clone(&self.file_schema), - file_source, - ) - .with_file_groups(partitioned_file_lists) - .with_constraints(self.constraints.clone()) - .with_statistics(statistics) - .with_projection(projection) - .with_limit(limit) - .with_output_ordering(output_ordering) - .with_table_partition_cols(table_partition_cols) - .with_expr_adapter(self.expr_adapter_factory.clone()) - .build(), - ) - .await?; - - Ok(ScanResult::new(plan)) - } - - fn supports_filters_pushdown( - &self, - filters: &[&Expr], - ) -> Result> { - let partition_column_names = self - .options - .table_partition_cols - .iter() - .map(|col| col.0.as_str()) - .collect::>(); - filters - .iter() - .map(|filter| { - if can_be_evaluated_for_partition_pruning(&partition_column_names, filter) - { - // if filter can be handled by partition pruning, it is exact - return Ok(TableProviderFilterPushDown::Exact); - } - - Ok(TableProviderFilterPushDown::Inexact) - }) - .collect() - } - - fn get_table_definition(&self) -> Option<&str> { - self.definition.as_deref() - } - - async fn insert_into( - &self, - state: &dyn Session, - input: Arc, - insert_op: InsertOp, - ) -> Result> { - // Check that the schema of the plan matches the schema of this table. - self.schema() - .logically_equivalent_names_and_types(&input.schema())?; - - let table_path = &self.table_paths()[0]; - if !table_path.is_collection() { - return plan_err!( - "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \ - To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE" - ); - } - - // Get the object store for the table path. 
- let store = state.runtime_env().object_store(table_path)?; - - let file_list_stream = pruned_partition_list( - state, - store.as_ref(), - table_path, - &[], - &self.options.file_extension, - &self.options.table_partition_cols, - ) - .await?; - - let file_group = file_list_stream.try_collect::>().await?.into(); - let keep_partition_by_columns = - state.config_options().execution.keep_partition_by_columns; - - // Sink related option, apart from format - let config = FileSinkConfig { - original_url: String::default(), - object_store_url: self.table_paths()[0].object_store(), - table_paths: self.table_paths().clone(), - file_group, - output_schema: self.schema(), - table_partition_cols: self.options.table_partition_cols.clone(), - insert_op, - keep_partition_by_columns, - file_extension: self.options().format.get_ext(), - }; - - let orderings = self.try_create_output_ordering(state.execution_props())?; - // It is sufficient to pass only one of the equivalent orderings: - let order_requirements = orderings.into_iter().next().map(Into::into); - - self.options() - .format - .create_writer_physical_plan(input, state, config, order_requirements) - .await - } - - fn get_column_default(&self, column: &str) -> Option<&Expr> { - self.column_defaults.get(column) - } -} - -impl ListingTable { - /// Get the list of files for a scan as well as the file level statistics. - /// The list is grouped to let the execution plan know how the files should - /// be distributed to different threads / executors. - async fn list_files_for_scan<'a>( - &'a self, - ctx: &'a dyn Session, - filters: &'a [Expr], - limit: Option, - ) -> Result<(Vec, Statistics)> { - let store = if let Some(url) = self.table_paths.first() { - ctx.runtime_env().object_store(url)? - } else { - return Ok((vec![], Statistics::new_unknown(&self.file_schema))); - }; - // list files (with partitions) - let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { - pruned_partition_list( - ctx, - store.as_ref(), - table_path, - filters, - &self.options.file_extension, - &self.options.table_partition_cols, - ) - })) - .await?; - let meta_fetch_concurrency = - ctx.config_options().execution.meta_fetch_concurrency; - let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency); - // collect the statistics if required by the config - let files = file_list - .map(|part_file| async { - let part_file = part_file?; - let statistics = if self.options.collect_stat { - self.do_collect_statistics(ctx, &store, &part_file).await? 
- } else { - Arc::new(Statistics::new_unknown(&self.file_schema)) - }; - Ok(part_file.with_statistics(statistics)) - }) - .boxed() - .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency); - - let (file_group, inexact_stats) = - get_files_with_limit(files, limit, self.options.collect_stat).await?; - - let file_groups = file_group.split_files(self.options.target_partitions); - let (mut file_groups, mut stats) = compute_all_files_statistics( - file_groups, - self.schema(), - self.options.collect_stat, - inexact_stats, - )?; - - let schema_adapter = self.create_schema_adapter(); - let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; - - stats.column_statistics = - schema_mapper.map_column_statistics(&stats.column_statistics)?; - file_groups.iter_mut().try_for_each(|file_group| { - if let Some(stat) = file_group.statistics_mut() { - stat.column_statistics = - schema_mapper.map_column_statistics(&stat.column_statistics)?; - } - Ok::<_, DataFusionError>(()) - })?; - Ok((file_groups, stats)) - } - - /// Collects statistics for a given partitioned file. - /// - /// This method first checks if the statistics for the given file are already cached. - /// If they are, it returns the cached statistics. - /// If they are not, it infers the statistics from the file and stores them in the cache. - async fn do_collect_statistics( - &self, - ctx: &dyn Session, - store: &Arc, - part_file: &PartitionedFile, - ) -> Result> { - match self - .collected_statistics - .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) - { - Some(statistics) => Ok(statistics), - None => { - let statistics = self - .options - .format - .infer_stats( - ctx, - store, - Arc::clone(&self.file_schema), - &part_file.object_meta, - ) - .await?; - let statistics = Arc::new(statistics); - self.collected_statistics.put_with_extra( - &part_file.object_meta.location, - Arc::clone(&statistics), - &part_file.object_meta, - ); - Ok(statistics) - } - } - } -} - -/// Processes a stream of partitioned files and returns a `FileGroup` containing the files. -/// -/// This function collects files from the provided stream until either: -/// 1. The stream is exhausted -/// 2. The accumulated number of rows exceeds the provided `limit` (if specified) -/// -/// # Arguments -/// * `files` - A stream of `Result` items to process -/// * `limit` - An optional row count limit. If provided, the function will stop collecting files -/// once the accumulated number of rows exceeds this limit -/// * `collect_stats` - Whether to collect and accumulate statistics from the files -/// -/// # Returns -/// A `Result` containing a `FileGroup` with the collected files -/// and a boolean indicating whether the statistics are inexact. -/// -/// # Note -/// The function will continue processing files if statistics are not available or if the -/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated -/// but files will still be collected. -async fn get_files_with_limit( - files: impl Stream>, - limit: Option, - collect_stats: bool, -) -> Result<(FileGroup, bool)> { - let mut file_group = FileGroup::default(); - // Fusing the stream allows us to call next safely even once it is finished. 
- let mut all_files = Box::pin(files.fuse()); - enum ProcessingState { - ReadingFiles, - ReachedLimit, - } - - let mut state = ProcessingState::ReadingFiles; - let mut num_rows = Precision::Absent; - - while let Some(file_result) = all_files.next().await { - // Early exit if we've already reached our limit - if matches!(state, ProcessingState::ReachedLimit) { - break; - } - - let file = file_result?; - - // Update file statistics regardless of state - if collect_stats { - if let Some(file_stats) = &file.statistics { - num_rows = if file_group.is_empty() { - // For the first file, just take its row count - file_stats.num_rows - } else { - // For subsequent files, accumulate the counts - num_rows.add(&file_stats.num_rows) - }; - } - } - - // Always add the file to our group - file_group.push(file); - - // Check if we've hit the limit (if one was specified) - if let Some(limit) = limit { - if let Precision::Exact(row_count) = num_rows { - if row_count > limit { - state = ProcessingState::ReachedLimit; - } - } - } - } - // If we still have files in the stream, it means that the limit kicked - // in, and the statistic could have been different had we processed the - // files in a different order. - let inexact_stats = all_files.next().await.is_some(); - Ok((file_group, inexact_stats)) } #[cfg(test)] mod tests { - use super::*; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; + use crate::datasource::listing::table::ListingTableConfigExt; use crate::prelude::*; use crate::{ datasource::{ @@ -1587,21 +123,34 @@ mod tests { }, }; use arrow::{compute::SortOptions, record_batch::RecordBatch}; + use arrow_schema::{DataType, Field, Schema, SchemaRef}; + use datafusion_catalog::TableProvider; + use datafusion_catalog_listing::{ + ListingOptions, ListingTable, ListingTableConfig, SchemaSource, + }; use datafusion_common::{ - assert_contains, + assert_contains, plan_err, stats::Precision, test_util::{batches_to_string, datafusion_test_data}, - ColumnStatistics, ScalarValue, + ColumnStatistics, DataFusionError, Result, ScalarValue, }; + use datafusion_datasource::file_compression_type::FileCompressionType; + use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; + use datafusion_datasource::ListingTableUrl; + use datafusion_expr::dml::InsertOp; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; use datafusion_physical_expr::expressions::binary; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_expr_common::sort_expr::LexOrdering; + use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::{collect, ExecutionPlanProperties}; use rstest::rstest; + use std::collections::HashMap; use std::io::Write; + use std::sync::Arc; use tempfile::TempDir; use url::Url; @@ -1638,10 +187,13 @@ mod tests { let ctx = SessionContext::new(); let testdata = datafusion_test_data(); let filename = format!("{testdata}/aggregate_simple.csv"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; // Test default schema source - let config = ListingTableConfig::new(table_path.clone()); + let format = CsvFormat::default(); + let options = ListingOptions::new(Arc::new(format)); + let config = + ListingTableConfig::new(table_path.clone()).with_listing_options(options); assert_eq!(config.schema_source(), SchemaSource::Unset); // Test schema source after setting a schema explicitly @@ 
-1650,18 +202,13 @@ mod tests { assert_eq!(config_with_schema.schema_source(), SchemaSource::Specified); // Test schema source after inferring schema - let format = CsvFormat::default(); - let options = ListingOptions::new(Arc::new(format)); - let config_with_options = config.with_listing_options(options.clone()); - assert_eq!(config_with_options.schema_source(), SchemaSource::Unset); + assert_eq!(config.schema_source(), SchemaSource::Unset); - let config_with_inferred = config_with_options.infer_schema(&ctx.state()).await?; + let config_with_inferred = config.infer_schema(&ctx.state()).await?; assert_eq!(config_with_inferred.schema_source(), SchemaSource::Inferred); // Test schema preservation through operations - let config_with_schema_and_options = config_with_schema - .clone() - .with_listing_options(options.clone()); + let config_with_schema_and_options = config_with_schema.clone(); assert_eq!( config_with_schema_and_options.schema_source(), SchemaSource::Specified @@ -1836,7 +383,7 @@ mod tests { .with_table_partition_cols(vec![(String::from("p1"), DataType::Utf8)]) .with_target_partitions(4); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let file_schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Boolean, false)])); let config = ListingTableConfig::new(table_path) @@ -1872,7 +419,7 @@ mod tests { ) -> Result> { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{testdata}/{name}"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let config = ListingTableConfig::new(table_path) .infer(&ctx.state()) @@ -1899,7 +446,7 @@ mod tests { let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse(table_prefix).unwrap(); + let table_path = ListingTableUrl::parse(table_prefix)?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) .with_schema(Arc::new(schema)); @@ -2458,7 +1005,7 @@ mod tests { async fn test_infer_options_compressed_csv() -> Result<()> { let testdata = crate::test_util::arrow_test_data(); let filename = format!("{testdata}/csv/aggregate_test_100.csv.gz"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let ctx = SessionContext::new(); @@ -2479,12 +1026,15 @@ mod tests { let testdata = datafusion_test_data(); let filename = format!("{testdata}/aggregate_simple.csv"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let provided_schema = create_test_schema(); - let config = - ListingTableConfig::new(table_path).with_schema(Arc::clone(&provided_schema)); + let format = CsvFormat::default(); + let options = ListingOptions::new(Arc::new(format)); + let config = ListingTableConfig::new(table_path) + .with_listing_options(options) + .with_schema(Arc::clone(&provided_schema)); let config = config.infer(&ctx.state()).await?; @@ -2549,8 +1099,8 @@ mod tests { table_path1.clone(), table_path2.clone(), ]) - .with_schema(schema_3cols) - .with_listing_options(options.clone()); + .with_listing_options(options.clone()) + .with_schema(schema_3cols); let config2 = config2.infer_schema(&ctx.state()).await?; assert_eq!(config2.schema_source(), SchemaSource::Specified); @@ -2573,8 +1123,8 @@ mod tests { table_path1.clone(), table_path2.clone(), ]) - .with_schema(schema_4cols) - 
.with_listing_options(options.clone()); + .with_listing_options(options.clone()) + .with_schema(schema_4cols); let config3 = config3.infer_schema(&ctx.state()).await?; assert_eq!(config3.schema_source(), SchemaSource::Specified); @@ -2785,7 +1335,7 @@ mod tests { let testdata = crate::test_util::parquet_test_data(); let filename = format!("{}/{}", testdata, "alltypes_plain.parquet"); - let table_path = ListingTableUrl::parse(filename).unwrap(); + let table_path = ListingTableUrl::parse(filename)?; let ctx = SessionContext::new(); let state = ctx.state(); @@ -2932,7 +1482,7 @@ mod tests { let format = JsonFormat::default(); let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(false); let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) @@ -3146,7 +1696,7 @@ mod tests { let format = JsonFormat::default(); let opt = ListingOptions::new(Arc::new(format)).with_collect_stat(collect_stat); let schema = Schema::new(vec![Field::new("a", DataType::Boolean, false)]); - let table_path = ListingTableUrl::parse("test:///table/").unwrap(); + let table_path = ListingTableUrl::parse("test:///table/")?; let config = ListingTableConfig::new(table_path) .with_listing_options(opt) diff --git a/datafusion/core/tests/catalog/memory.rs b/datafusion/core/tests/catalog/memory.rs index ea9e71fc37467..06ed141b2e8bd 100644 --- a/datafusion/core/tests/catalog/memory.rs +++ b/datafusion/core/tests/catalog/memory.rs @@ -19,7 +19,7 @@ use arrow::datatypes::Schema; use datafusion::catalog::CatalogProvider; use datafusion::datasource::empty::EmptyTable; use datafusion::datasource::listing::{ - ListingTable, ListingTableConfig, ListingTableUrl, + ListingTable, ListingTableConfig, ListingTableConfigExt, ListingTableUrl, }; use datafusion::prelude::SessionContext; use datafusion_catalog::memory::*; diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 4ae2fa9b4c399..40fc6176e212b 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -23,7 +23,9 @@ use arrow_schema::{DataType, Field, FieldRef, Schema, SchemaRef}; use bytes::{BufMut, BytesMut}; use datafusion::assert_batches_eq; use datafusion::common::Result; -use datafusion::datasource::listing::{ListingTable, ListingTableConfig}; +use datafusion::datasource::listing::{ + ListingTable, ListingTableConfig, ListingTableConfigExt, +}; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::DataFusionError; From 8e1d13a9c6e989b8a6216c6752a34fb3e0494e0f Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Fri, 17 Oct 2025 06:44:10 -0400 Subject: [PATCH 024/109] refactor: move arrow datasource to new `datafusion-datasource-arrow` crate (#18082) ## Which issue does this PR close? - This addresses part of https://github.com/apache/datafusion/issues/17713 but it does not close it. ## Rationale for this change In order to remove `core` from `proto` crate, we need `ArrowFormat` to be available. Similar to the other datasource types (csv, avro, json, parquet) this splits the Arrow IPC file format into its own crate. ## What changes are included in this PR? This is a straight refactor. Code is merely moved around. 
The size of the diff is the additional files that are required (cargo.toml, readme.md, etc) ## Are these changes tested? Existing unit tests. ## Are there any user-facing changes? Users that include `ArrowSource` may need to update their include paths. For most, the reexports will cover this need. --- .github/workflows/labeler/labeler-config.yml | 2 +- Cargo.lock | 25 +- Cargo.toml | 2 + datafusion/core/Cargo.toml | 4 +- .../core/src/datasource/file_format/arrow.rs | 509 +-------------- .../src/datasource/physical_plan/arrow.rs | 23 + .../core/src/datasource/physical_plan/mod.rs | 5 +- datafusion/datasource-arrow/Cargo.toml | 64 ++ datafusion/datasource-arrow/LICENSE.txt | 212 ++++++ datafusion/datasource-arrow/NOTICE.txt | 5 + datafusion/datasource-arrow/README.md | 34 + .../datasource-arrow/src/file_format.rs | 603 ++++++++++++++++++ datafusion/datasource-arrow/src/mod.rs | 25 + .../src/source.rs} | 5 +- .../tests/data/example.arrow | Bin .../sqllogictest/test_files/arrow_files.slt | 8 +- datafusion/sqllogictest/test_files/ddl.slt | 6 +- .../test_files/repartition_scan.slt | 4 +- dev/release/README.md | 1 + 19 files changed, 1012 insertions(+), 525 deletions(-) create mode 100644 datafusion/core/src/datasource/physical_plan/arrow.rs create mode 100644 datafusion/datasource-arrow/Cargo.toml create mode 100644 datafusion/datasource-arrow/LICENSE.txt create mode 100644 datafusion/datasource-arrow/NOTICE.txt create mode 100644 datafusion/datasource-arrow/README.md create mode 100644 datafusion/datasource-arrow/src/file_format.rs create mode 100644 datafusion/datasource-arrow/src/mod.rs rename datafusion/{core/src/datasource/physical_plan/arrow_file.rs => datasource-arrow/src/source.rs} (98%) rename datafusion/{core => datasource-arrow}/tests/data/example.arrow (100%) diff --git a/.github/workflows/labeler/labeler-config.yml b/.github/workflows/labeler/labeler-config.yml index e408130725215..38d88059dab70 100644 --- a/.github/workflows/labeler/labeler-config.yml +++ b/.github/workflows/labeler/labeler-config.yml @@ -58,7 +58,7 @@ execution: datasource: - changed-files: - - any-glob-to-any-file: ['datafusion/datasource/**/*', 'datafusion/datasource-avro/**/*', 'datafusion/datasource-csv/**/*', 'datafusion/datasource-json/**/*', 'datafusion/datasource-parquet/**/*'] + - any-glob-to-any-file: ['datafusion/datasource/**/*', 'datafusion/datasource-avro/**/*', 'datafusion/datasource-arrow/**/*', 'datafusion/datasource-csv/**/*', 'datafusion/datasource-json/**/*', 'datafusion/datasource-parquet/**/*'] functions: - changed-files: diff --git a/Cargo.lock b/Cargo.lock index 0392c8147ad2c..d69ece6d3fb05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1789,7 +1789,6 @@ name = "datafusion" version = "50.2.0" dependencies = [ "arrow", - "arrow-ipc", "arrow-schema", "async-trait", "bytes", @@ -1803,6 +1802,7 @@ dependencies = [ "datafusion-common", "datafusion-common-runtime", "datafusion-datasource", + "datafusion-datasource-arrow", "datafusion-datasource-avro", "datafusion-datasource-csv", "datafusion-datasource-json", @@ -2030,6 +2030,29 @@ dependencies = [ "zstd", ] +[[package]] +name = "datafusion-datasource-arrow" +version = "50.2.0" +dependencies = [ + "arrow", + "arrow-ipc", + "async-trait", + "bytes", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "datafusion-session", + "futures", + "itertools 0.14.0", + "object_store", + "tokio", +] + 
[[package]] name = "datafusion-datasource-avro" version = "50.2.0" diff --git a/Cargo.toml b/Cargo.toml index dd0b20de528af..79c14d6cca799 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ members = [ "datafusion/catalog", "datafusion/catalog-listing", "datafusion/datasource", + "datafusion/datasource-arrow", "datafusion/datasource-avro", "datafusion/datasource-csv", "datafusion/datasource-json", @@ -116,6 +117,7 @@ datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "5 datafusion-common = { path = "datafusion/common", version = "50.2.0", default-features = false } datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.2.0" } datafusion-datasource = { path = "datafusion/datasource", version = "50.2.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.2.0", default-features = false } datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.2.0", default-features = false } datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.2.0", default-features = false } datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.2.0", default-features = false } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index d3bc4546588de..a5a715cea94f1 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -47,7 +47,7 @@ compression = [ "bzip2", "flate2", "zstd", - "arrow-ipc/zstd", + "datafusion-datasource-arrow/compression", "datafusion-datasource/compression", ] crypto_expressions = ["datafusion-functions/crypto_expressions"] @@ -109,7 +109,6 @@ extended_tests = [] [dependencies] arrow = { workspace = true } -arrow-ipc = { workspace = true } arrow-schema = { workspace = true, features = ["canonical_extension_types"] } async-trait = { workspace = true } bytes = { workspace = true } @@ -120,6 +119,7 @@ datafusion-catalog-listing = { workspace = true } datafusion-common = { workspace = true, features = ["object_store"] } datafusion-common-runtime = { workspace = true } datafusion-datasource = { workspace = true } +datafusion-datasource-arrow = { workspace = true } datafusion-datasource-avro = { workspace = true, optional = true } datafusion-datasource-csv = { workspace = true } datafusion-datasource-json = { workspace = true } diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs index 25bc166d657a5..8701f96eb3b84 100644 --- a/datafusion/core/src/datasource/file_format/arrow.rs +++ b/datafusion/core/src/datasource/file_format/arrow.rs @@ -15,510 +15,5 @@ // specific language governing permissions and limitations // under the License. -//! [`ArrowFormat`]: Apache Arrow [`FileFormat`] abstractions -//! -//! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) - -use std::any::Any; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::{self, Debug}; -use std::sync::Arc; - -use super::file_compression_type::FileCompressionType; -use super::write::demux::DemuxedStreamReceiver; -use super::write::SharedBuffer; -use super::FileFormatFactory; -use crate::datasource::file_format::write::get_writer_schema; -use crate::datasource::file_format::FileFormat; -use crate::datasource::physical_plan::{ArrowSource, FileSink, FileSinkConfig}; -use crate::error::Result; -use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; - -use arrow::datatypes::{Schema, SchemaRef}; -use arrow::error::ArrowError; -use arrow::ipc::convert::fb_to_schema; -use arrow::ipc::reader::FileReader; -use arrow::ipc::writer::IpcWriteOptions; -use arrow::ipc::{root_as_message, CompressionType}; -use datafusion_catalog::Session; -use datafusion_common::parsers::CompressionTypeVariant; -use datafusion_common::{ - internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, - DEFAULT_ARROW_EXTENSION, -}; -use datafusion_common_runtime::{JoinSet, SpawnedTask}; -use datafusion_datasource::display::FileGroupDisplay; -use datafusion_datasource::file::FileSource; -use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; -use datafusion_datasource::sink::{DataSink, DataSinkExec}; -use datafusion_datasource::write::ObjectWriterBuilder; -use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_expr::dml::InsertOp; -use datafusion_physical_expr_common::sort_expr::LexRequirement; - -use async_trait::async_trait; -use bytes::Bytes; -use datafusion_datasource::source::DataSourceExec; -use futures::stream::BoxStream; -use futures::StreamExt; -use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; -use tokio::io::AsyncWriteExt; - -/// Initial writing buffer size. Note this is just a size hint for efficiency. It -/// will grow beyond the set value if needed. -const INITIAL_BUFFER_BYTES: usize = 1048576; - -/// If the buffered Arrow data exceeds this size, it is flushed to object store -const BUFFER_FLUSH_BYTES: usize = 1024000; - -#[derive(Default, Debug)] -/// Factory struct used to create [ArrowFormat] -pub struct ArrowFormatFactory; - -impl ArrowFormatFactory { - /// Creates an instance of [ArrowFormatFactory] - pub fn new() -> Self { - Self {} - } -} - -impl FileFormatFactory for ArrowFormatFactory { - fn create( - &self, - _state: &dyn Session, - _format_options: &HashMap, - ) -> Result> { - Ok(Arc::new(ArrowFormat)) - } - - fn default(&self) -> Arc { - Arc::new(ArrowFormat) - } - - fn as_any(&self) -> &dyn Any { - self - } -} - -impl GetExt for ArrowFormatFactory { - fn get_ext(&self) -> String { - // Removes the dot, i.e. ".parquet" -> "parquet" - DEFAULT_ARROW_EXTENSION[1..].to_string() - } -} - -/// Arrow `FileFormat` implementation. -#[derive(Default, Debug)] -pub struct ArrowFormat; - -#[async_trait] -impl FileFormat for ArrowFormat { - fn as_any(&self) -> &dyn Any { - self - } - - fn get_ext(&self) -> String { - ArrowFormatFactory::new().get_ext() - } - - fn get_ext_with_compression( - &self, - file_compression_type: &FileCompressionType, - ) -> Result { - let ext = self.get_ext(); - match file_compression_type.get_variant() { - CompressionTypeVariant::UNCOMPRESSED => Ok(ext), - _ => Err(internal_datafusion_err!( - "Arrow FileFormat does not support compression." 
- )), - } - } - - fn compression_type(&self) -> Option { - None - } - - async fn infer_schema( - &self, - _state: &dyn Session, - store: &Arc, - objects: &[ObjectMeta], - ) -> Result { - let mut schemas = vec![]; - for object in objects { - let r = store.as_ref().get(&object.location).await?; - let schema = match r.payload { - #[cfg(not(target_arch = "wasm32"))] - GetResultPayload::File(mut file, _) => { - let reader = FileReader::try_new(&mut file, None)?; - reader.schema() - } - GetResultPayload::Stream(stream) => { - infer_schema_from_file_stream(stream).await? - } - }; - schemas.push(schema.as_ref().clone()); - } - let merged_schema = Schema::try_merge(schemas)?; - Ok(Arc::new(merged_schema)) - } - - async fn infer_stats( - &self, - _state: &dyn Session, - _store: &Arc, - table_schema: SchemaRef, - _object: &ObjectMeta, - ) -> Result { - Ok(Statistics::new_unknown(&table_schema)) - } - - async fn create_physical_plan( - &self, - _state: &dyn Session, - conf: FileScanConfig, - ) -> Result> { - let source = Arc::new(ArrowSource::default()); - let config = FileScanConfigBuilder::from(conf) - .with_source(source) - .build(); - - Ok(DataSourceExec::from_data_source(config)) - } - - async fn create_writer_physical_plan( - &self, - input: Arc, - _state: &dyn Session, - conf: FileSinkConfig, - order_requirements: Option, - ) -> Result> { - if conf.insert_op != InsertOp::Append { - return not_impl_err!("Overwrites are not implemented yet for Arrow format"); - } - - let sink = Arc::new(ArrowFileSink::new(conf)); - - Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) - } - - fn file_source(&self) -> Arc { - Arc::new(ArrowSource::default()) - } -} - -/// Implements [`FileSink`] for writing to arrow_ipc files -struct ArrowFileSink { - config: FileSinkConfig, -} - -impl ArrowFileSink { - fn new(config: FileSinkConfig) -> Self { - Self { config } - } -} - -#[async_trait] -impl FileSink for ArrowFileSink { - fn config(&self) -> &FileSinkConfig { - &self.config - } - - async fn spawn_writer_tasks_and_join( - &self, - context: &Arc, - demux_task: SpawnedTask>, - mut file_stream_rx: DemuxedStreamReceiver, - object_store: Arc, - ) -> Result { - let mut file_write_tasks: JoinSet> = - JoinSet::new(); - - let ipc_options = - IpcWriteOptions::try_new(64, false, arrow_ipc::MetadataVersion::V5)? 
- .try_with_compression(Some(CompressionType::LZ4_FRAME))?; - while let Some((path, mut rx)) = file_stream_rx.recv().await { - let shared_buffer = SharedBuffer::new(INITIAL_BUFFER_BYTES); - let mut arrow_writer = arrow_ipc::writer::FileWriter::try_new_with_options( - shared_buffer.clone(), - &get_writer_schema(&self.config), - ipc_options.clone(), - )?; - let mut object_store_writer = ObjectWriterBuilder::new( - FileCompressionType::UNCOMPRESSED, - &path, - Arc::clone(&object_store), - ) - .with_buffer_size(Some( - context - .session_config() - .options() - .execution - .objectstore_writer_buffer_size, - )) - .build()?; - file_write_tasks.spawn(async move { - let mut row_count = 0; - while let Some(batch) = rx.recv().await { - row_count += batch.num_rows(); - arrow_writer.write(&batch)?; - let mut buff_to_flush = shared_buffer.buffer.try_lock().unwrap(); - if buff_to_flush.len() > BUFFER_FLUSH_BYTES { - object_store_writer - .write_all(buff_to_flush.as_slice()) - .await?; - buff_to_flush.clear(); - } - } - arrow_writer.finish()?; - let final_buff = shared_buffer.buffer.try_lock().unwrap(); - - object_store_writer.write_all(final_buff.as_slice()).await?; - object_store_writer.shutdown().await?; - Ok(row_count) - }); - } - - let mut row_count = 0; - while let Some(result) = file_write_tasks.join_next().await { - match result { - Ok(r) => { - row_count += r?; - } - Err(e) => { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } else { - unreachable!(); - } - } - } - } - - demux_task - .join_unwind() - .await - .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; - Ok(row_count as u64) - } -} - -impl Debug for ArrowFileSink { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("ArrowFileSink").finish() - } -} - -impl DisplayAs for ArrowFileSink { - fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "ArrowFileSink(file_groups=",)?; - FileGroupDisplay(&self.config.file_group).fmt_as(t, f)?; - write!(f, ")") - } - DisplayFormatType::TreeRender => { - writeln!(f, "format: arrow")?; - write!(f, "file={}", &self.config.original_url) - } - } - } -} - -#[async_trait] -impl DataSink for ArrowFileSink { - fn as_any(&self) -> &dyn Any { - self - } - - fn schema(&self) -> &SchemaRef { - self.config.output_schema() - } - - async fn write_all( - &self, - data: SendableRecordBatchStream, - context: &Arc, - ) -> Result { - FileSink::write_all(self, data, context).await - } -} - -const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; -const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; - -/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. -/// See -async fn infer_schema_from_file_stream( - mut stream: BoxStream<'static, object_store::Result>, -) -> Result { - // Expected format: - // - 6 bytes - // - 2 bytes - // - 4 bytes, not present below v0.15.0 - // - 4 bytes - // - // - - // So in first read we need at least all known sized sections, - // which is 6 + 2 + 4 + 4 = 16 bytes. 
- let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; - - // Files should start with these magic bytes - if bytes[0..6] != ARROW_MAGIC { - return Err(ArrowError::ParseError( - "Arrow file does not contain correct header".to_string(), - ))?; - } - - // Since continuation marker bytes added in later versions - let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { - (&bytes[12..16], 16) - } else { - (&bytes[8..12], 12) - }; - - let meta_len = [meta_len[0], meta_len[1], meta_len[2], meta_len[3]]; - let meta_len = i32::from_le_bytes(meta_len); - - // Read bytes for Schema message - let block_data = if bytes[rest_of_bytes_start_index..].len() < meta_len as usize { - // Need to read more bytes to decode Message - let mut block_data = Vec::with_capacity(meta_len as usize); - // In case we had some spare bytes in our initial read chunk - block_data.extend_from_slice(&bytes[rest_of_bytes_start_index..]); - let size_to_read = meta_len as usize - block_data.len(); - let block_data = - collect_at_least_n_bytes(&mut stream, size_to_read, Some(block_data)).await?; - Cow::Owned(block_data) - } else { - // Already have the bytes we need - let end_index = meta_len as usize + rest_of_bytes_start_index; - let block_data = &bytes[rest_of_bytes_start_index..end_index]; - Cow::Borrowed(block_data) - }; - - // Decode Schema message - let message = root_as_message(&block_data).map_err(|err| { - ArrowError::ParseError(format!("Unable to read IPC message as metadata: {err:?}")) - })?; - let ipc_schema = message.header_as_schema().ok_or_else(|| { - ArrowError::IpcError("Unable to read IPC message as schema".to_string()) - })?; - let schema = fb_to_schema(ipc_schema); - - Ok(Arc::new(schema)) -} - -async fn collect_at_least_n_bytes( - stream: &mut BoxStream<'static, object_store::Result>, - n: usize, - extend_from: Option>, -) -> Result> { - let mut buf = extend_from.unwrap_or_else(|| Vec::with_capacity(n)); - // If extending existing buffer then ensure we read n additional bytes - let n = n + buf.len(); - while let Some(bytes) = stream.next().await.transpose()? 
{ - buf.extend_from_slice(&bytes); - if buf.len() >= n { - break; - } - } - if buf.len() < n { - return Err(ArrowError::ParseError( - "Unexpected end of byte stream for Arrow IPC file".to_string(), - ))?; - } - Ok(buf) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::execution::context::SessionContext; - - use chrono::DateTime; - use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; - - #[tokio::test] - async fn test_infer_schema_stream() -> Result<()> { - let mut bytes = std::fs::read("tests/data/example.arrow")?; - bytes.truncate(bytes.len() - 20); // mangle end to show we don't need to read whole file - let location = Path::parse("example.arrow")?; - let in_memory_store: Arc = Arc::new(InMemory::new()); - in_memory_store.put(&location, bytes.into()).await?; - - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let object_meta = ObjectMeta { - location, - last_modified: DateTime::default(), - size: u64::MAX, - e_tag: None, - version: None, - }; - - let arrow_format = ArrowFormat {}; - let expected = vec!["f0: Int64", "f1: Utf8", "f2: Boolean"]; - - // Test chunk sizes where too small so we keep having to read more bytes - // And when large enough that first read contains all we need - for chunk_size in [7, 3000] { - let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), chunk_size)); - let inferred_schema = arrow_format - .infer_schema( - &state, - &(store.clone() as Arc), - std::slice::from_ref(&object_meta), - ) - .await?; - let actual_fields = inferred_schema - .fields() - .iter() - .map(|f| format!("{}: {:?}", f.name(), f.data_type())) - .collect::>(); - assert_eq!(expected, actual_fields); - } - - Ok(()) - } - - #[tokio::test] - async fn test_infer_schema_short_stream() -> Result<()> { - let mut bytes = std::fs::read("tests/data/example.arrow")?; - bytes.truncate(20); // should cause error that file shorter than expected - let location = Path::parse("example.arrow")?; - let in_memory_store: Arc = Arc::new(InMemory::new()); - in_memory_store.put(&location, bytes.into()).await?; - - let session_ctx = SessionContext::new(); - let state = session_ctx.state(); - let object_meta = ObjectMeta { - location, - last_modified: DateTime::default(), - size: u64::MAX, - e_tag: None, - version: None, - }; - - let arrow_format = ArrowFormat {}; - - let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), 7)); - let err = arrow_format - .infer_schema( - &state, - &(store.clone() as Arc), - std::slice::from_ref(&object_meta), - ) - .await; - - assert!(err.is_err()); - assert_eq!( - "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", - err.unwrap_err().to_string().lines().next().unwrap() - ); - - Ok(()) - } -} +//! Re-exports the [`datafusion_datasource_arrow::file_format`] module, and contains tests for it. +pub use datafusion_datasource_arrow::file_format::*; diff --git a/datafusion/core/src/datasource/physical_plan/arrow.rs b/datafusion/core/src/datasource/physical_plan/arrow.rs new file mode 100644 index 0000000000000..392eaa8c4be49 --- /dev/null +++ b/datafusion/core/src/datasource/physical_plan/arrow.rs @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Reexports the [`datafusion_datasource_arrow::source`] module, containing [Arrow] based [`FileSource`]. +//! +//! [Arrow]: https://arrow.apache.org/docs/python/ipc.html +//! [`FileSource`]: datafusion_datasource::file::FileSource + +pub use datafusion_datasource_arrow::source::*; diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index 3a9dedaa028f2..1ac292e260fdf 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -17,7 +17,7 @@ //! Execution plans that read file formats -mod arrow_file; +pub mod arrow; pub mod csv; pub mod json; @@ -35,10 +35,9 @@ pub use datafusion_datasource_parquet::source::ParquetSource; #[cfg(feature = "parquet")] pub use datafusion_datasource_parquet::{ParquetFileMetrics, ParquetFileReaderFactory}; -pub use arrow_file::ArrowSource; - pub use json::{JsonOpener, JsonSource}; +pub use arrow::{ArrowOpener, ArrowSource}; pub use csv::{CsvOpener, CsvSource}; pub use datafusion_datasource::file::FileSource; pub use datafusion_datasource::file_groups::FileGroup; diff --git a/datafusion/datasource-arrow/Cargo.toml b/datafusion/datasource-arrow/Cargo.toml new file mode 100644 index 0000000000000..b3d1e3f2accc9 --- /dev/null +++ b/datafusion/datasource-arrow/Cargo.toml @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "datafusion-datasource-arrow" +description = "datafusion-datasource-arrow" +readme = "README.md" +authors.workspace = true +edition.workspace = true +homepage.workspace = true +license.workspace = true +repository.workspace = true +rust-version.workspace = true +version.workspace = true + +[package.metadata.docs.rs] +all-features = true + +[dependencies] +arrow = { workspace = true } +arrow-ipc = { workspace = true } +async-trait = { workspace = true } +bytes = { workspace = true } +datafusion-common = { workspace = true, features = ["object_store"] } +datafusion-common-runtime = { workspace = true } +datafusion-datasource = { workspace = true } +datafusion-execution = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-expr-common = { workspace = true } +datafusion-physical-plan = { workspace = true } +datafusion-session = { workspace = true } +futures = { workspace = true } +itertools = { workspace = true } +object_store = { workspace = true } +tokio = { workspace = true } + +[dev-dependencies] +chrono = { workspace = true } + +[lints] +workspace = true + +[lib] +name = "datafusion_datasource_arrow" +path = "src/mod.rs" + +[features] +compression = [ + "arrow-ipc/zstd", +] diff --git a/datafusion/datasource-arrow/LICENSE.txt b/datafusion/datasource-arrow/LICENSE.txt new file mode 100644 index 0000000000000..d74c6b599d2ae --- /dev/null +++ b/datafusion/datasource-arrow/LICENSE.txt @@ -0,0 +1,212 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 diff --git a/datafusion/datasource-arrow/NOTICE.txt b/datafusion/datasource-arrow/NOTICE.txt new file mode 100644 index 0000000000000..7f3c80d606c07 --- /dev/null +++ b/datafusion/datasource-arrow/NOTICE.txt @@ -0,0 +1,5 @@ +Apache DataFusion +Copyright 2019-2025 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
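As a side note on the "user-facing changes" section of this patch: a minimal sketch (not part of the patch itself) of how downstream imports are affected. The `datafusion::datasource::physical_plan::ArrowSource` path keeps working through the re-export added in `datafusion/core/src/datasource/physical_plan/mod.rs`, while `datafusion_datasource_arrow::source::ArrowSource` is the new canonical location for projects that depend on the split-out crate directly; both paths are taken from the hunks in this PR, and the helper function below is purely illustrative.

```rust
// Existing import path, still valid via the core crate's re-export:
use datafusion::datasource::physical_plan::ArrowSource;

// New canonical path when depending on `datafusion-datasource-arrow` directly
// (illustrative alternative, commented out to avoid a duplicate import):
// use datafusion_datasource_arrow::source::ArrowSource;

// Hypothetical helper: constructs the source the same way the moved
// `ArrowFormat::file_source()` does in the code above.
fn arrow_file_source() -> ArrowSource {
    ArrowSource::default()
}
```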
diff --git a/datafusion/datasource-arrow/README.md b/datafusion/datasource-arrow/README.md new file mode 100644 index 0000000000000..9901b52105dd4 --- /dev/null +++ b/datafusion/datasource-arrow/README.md @@ -0,0 +1,34 @@ + + +# Apache DataFusion Arrow DataSource + +[Apache DataFusion] is an extensible query execution framework, written in Rust, that uses [Apache Arrow] as its in-memory format. + +This crate is a submodule of DataFusion that defines a Arrow based file source. +It works with files following the [Arrow IPC format]. + +Most projects should use the [`datafusion`] crate directly, which re-exports +this module. If you are already using the [`datafusion`] crate, there is no +reason to use this crate directly in your project as well. + +[apache arrow]: https://arrow.apache.org/ +[apache datafusion]: https://datafusion.apache.org/ +[`datafusion`]: https://crates.io/crates/datafusion +[arrow ipc format]: https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs new file mode 100644 index 0000000000000..3b85640804219 --- /dev/null +++ b/datafusion/datasource-arrow/src/file_format.rs @@ -0,0 +1,603 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ArrowFormat`]: Apache Arrow [`FileFormat`] abstractions +//! +//! 
Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format) + +use std::any::Any; +use std::borrow::Cow; +use std::collections::HashMap; +use std::fmt::{self, Debug}; +use std::sync::Arc; + +use arrow::datatypes::{Schema, SchemaRef}; +use arrow::error::ArrowError; +use arrow::ipc::convert::fb_to_schema; +use arrow::ipc::reader::FileReader; +use arrow::ipc::writer::IpcWriteOptions; +use arrow::ipc::{root_as_message, CompressionType}; +use datafusion_common::error::Result; +use datafusion_common::parsers::CompressionTypeVariant; +use datafusion_common::{ + internal_datafusion_err, not_impl_err, DataFusionError, GetExt, Statistics, + DEFAULT_ARROW_EXTENSION, +}; +use datafusion_common_runtime::{JoinSet, SpawnedTask}; +use datafusion_datasource::display::FileGroupDisplay; +use datafusion_datasource::file::FileSource; +use datafusion_datasource::file_scan_config::{FileScanConfig, FileScanConfigBuilder}; +use datafusion_datasource::sink::{DataSink, DataSinkExec}; +use datafusion_datasource::write::{ + get_writer_schema, ObjectWriterBuilder, SharedBuffer, +}; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::dml::InsertOp; +use datafusion_physical_expr_common::sort_expr::LexRequirement; + +use crate::source::ArrowSource; +use async_trait::async_trait; +use bytes::Bytes; +use datafusion_datasource::file_compression_type::FileCompressionType; +use datafusion_datasource::file_format::{FileFormat, FileFormatFactory}; +use datafusion_datasource::file_sink_config::{FileSink, FileSinkConfig}; +use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::write::demux::DemuxedStreamReceiver; +use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; +use datafusion_session::Session; +use futures::stream::BoxStream; +use futures::StreamExt; +use object_store::{GetResultPayload, ObjectMeta, ObjectStore}; +use tokio::io::AsyncWriteExt; + +/// Initial writing buffer size. Note this is just a size hint for efficiency. It +/// will grow beyond the set value if needed. +const INITIAL_BUFFER_BYTES: usize = 1048576; + +/// If the buffered Arrow data exceeds this size, it is flushed to object store +const BUFFER_FLUSH_BYTES: usize = 1024000; + +#[derive(Default, Debug)] +/// Factory struct used to create [ArrowFormat] +pub struct ArrowFormatFactory; + +impl ArrowFormatFactory { + /// Creates an instance of [ArrowFormatFactory] + pub fn new() -> Self { + Self {} + } +} + +impl FileFormatFactory for ArrowFormatFactory { + fn create( + &self, + _state: &dyn Session, + _format_options: &HashMap, + ) -> Result> { + Ok(Arc::new(ArrowFormat)) + } + + fn default(&self) -> Arc { + Arc::new(ArrowFormat) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl GetExt for ArrowFormatFactory { + fn get_ext(&self) -> String { + // Removes the dot, i.e. ".parquet" -> "parquet" + DEFAULT_ARROW_EXTENSION[1..].to_string() + } +} + +/// Arrow `FileFormat` implementation. 
+#[derive(Default, Debug)] +pub struct ArrowFormat; + +#[async_trait] +impl FileFormat for ArrowFormat { + fn as_any(&self) -> &dyn Any { + self + } + + fn get_ext(&self) -> String { + ArrowFormatFactory::new().get_ext() + } + + fn get_ext_with_compression( + &self, + file_compression_type: &FileCompressionType, + ) -> Result { + let ext = self.get_ext(); + match file_compression_type.get_variant() { + CompressionTypeVariant::UNCOMPRESSED => Ok(ext), + _ => Err(internal_datafusion_err!( + "Arrow FileFormat does not support compression." + )), + } + } + + fn compression_type(&self) -> Option { + None + } + + async fn infer_schema( + &self, + _state: &dyn Session, + store: &Arc, + objects: &[ObjectMeta], + ) -> Result { + let mut schemas = vec![]; + for object in objects { + let r = store.as_ref().get(&object.location).await?; + let schema = match r.payload { + #[cfg(not(target_arch = "wasm32"))] + GetResultPayload::File(mut file, _) => { + let reader = FileReader::try_new(&mut file, None)?; + reader.schema() + } + GetResultPayload::Stream(stream) => { + infer_schema_from_file_stream(stream).await? + } + }; + schemas.push(schema.as_ref().clone()); + } + let merged_schema = Schema::try_merge(schemas)?; + Ok(Arc::new(merged_schema)) + } + + async fn infer_stats( + &self, + _state: &dyn Session, + _store: &Arc, + table_schema: SchemaRef, + _object: &ObjectMeta, + ) -> Result { + Ok(Statistics::new_unknown(&table_schema)) + } + + async fn create_physical_plan( + &self, + _state: &dyn Session, + conf: FileScanConfig, + ) -> Result> { + let source = Arc::new(ArrowSource::default()); + let config = FileScanConfigBuilder::from(conf) + .with_source(source) + .build(); + + Ok(DataSourceExec::from_data_source(config)) + } + + async fn create_writer_physical_plan( + &self, + input: Arc, + _state: &dyn Session, + conf: FileSinkConfig, + order_requirements: Option, + ) -> Result> { + if conf.insert_op != InsertOp::Append { + return not_impl_err!("Overwrites are not implemented yet for Arrow format"); + } + + let sink = Arc::new(ArrowFileSink::new(conf)); + + Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _) + } + + fn file_source(&self) -> Arc { + Arc::new(ArrowSource::default()) + } +} + +/// Implements [`FileSink`] for writing to arrow_ipc files +struct ArrowFileSink { + config: FileSinkConfig, +} + +impl ArrowFileSink { + fn new(config: FileSinkConfig) -> Self { + Self { config } + } +} + +#[async_trait] +impl FileSink for ArrowFileSink { + fn config(&self) -> &FileSinkConfig { + &self.config + } + + async fn spawn_writer_tasks_and_join( + &self, + context: &Arc, + demux_task: SpawnedTask>, + mut file_stream_rx: DemuxedStreamReceiver, + object_store: Arc, + ) -> Result { + let mut file_write_tasks: JoinSet> = + JoinSet::new(); + + let ipc_options = + IpcWriteOptions::try_new(64, false, arrow_ipc::MetadataVersion::V5)? 
+ .try_with_compression(Some(CompressionType::LZ4_FRAME))?; + while let Some((path, mut rx)) = file_stream_rx.recv().await { + let shared_buffer = SharedBuffer::new(INITIAL_BUFFER_BYTES); + let mut arrow_writer = arrow_ipc::writer::FileWriter::try_new_with_options( + shared_buffer.clone(), + &get_writer_schema(&self.config), + ipc_options.clone(), + )?; + let mut object_store_writer = ObjectWriterBuilder::new( + FileCompressionType::UNCOMPRESSED, + &path, + Arc::clone(&object_store), + ) + .with_buffer_size(Some( + context + .session_config() + .options() + .execution + .objectstore_writer_buffer_size, + )) + .build()?; + file_write_tasks.spawn(async move { + let mut row_count = 0; + while let Some(batch) = rx.recv().await { + row_count += batch.num_rows(); + arrow_writer.write(&batch)?; + let mut buff_to_flush = shared_buffer.buffer.try_lock().unwrap(); + if buff_to_flush.len() > BUFFER_FLUSH_BYTES { + object_store_writer + .write_all(buff_to_flush.as_slice()) + .await?; + buff_to_flush.clear(); + } + } + arrow_writer.finish()?; + let final_buff = shared_buffer.buffer.try_lock().unwrap(); + + object_store_writer.write_all(final_buff.as_slice()).await?; + object_store_writer.shutdown().await?; + Ok(row_count) + }); + } + + let mut row_count = 0; + while let Some(result) = file_write_tasks.join_next().await { + match result { + Ok(r) => { + row_count += r?; + } + Err(e) => { + if e.is_panic() { + std::panic::resume_unwind(e.into_panic()); + } else { + unreachable!(); + } + } + } + } + + demux_task + .join_unwind() + .await + .map_err(|e| DataFusionError::ExecutionJoin(Box::new(e)))??; + Ok(row_count as u64) + } +} + +impl Debug for ArrowFileSink { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ArrowFileSink").finish() + } +} + +impl DisplayAs for ArrowFileSink { + fn fmt_as(&self, t: DisplayFormatType, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!(f, "ArrowFileSink(file_groups=",)?; + FileGroupDisplay(&self.config.file_group).fmt_as(t, f)?; + write!(f, ")") + } + DisplayFormatType::TreeRender => { + writeln!(f, "format: arrow")?; + write!(f, "file={}", &self.config.original_url) + } + } + } +} + +#[async_trait] +impl DataSink for ArrowFileSink { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> &SchemaRef { + self.config.output_schema() + } + + async fn write_all( + &self, + data: SendableRecordBatchStream, + context: &Arc, + ) -> Result { + FileSink::write_all(self, data, context).await + } +} + +const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1']; +const CONTINUATION_MARKER: [u8; 4] = [0xff; 4]; + +/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs. +/// See +async fn infer_schema_from_file_stream( + mut stream: BoxStream<'static, object_store::Result>, +) -> Result { + // Expected format: + // - 6 bytes + // - 2 bytes + // - 4 bytes, not present below v0.15.0 + // - 4 bytes + // + // + + // So in first read we need at least all known sized sections, + // which is 6 + 2 + 4 + 4 = 16 bytes. 
+ let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?; + + // Files should start with these magic bytes + if bytes[0..6] != ARROW_MAGIC { + return Err(ArrowError::ParseError( + "Arrow file does not contain correct header".to_string(), + ))?; + } + + // Since continuation marker bytes added in later versions + let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER { + (&bytes[12..16], 16) + } else { + (&bytes[8..12], 12) + }; + + let meta_len = [meta_len[0], meta_len[1], meta_len[2], meta_len[3]]; + let meta_len = i32::from_le_bytes(meta_len); + + // Read bytes for Schema message + let block_data = if bytes[rest_of_bytes_start_index..].len() < meta_len as usize { + // Need to read more bytes to decode Message + let mut block_data = Vec::with_capacity(meta_len as usize); + // In case we had some spare bytes in our initial read chunk + block_data.extend_from_slice(&bytes[rest_of_bytes_start_index..]); + let size_to_read = meta_len as usize - block_data.len(); + let block_data = + collect_at_least_n_bytes(&mut stream, size_to_read, Some(block_data)).await?; + Cow::Owned(block_data) + } else { + // Already have the bytes we need + let end_index = meta_len as usize + rest_of_bytes_start_index; + let block_data = &bytes[rest_of_bytes_start_index..end_index]; + Cow::Borrowed(block_data) + }; + + // Decode Schema message + let message = root_as_message(&block_data).map_err(|err| { + ArrowError::ParseError(format!("Unable to read IPC message as metadata: {err:?}")) + })?; + let ipc_schema = message.header_as_schema().ok_or_else(|| { + ArrowError::IpcError("Unable to read IPC message as schema".to_string()) + })?; + let schema = fb_to_schema(ipc_schema); + + Ok(Arc::new(schema)) +} + +async fn collect_at_least_n_bytes( + stream: &mut BoxStream<'static, object_store::Result>, + n: usize, + extend_from: Option>, +) -> Result> { + let mut buf = extend_from.unwrap_or_else(|| Vec::with_capacity(n)); + // If extending existing buffer then ensure we read n additional bytes + let n = n + buf.len(); + while let Some(bytes) = stream.next().await.transpose()? 
{ + buf.extend_from_slice(&bytes); + if buf.len() >= n { + break; + } + } + if buf.len() < n { + return Err(ArrowError::ParseError( + "Unexpected end of byte stream for Arrow IPC file".to_string(), + ))?; + } + Ok(buf) +} + +#[cfg(test)] +mod tests { + use super::*; + + use chrono::DateTime; + use datafusion_common::config::TableOptions; + use datafusion_common::DFSchema; + use datafusion_execution::config::SessionConfig; + use datafusion_execution::runtime_env::RuntimeEnv; + use datafusion_expr::execution_props::ExecutionProps; + use datafusion_expr::{AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF}; + use datafusion_physical_expr_common::physical_expr::PhysicalExpr; + use object_store::{chunked::ChunkedStore, memory::InMemory, path::Path}; + + struct MockSession { + config: SessionConfig, + runtime_env: Arc, + } + + impl MockSession { + fn new() -> Self { + Self { + config: SessionConfig::new(), + runtime_env: Arc::new(RuntimeEnv::default()), + } + } + } + + #[async_trait::async_trait] + impl Session for MockSession { + fn session_id(&self) -> &str { + unimplemented!() + } + + fn config(&self) -> &SessionConfig { + &self.config + } + + async fn create_physical_plan( + &self, + _logical_plan: &LogicalPlan, + ) -> Result> { + unimplemented!() + } + + fn create_physical_expr( + &self, + _expr: Expr, + _df_schema: &DFSchema, + ) -> Result> { + unimplemented!() + } + + fn scalar_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn aggregate_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn window_functions(&self) -> &HashMap> { + unimplemented!() + } + + fn runtime_env(&self) -> &Arc { + &self.runtime_env + } + + fn execution_props(&self) -> &ExecutionProps { + unimplemented!() + } + + fn as_any(&self) -> &dyn Any { + unimplemented!() + } + + fn table_options(&self) -> &TableOptions { + unimplemented!() + } + + fn table_options_mut(&mut self) -> &mut TableOptions { + unimplemented!() + } + + fn task_ctx(&self) -> Arc { + unimplemented!() + } + } + + #[tokio::test] + async fn test_infer_schema_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(bytes.len() - 20); // mangle end to show we don't need to read whole file + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let state = MockSession::new(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: u64::MAX, + e_tag: None, + version: None, + }; + + let arrow_format = ArrowFormat {}; + let expected = vec!["f0: Int64", "f1: Utf8", "f2: Boolean"]; + + // Test chunk sizes where too small so we keep having to read more bytes + // And when large enough that first read contains all we need + for chunk_size in [7, 3000] { + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), chunk_size)); + let inferred_schema = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + std::slice::from_ref(&object_meta), + ) + .await?; + let actual_fields = inferred_schema + .fields() + .iter() + .map(|f| format!("{}: {:?}", f.name(), f.data_type())) + .collect::>(); + assert_eq!(expected, actual_fields); + } + + Ok(()) + } + + #[tokio::test] + async fn test_infer_schema_short_stream() -> Result<()> { + let mut bytes = std::fs::read("tests/data/example.arrow")?; + bytes.truncate(20); // should cause error that file shorter than expected + let location = Path::parse("example.arrow")?; + let in_memory_store: Arc = 
Arc::new(InMemory::new()); + in_memory_store.put(&location, bytes.into()).await?; + + let state = MockSession::new(); + let object_meta = ObjectMeta { + location, + last_modified: DateTime::default(), + size: u64::MAX, + e_tag: None, + version: None, + }; + + let arrow_format = ArrowFormat {}; + + let store = Arc::new(ChunkedStore::new(in_memory_store.clone(), 7)); + let err = arrow_format + .infer_schema( + &state, + &(store.clone() as Arc), + std::slice::from_ref(&object_meta), + ) + .await; + + assert!(err.is_err()); + assert_eq!( + "Arrow error: Parser error: Unexpected end of byte stream for Arrow IPC file", + err.unwrap_err().to_string().lines().next().unwrap() + ); + + Ok(()) + } +} diff --git a/datafusion/datasource-arrow/src/mod.rs b/datafusion/datasource-arrow/src/mod.rs new file mode 100644 index 0000000000000..18bb8792c3ffe --- /dev/null +++ b/datafusion/datasource-arrow/src/mod.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Make sure fast / cheap clones on Arc are explicit: +// https://github.com/apache/datafusion/issues/11143 +#![cfg_attr(not(test), deny(clippy::clone_on_ref_ptr))] + +pub mod file_format; +pub mod source; + +pub use file_format::*; diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/datasource-arrow/src/source.rs similarity index 98% rename from datafusion/core/src/datasource/physical_plan/arrow_file.rs rename to datafusion/datasource-arrow/src/source.rs index b37dc499d4035..f43f11880182b 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/datasource-arrow/src/source.rs @@ -18,20 +18,21 @@ use std::any::Any; use std::sync::Arc; -use crate::datasource::physical_plan::{FileOpenFuture, FileOpener}; -use crate::error::Result; use datafusion_datasource::as_file_source; use datafusion_datasource::schema_adapter::SchemaAdapterFactory; use arrow::buffer::Buffer; use arrow::datatypes::SchemaRef; use arrow_ipc::reader::FileDecoder; +use datafusion_common::error::Result; use datafusion_common::{exec_datafusion_err, Statistics}; use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfig; use datafusion_datasource::PartitionedFile; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; +use datafusion_datasource::file_stream::FileOpenFuture; +use datafusion_datasource::file_stream::FileOpener; use futures::StreamExt; use itertools::Itertools; use object_store::{GetOptions, GetRange, GetResultPayload, ObjectStore}; diff --git a/datafusion/core/tests/data/example.arrow b/datafusion/datasource-arrow/tests/data/example.arrow similarity index 100% rename from datafusion/core/tests/data/example.arrow rename to datafusion/datasource-arrow/tests/data/example.arrow diff --git 
a/datafusion/sqllogictest/test_files/arrow_files.slt b/datafusion/sqllogictest/test_files/arrow_files.slt index 62453ec4bf3e6..b3975e0c3f471 100644 --- a/datafusion/sqllogictest/test_files/arrow_files.slt +++ b/datafusion/sqllogictest/test_files/arrow_files.slt @@ -29,7 +29,7 @@ statement ok CREATE EXTERNAL TABLE arrow_simple STORED AS ARROW -LOCATION '../core/tests/data/example.arrow'; +LOCATION '../datasource-arrow/tests/data/example.arrow'; # physical plan @@ -37,7 +37,7 @@ query TT EXPLAIN SELECT * FROM arrow_simple ---- logical_plan TableScan: arrow_simple projection=[f0, f1, f2] -physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.arrow]]}, projection=[f0, f1, f2], file_type=arrow +physical_plan DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/datasource-arrow/tests/data/example.arrow]]}, projection=[f0, f1, f2], file_type=arrow # correct content query ITB @@ -50,8 +50,8 @@ SELECT * FROM arrow_simple # Ensure that local files can not be read by default (a potential security issue) # (url table is only supported when DynamicFileCatalog is enabled) -statement error DataFusion error: Error during planning: table 'datafusion.public.../core/tests/data/example.arrow' not found -SELECT * FROM '../core/tests/data/example.arrow'; +statement error DataFusion error: Error during planning: table 'datafusion.public.../datasource-arrow/tests/data/example.arrow' not found +SELECT * FROM '../datasource-arrow/tests/data/example.arrow'; # ARROW partitioned table statement ok diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 03ef08e1a5f83..bc6cbfab0caed 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -312,7 +312,7 @@ DROP TABLE aggregate_simple # Arrow format statement ok -CREATE external table arrow_simple STORED as ARROW LOCATION '../core/tests/data/example.arrow'; +CREATE external table arrow_simple STORED as ARROW LOCATION '../datasource-arrow/tests/data/example.arrow'; query ITB rowsort SELECT * FROM arrow_simple order by f1 LIMIT 1 @@ -796,7 +796,7 @@ logical_plan 02)--Values: (Int64(1), Int64(2), Int64(3)) query TT -explain CREATE EXTERNAL TEMPORARY TABLE tty STORED as ARROW LOCATION '../core/tests/data/example.arrow'; +explain CREATE EXTERNAL TEMPORARY TABLE tty STORED as ARROW LOCATION '../datasource-arrow/tests/data/example.arrow'; ---- logical_plan CreateExternalTable: Bare { table: "tty" } @@ -804,7 +804,7 @@ statement ok set datafusion.explain.logical_plan_only=false; statement error DataFusion error: This feature is not implemented: Temporary tables not supported -CREATE EXTERNAL TEMPORARY TABLE tty STORED as ARROW LOCATION '../core/tests/data/example.arrow'; +CREATE EXTERNAL TEMPORARY TABLE tty STORED as ARROW LOCATION '../datasource-arrow/tests/data/example.arrow'; statement error DataFusion error: This feature is not implemented: Temporary views not supported CREATE TEMPORARY VIEW y AS VALUES (1,2,3); diff --git a/datafusion/sqllogictest/test_files/repartition_scan.slt b/datafusion/sqllogictest/test_files/repartition_scan.slt index c536c8165c5a3..41718b3aebc27 100644 --- a/datafusion/sqllogictest/test_files/repartition_scan.slt +++ b/datafusion/sqllogictest/test_files/repartition_scan.slt @@ -244,7 +244,7 @@ DROP TABLE json_table; statement ok CREATE EXTERNAL TABLE arrow_table STORED AS ARROW -LOCATION '../core/tests/data/example.arrow'; +LOCATION '../datasource-arrow/tests/data/example.arrow'; # It 
would be great to see the file read as "4" groups with even sizes (offsets) eventually @@ -253,7 +253,7 @@ query TT EXPLAIN SELECT * FROM arrow_table ---- logical_plan TableScan: arrow_table projection=[f0, f1, f2] -physical_plan DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/core/tests/data/example.arrow:0..461], [WORKSPACE_ROOT/datafusion/core/tests/data/example.arrow:461..922], [WORKSPACE_ROOT/datafusion/core/tests/data/example.arrow:922..1383], [WORKSPACE_ROOT/datafusion/core/tests/data/example.arrow:1383..1842]]}, projection=[f0, f1, f2], file_type=arrow +physical_plan DataSourceExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/datasource-arrow/tests/data/example.arrow:0..461], [WORKSPACE_ROOT/datafusion/datasource-arrow/tests/data/example.arrow:461..922], [WORKSPACE_ROOT/datafusion/datasource-arrow/tests/data/example.arrow:922..1383], [WORKSPACE_ROOT/datafusion/datasource-arrow/tests/data/example.arrow:1383..1842]]}, projection=[f0, f1, f2], file_type=arrow # correct content query ITB diff --git a/dev/release/README.md b/dev/release/README.md index d70e256f73831..1b78f8d13be98 100644 --- a/dev/release/README.md +++ b/dev/release/README.md @@ -295,6 +295,7 @@ Verify that the Cargo.toml in the tarball contains the correct version (cd datafusion/catalog && cargo publish) (cd datafusion/catalog-listing && cargo publish) (cd datafusion/functions-table && cargo publish) +(cd datafusion/datasource-arrow && cargo publish) (cd datafusion/datasource-csv && cargo publish) (cd datafusion/datasource-json && cargo publish) (cd datafusion/datasource-parquet && cargo publish) From dce59f850ce9cf3e2aa36402a1593a8b25432336 Mon Sep 17 00:00:00 2001 From: Blake Orth Date: Fri, 17 Oct 2025 07:30:15 -0600 Subject: [PATCH 025/109] Adds instrumentation to LIST operations in CLI (#18103) ## Which issue does this PR close? This does not fully close, but is an incremental building block for: - https://github.com/apache/datafusion/issues/17207 The full context of how this code is likely to progress can be seen in the POC for this effort: - https://github.com/apache/datafusion/pull/17266 ## Rationale for this change Continues the work of filling out the set of methods covered by the instrumented object store. ## What changes are included in this PR? - Adds instrumentation around basic list operations to the instrumented object store - Adds test cases for new code ## Are these changes tested? Yes. Example output: ```sql DataFusion CLI v50.2.0 > \object_store_profiling trace ObjectStore Profile mode set to Trace > CREATE EXTERNAL TABLE nyc_taxi_rides STORED AS PARQUET LOCATION 's3://altinity-clickhouse-data/nyc_taxi_rides/data/tripdata_parquet'; 0 row(s) fetched. Elapsed 2.679 seconds. 
Object Store Profiling Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(altinity-clickhouse-data) 2025-10-16T18:53:09.512970085+00:00 operation=List path=nyc_taxi_rides/data/tripdata_parquet Summaries: List count: 1 Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(altinity-clickhouse-data) 2025-10-16T18:53:09.929709943+00:00 operation=List path=nyc_taxi_rides/data/tripdata_parquet 2025-10-16T18:53:10.106757629+00:00 operation=List path=nyc_taxi_rides/data/tripdata_parquet 2025-10-16T18:53:10.220555058+00:00 operation=Get duration=0.230604s size=8 range: bytes=222192975-222192982 path=nyc_taxi_rides/data/tripdata_parquet/data-200901.parquet 2025-10-16T18:53:10.226399832+00:00 operation=Get duration=0.263826s size=8 range: bytes=233123927-233123934 path=nyc_taxi_rides/data/tripdata_parquet/data-201104.parquet 2025-10-16T18:53:10.226194195+00:00 operation=Get duration=0.269754s size=8 range: bytes=252843253-252843260 path=nyc_taxi_rides/data/tripdata_parquet/data-201103.parquet . . . 2025-10-16T18:53:11.928787014+00:00 operation=Get duration=0.072248s size=18278 range: bytes=201384109-201402386 path=nyc_taxi_rides/data/tripdata_parquet/data-201509.parquet 2025-10-16T18:53:11.933475464+00:00 operation=Get duration=0.068880s size=17175 range: bytes=195411804-195428978 path=nyc_taxi_rides/data/tripdata_parquet/data-201601.parquet 2025-10-16T18:53:11.949629591+00:00 operation=Get duration=0.065645s size=19872 range: bytes=214807880-214827751 path=nyc_taxi_rides/data/tripdata_parquet/data-201603.parquet Summaries: List count: 2 Get count: 288 duration min: 0.060930s duration max: 0.444601s duration avg: 0.133339s size min: 8 B size max: 44247 B size avg: 18870 B size sum: 5434702 B > ``` ## Are there any user-facing changes? 
No-ish ## cc @alamb --- .../src/object_storage/instrumented.rs | 70 +++++++++++++++++-- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/datafusion-cli/src/object_storage/instrumented.rs b/datafusion-cli/src/object_storage/instrumented.rs index cb96734f24645..8acece315f764 100644 --- a/datafusion-cli/src/object_storage/instrumented.rs +++ b/datafusion-cli/src/object_storage/instrumented.rs @@ -114,6 +114,11 @@ impl InstrumentedObjectStore { req.drain(..).collect() } + fn enabled(&self) -> bool { + self.instrument_mode.load(Ordering::Relaxed) + != InstrumentedObjectStoreMode::Disabled as u8 + } + async fn instrumented_get_opts( &self, location: &Path, @@ -138,6 +143,26 @@ impl InstrumentedObjectStore { Ok(ret) } + + fn instrumented_list( + &self, + prefix: Option<&Path>, + ) -> BoxStream<'static, Result> { + let timestamp = Utc::now(); + let ret = self.inner.list(prefix); + + self.requests.lock().push(RequestDetails { + op: Operation::List, + path: prefix.cloned().unwrap_or_else(|| Path::from("")), + timestamp, + duration: None, // list returns a stream, so the duration isn't meaningful + size: None, + range: None, + extra_display: None, + }); + + ret + } } impl fmt::Display for InstrumentedObjectStore { @@ -172,9 +197,7 @@ impl ObjectStore for InstrumentedObjectStore { } async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { - if self.instrument_mode.load(Ordering::Relaxed) - != InstrumentedObjectStoreMode::Disabled as u8 - { + if self.enabled() { return self.instrumented_get_opts(location, options).await; } @@ -186,6 +209,10 @@ impl ObjectStore for InstrumentedObjectStore { } fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + if self.enabled() { + return self.instrumented_list(prefix); + } + self.inner.list(prefix) } @@ -213,7 +240,7 @@ pub enum Operation { _Delete, Get, _Head, - _List, + List, _Put, } @@ -477,8 +504,9 @@ mod tests { assert_eq!(reg.stores().len(), 1); } - #[tokio::test] - async fn instrumented_store() { + // Returns an `InstrumentedObjectStore` with some data loaded for testing and the path to + // access the data + async fn setup_test_store() -> (InstrumentedObjectStore, Path) { let store = Arc::new(object_store::memory::InMemory::new()); let mode = AtomicU8::new(InstrumentedObjectStoreMode::default() as u8); let instrumented = InstrumentedObjectStore::new(store, mode); @@ -488,6 +516,13 @@ mod tests { let payload = PutPayload::from_static(b"test_data"); instrumented.put(&path, payload).await.unwrap(); + (instrumented, path) + } + + #[tokio::test] + async fn instrumented_store_get() { + let (instrumented, path) = setup_test_store().await; + // By default no requests should be instrumented/stored assert!(instrumented.requests.lock().is_empty()); let _ = instrumented.get(&path).await.unwrap(); @@ -511,6 +546,29 @@ mod tests { assert!(request.extra_display.is_none()); } + #[tokio::test] + async fn instrumented_store_list() { + let (instrumented, path) = setup_test_store().await; + + // By default no requests should be instrumented/stored + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list(Some(&path)); + assert!(instrumented.requests.lock().is_empty()); + + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); + assert!(instrumented.requests.lock().is_empty()); + let _ = instrumented.list(Some(&path)); + assert_eq!(instrumented.requests.lock().len(), 1); + + let request = instrumented.take_requests().pop().unwrap(); + assert_eq!(request.op, Operation::List); + 
assert_eq!(request.path, path); + assert!(request.duration.is_none()); + assert!(request.size.is_none()); + assert!(request.range.is_none()); + assert!(request.extra_display.is_none()); + } + #[test] fn request_details() { let rd = RequestDetails { From 76050235b427d3e0f90c30a8222a8babe537ad9c Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Fri, 17 Oct 2025 21:52:07 +0800 Subject: [PATCH 026/109] feat: spark udf array shuffle (#17674) ## Which issue does this PR close? ## Rationale for this change support shuffle udf ## What changes are included in this PR? support shuffle udf ## Are these changes tested? UT ## Are there any user-facing changes? No --- datafusion/spark/Cargo.toml | 2 +- datafusion/spark/src/function/array/mod.rs | 9 +- .../spark/src/function/array/shuffle.rs | 191 ++++++++++++++++++ .../test_files/spark/array/shuffle.slt | 113 +++++++++++ 4 files changed, 313 insertions(+), 2 deletions(-) create mode 100644 datafusion/spark/src/function/array/shuffle.rs create mode 100644 datafusion/sqllogictest/test_files/spark/array/shuffle.slt diff --git a/datafusion/spark/Cargo.toml b/datafusion/spark/Cargo.toml index b95cc31caec68..7f6210fb32bf6 100644 --- a/datafusion/spark/Cargo.toml +++ b/datafusion/spark/Cargo.toml @@ -46,12 +46,12 @@ datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true, features = ["crypto_expressions"] } log = { workspace = true } +rand = { workspace = true } sha1 = "0.10" url = { workspace = true } [dev-dependencies] criterion = { workspace = true } -rand = { workspace = true } [[bench]] harness = false diff --git a/datafusion/spark/src/function/array/mod.rs b/datafusion/spark/src/function/array/mod.rs index fed52a494281d..01056ba952984 100644 --- a/datafusion/spark/src/function/array/mod.rs +++ b/datafusion/spark/src/function/array/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +pub mod shuffle; pub mod spark_array; use datafusion_expr::ScalarUDF; @@ -22,13 +23,19 @@ use datafusion_functions::make_udf_function; use std::sync::Arc; make_udf_function!(spark_array::SparkArray, array); +make_udf_function!(shuffle::SparkShuffle, shuffle); pub mod expr_fn { use datafusion_functions::export_functions; export_functions!((array, "Returns an array with the given elements.", args)); + export_functions!(( + shuffle, + "Returns a random permutation of the given array.", + args + )); } pub fn functions() -> Vec> { - vec![array()] + vec![array(), shuffle()] } diff --git a/datafusion/spark/src/function/array/shuffle.rs b/datafusion/spark/src/function/array/shuffle.rs new file mode 100644 index 0000000000000..abeafd3a93660 --- /dev/null +++ b/datafusion/spark/src/function/array/shuffle.rs @@ -0,0 +1,191 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::function::functions_nested_utils::make_scalar_function; +use arrow::array::{ + Array, ArrayRef, Capacities, FixedSizeListArray, GenericListArray, MutableArrayData, + OffsetSizeTrait, +}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::DataType::{FixedSizeList, LargeList, List, Null}; +use arrow::datatypes::{DataType, FieldRef}; +use datafusion_common::cast::{ + as_fixed_size_list_array, as_large_list_array, as_list_array, +}; +use datafusion_common::{exec_err, utils::take_function_args, Result}; +use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; +use rand::rng; +use rand::seq::SliceRandom; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct SparkShuffle { + signature: Signature, +} + +impl Default for SparkShuffle { + fn default() -> Self { + Self::new() + } +} + +impl SparkShuffle { + pub fn new() -> Self { + Self { + signature: Signature::arrays(1, None, Volatility::Volatile), + } + } +} + +impl ScalarUDFImpl for SparkShuffle { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "shuffle" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + Ok(arg_types[0].clone()) + } + + fn invoke_with_args( + &self, + args: datafusion_expr::ScalarFunctionArgs, + ) -> Result { + make_scalar_function(array_shuffle_inner)(&args.args) + } +} + +/// array_shuffle SQL function +pub fn array_shuffle_inner(arg: &[ArrayRef]) -> Result { + let [input_array] = take_function_args("shuffle", arg)?; + match &input_array.data_type() { + List(field) => { + let array = as_list_array(input_array)?; + general_array_shuffle::(array, field) + } + LargeList(field) => { + let array = as_large_list_array(input_array)?; + general_array_shuffle::(array, field) + } + FixedSizeList(field, _) => { + let array = as_fixed_size_list_array(input_array)?; + fixed_size_array_shuffle(array, field) + } + Null => Ok(Arc::clone(input_array)), + array_type => exec_err!("shuffle does not support type '{array_type}'."), + } +} + +fn general_array_shuffle( + array: &GenericListArray, + field: &FieldRef, +) -> Result { + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + let mut offsets = vec![O::usize_as(0)]; + let mut nulls = vec![]; + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + let mut rng = rng(); + + for (row_index, offset_window) in array.offsets().windows(2).enumerate() { + // skip the null value + if array.is_null(row_index) { + nulls.push(false); + offsets.push(offsets[row_index] + O::one()); + mutable.extend(0, 0, 1); + continue; + } + nulls.push(true); + let start = offset_window[0]; + let end = offset_window[1]; + let length = (end - start).to_usize().unwrap(); + + // Create indices and shuffle them + let mut indices: Vec = + (start.to_usize().unwrap()..end.to_usize().unwrap()).collect(); + indices.shuffle(&mut rng); + + // Add shuffled elements + for &index in &indices { + mutable.extend(0, index, index + 1); + } + + offsets.push(offsets[row_index] + O::usize_as(length)); + } + + let data = mutable.freeze(); + Ok(Arc::new(GenericListArray::::try_new( + Arc::clone(field), + OffsetBuffer::::new(offsets.into()), + arrow::array::make_array(data), + Some(nulls.into()), + )?)) +} + +fn fixed_size_array_shuffle( + 
array: &FixedSizeListArray, + field: &FieldRef, +) -> Result { + let values = array.values(); + let original_data = values.to_data(); + let capacity = Capacities::Array(original_data.len()); + let mut nulls = vec![]; + let mut mutable = + MutableArrayData::with_capacities(vec![&original_data], false, capacity); + let value_length = array.value_length() as usize; + let mut rng = rng(); + + for row_index in 0..array.len() { + // skip the null value + if array.is_null(row_index) { + nulls.push(false); + mutable.extend(0, 0, value_length); + continue; + } + nulls.push(true); + + let start = row_index * value_length; + let end = start + value_length; + + // Create indices and shuffle them + let mut indices: Vec = (start..end).collect(); + indices.shuffle(&mut rng); + + // Add shuffled elements + for &index in &indices { + mutable.extend(0, index, index + 1); + } + } + + let data = mutable.freeze(); + Ok(Arc::new(FixedSizeListArray::try_new( + Arc::clone(field), + array.value_length(), + arrow::array::make_array(data), + Some(nulls.into()), + )?)) +} diff --git a/datafusion/sqllogictest/test_files/spark/array/shuffle.slt b/datafusion/sqllogictest/test_files/spark/array/shuffle.slt new file mode 100644 index 0000000000000..cb3c77cac8fbb --- /dev/null +++ b/datafusion/sqllogictest/test_files/spark/array/shuffle.slt @@ -0,0 +1,113 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Test shuffle function with simple arrays +query B +SELECT array_sort(shuffle([1, 2, 3, 4, 5, NULL])) = [NULL,1, 2, 3, 4, 5]; +---- +true + +query B +SELECT shuffle([1, 2, 3, 4, 5, NULL]) != [1, 2, 3, 4, 5, NULL]; +---- +true + +# Test shuffle function with string arrays + +query B +SELECT array_sort(shuffle(['a', 'b', 'c', 'd', 'e', 'f'])) = ['a', 'b', 'c', 'd', 'e', 'f']; +---- +true + +query B +SELECT shuffle(['a', 'b', 'c', 'd', 'e', 'f']) != ['a', 'b', 'c', 'd', 'e', 'f'];; +---- +true + +# Test shuffle function with empty array +query ? +SELECT shuffle([]); +---- +[] + +# Test shuffle function with single element +query ? +SELECT shuffle([42]); +---- +[42] + +# Test shuffle function with null array +query ? +SELECT shuffle(NULL); +---- +NULL + +# Test shuffle function with fixed size list arrays +query B +SELECT array_sort(shuffle(arrow_cast([1, 2, NULL, 3, 4, 5], 'FixedSizeList(6, Int64)'))) = [NULL, 1, 2, 3, 4, 5]; +---- +true + +query B +SELECT shuffle(arrow_cast([1, 2, NULL, 3, 4, 5], 'FixedSizeList(6, Int64)')) != [1, 2, NULL, 3, 4, 5]; +---- +true + +# Test shuffle on table data with different list types +statement ok +CREATE TABLE test_shuffle_list_types AS VALUES + ([1, 2, 3, 4]), + ([5, 6, 7, 8, 9]), + ([10]), + (NULL), + ([]); + +# Test shuffle with large list from table +query ? 
+SELECT array_sort(shuffle(column1)) FROM test_shuffle_list_types; +---- +[1, 2, 3, 4] +[5, 6, 7, 8, 9] +[10] +NULL +[] + +# Test fixed size list table +statement ok +CREATE TABLE test_shuffle_fixed_size AS VALUES + (arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)')), + (arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)')), + (arrow_cast([NULL, 8, 9], 'FixedSizeList(3, Int64)')), + (NULL); + +# Test shuffle with fixed size list from table +query ? +SELECT array_sort(shuffle(column1)) FROM test_shuffle_fixed_size; +---- +[1, 2, 3] +[4, 5, 6] +[NULL, 8, 9] +NULL + +# Clean up +statement ok +DROP TABLE test_shuffle_list_types; + +statement ok +DROP TABLE test_shuffle_fixed_size; + + From f0ab1369a200bc80b0a737bcbf54609b15b5015c Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Fri, 17 Oct 2025 15:57:43 +0200 Subject: [PATCH 027/109] make Union::try_new pub (#18125) ## Which issue does this PR close? - Closes #18126. ## Rationale for this change It's a useful constructor for users manipulating logical plans where they know the schemas will match exactly. We already expose other constructors for Union and constructors for logical plans. ## What changes are included in this PR? Makes `Union::try_new` a public function. ## Are these changes tested? Seems unnecessary. ## Are there any user-facing changes? The function is now public. Not a breaking change, but going forward, changes to it would be breaking changes for users of the logical plan API. --- datafusion/expr/src/logical_plan/plan.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index b8200ab8a48c3..05a2564464c59 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2753,7 +2753,8 @@ pub struct Union { impl Union { /// Constructs new Union instance deriving schema from inputs. - fn try_new(inputs: Vec<Arc<LogicalPlan>>) -> Result<Self> { + /// Schema data types must match exactly. + pub fn try_new(inputs: Vec<Arc<LogicalPlan>>) -> Result<Self> { let schema = Self::derive_schema_from_inputs(&inputs, false, false)?; Ok(Union { inputs, schema }) } From c9561049d96c4875efa6670f7a4b19556d00fb79 Mon Sep 17 00:00:00 2001 From: Chen Chongchen Date: Fri, 17 Oct 2025 22:22:12 +0800 Subject: [PATCH 028/109] fix: window unparsing (#17367) ## Which issue does this PR close? - Closes #17360. ## Rationale for this change In `LogicalPlan::Filter` unparsing, if there's a window expr, it should be converted to QUALIFY. Postgres requires an alias for a derived table; otherwise it will complain: ``` ERROR: subquery in FROM must have an alias. ``` This issue is fixed at the same time. ## What changes are included in this PR? If a window expr is found, convert the filter to QUALIFY. ## Are these changes tested? UT ## Are there any user-facing changes? 
No --------- Co-authored-by: Jeffrey Vo --- datafusion/sql/src/unparser/dialect.rs | 23 ++++++ datafusion/sql/src/unparser/plan.rs | 19 ++++- datafusion/sql/src/unparser/rewrite.rs | 66 +++++++++++++++++ datafusion/sql/tests/cases/plan_to_sql.rs | 86 +++++++++++++++++++++++ 4 files changed, 192 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs index 647ad680674b0..834b0a97a47b0 100644 --- a/datafusion/sql/src/unparser/dialect.rs +++ b/datafusion/sql/src/unparser/dialect.rs @@ -207,6 +207,13 @@ pub trait Dialect: Send + Sync { Ok(None) } + /// Allows the dialect to support the QUALIFY clause + /// + /// Some dialects, like Postgres, do not support the QUALIFY clause + fn supports_qualify(&self) -> bool { + true + } + /// Allows the dialect to override logic of formatting datetime with tz into string. fn timestamp_with_tz_to_string(&self, dt: DateTime, _unit: TimeUnit) -> String { dt.to_string() @@ -274,6 +281,14 @@ impl Dialect for DefaultDialect { pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { + fn supports_qualify(&self) -> bool { + false + } + + fn requires_derived_table_alias(&self) -> bool { + true + } + fn identifier_quote_style(&self, _: &str) -> Option { Some('"') } @@ -424,6 +439,10 @@ impl Dialect for DuckDBDialect { pub struct MySqlDialect {} impl Dialect for MySqlDialect { + fn supports_qualify(&self) -> bool { + false + } + fn identifier_quote_style(&self, _: &str) -> Option { Some('`') } @@ -485,6 +504,10 @@ impl Dialect for MySqlDialect { pub struct SqliteDialect {} impl Dialect for SqliteDialect { + fn supports_qualify(&self) -> bool { + false + } + fn identifier_quote_style(&self, _: &str) -> Option { Some('`') } diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs index b6c65614995a9..e7535338b7677 100644 --- a/datafusion/sql/src/unparser/plan.rs +++ b/datafusion/sql/src/unparser/plan.rs @@ -32,11 +32,11 @@ use super::{ }, Unparser, }; -use crate::unparser::ast::UnnestRelationBuilder; use crate::unparser::extension_unparser::{ UnparseToStatementResult, UnparseWithinStatementResult, }; use crate::unparser::utils::{find_unnest_node_until_relation, unproject_agg_exprs}; +use crate::unparser::{ast::UnnestRelationBuilder, rewrite::rewrite_qualify}; use crate::utils::UNNEST_PLACEHOLDER; use datafusion_common::{ internal_err, not_impl_err, @@ -95,7 +95,10 @@ pub fn plan_to_sql(plan: &LogicalPlan) -> Result { impl Unparser<'_> { pub fn plan_to_sql(&self, plan: &LogicalPlan) -> Result { - let plan = normalize_union_schema(plan)?; + let mut plan = normalize_union_schema(plan)?; + if !self.dialect.supports_qualify() { + plan = rewrite_qualify(plan)?; + } match plan { LogicalPlan::Projection(_) @@ -428,6 +431,18 @@ impl Unparser<'_> { unproject_agg_exprs(filter.predicate.clone(), agg, None)?; let filter_expr = self.expr_to_sql(&unprojected)?; select.having(Some(filter_expr)); + } else if let (Some(window), true) = ( + find_window_nodes_within_select( + plan, + None, + select.already_projected(), + ), + self.dialect.supports_qualify(), + ) { + let unprojected = + unproject_window_exprs(filter.predicate.clone(), &window)?; + let filter_expr = self.expr_to_sql(&unprojected)?; + select.qualify(Some(filter_expr)); } else { let filter_expr = self.expr_to_sql(&filter.predicate)?; select.selection(Some(filter_expr)); diff --git a/datafusion/sql/src/unparser/rewrite.rs b/datafusion/sql/src/unparser/rewrite.rs index aa480cf4fff92..c961f1d6f1f0c 100644 --- 
a/datafusion/sql/src/unparser/rewrite.rs +++ b/datafusion/sql/src/unparser/rewrite.rs @@ -100,6 +100,72 @@ fn rewrite_sort_expr_for_union(exprs: Vec) -> Result> { Ok(sort_exprs) } +/// Rewrite Filter plans that have a Window as their input by inserting a SubqueryAlias. +/// +/// When a Filter directly operates on a Window plan, it can cause issues during SQL unparsing +/// because window functions in a WHERE clause are not valid SQL. The solution is to wrap +/// the Window plan in a SubqueryAlias, effectively creating a derived table. +/// +/// Example transformation: +/// +/// Filter: condition +/// Window: window_function +/// TableScan: table +/// +/// becomes: +/// +/// Filter: condition +/// SubqueryAlias: __qualify_subquery +/// Projection: table.column1, table.column2 +/// Window: window_function +/// TableScan: table +/// +pub(super) fn rewrite_qualify(plan: LogicalPlan) -> Result { + let transformed_plan = plan.transform_up(|plan| match plan { + // Check if the filter's input is a Window plan + LogicalPlan::Filter(mut filter) => { + if matches!(&*filter.input, LogicalPlan::Window(_)) { + // Create a SubqueryAlias around the Window plan + let qualifier = filter + .input + .schema() + .iter() + .find_map(|(q, _)| q) + .map(|q| q.to_string()) + .unwrap_or_else(|| "__qualify_subquery".to_string()); + + // for Postgres, name of column for 'rank() over (...)' is 'rank' + // but in Datafusion, it is 'rank() over (...)' + // without projection, it's still an invalid sql in Postgres + + let project_exprs = filter + .input + .schema() + .iter() + .map(|(_, f)| datafusion_expr::col(f.name()).alias(f.name())) + .collect::>(); + + let input = + datafusion_expr::LogicalPlanBuilder::from(Arc::clone(&filter.input)) + .project(project_exprs)? + .build()?; + + let subquery_alias = + datafusion_expr::SubqueryAlias::try_new(Arc::new(input), qualifier)?; + + filter.input = Arc::new(LogicalPlan::SubqueryAlias(subquery_alias)); + Ok(Transformed::yes(LogicalPlan::Filter(filter))) + } else { + Ok(Transformed::no(LogicalPlan::Filter(filter))) + } + } + + _ => Ok(Transformed::no(plan)), + }); + + transformed_plan.data() +} + /// Rewrite logic plan for query that order by columns are not in projections /// Plan before rewrite: /// diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 7aa982dcf3dd9..5f76afb763cff 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -21,12 +21,14 @@ use datafusion_common::{ assert_contains, Column, DFSchema, DFSchemaRef, DataFusionError, Result, TableReference, }; +use datafusion_expr::expr::{WindowFunction, WindowFunctionParams}; use datafusion_expr::test::function_stub::{ count_udaf, max_udaf, min_udaf, sum, sum_udaf, }; use datafusion_expr::{ cast, col, lit, table_scan, wildcard, EmptyRelation, Expr, Extension, LogicalPlan, LogicalPlanBuilder, Union, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, + WindowFrame, WindowFunctionDefinition, }; use datafusion_functions::unicode; use datafusion_functions_aggregate::grouping::grouping_udaf; @@ -2521,6 +2523,90 @@ fn test_unparse_left_semi_join_with_table_scan_projection() -> Result<()> { Ok(()) } +#[test] +fn test_unparse_window() -> Result<()> { + // SubqueryAlias: t + // Projection: t.k, t.v, rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS r + // Filter: rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW = 
UInt64(1) + // WindowAggr: windowExpr=[[rank() PARTITION BY [t.k] ORDER BY [t.v ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] + // TableScan: t projection=[k, v] + + let schema = Schema::new(vec![ + Field::new("k", DataType::Int32, false), + Field::new("v", DataType::Int32, false), + ]); + let window_expr = Expr::WindowFunction(Box::new(WindowFunction { + fun: WindowFunctionDefinition::WindowUDF(rank_udwf()), + params: WindowFunctionParams { + args: vec![], + partition_by: vec![col("k")], + order_by: vec![col("v").sort(true, true)], + window_frame: WindowFrame::new(None), + null_treatment: None, + distinct: false, + filter: None, + }, + })); + let table = table_scan(Some("test"), &schema, Some(vec![0, 1]))?.build()?; + let plan = LogicalPlanBuilder::window_plan(table, vec![window_expr.clone()])?; + + let name = plan.schema().fields().last().unwrap().name().clone(); + let plan = LogicalPlanBuilder::from(plan) + .filter(col(name.clone()).eq(lit(1i64)))? + .project(vec![col("k"), col("v"), col(name)])? + .build()?; + + let unparser = Unparser::new(&UnparserPostgreSqlDialect {}); + let sql = unparser.plan_to_sql(&plan)?; + assert_snapshot!( + sql, + @r#"SELECT "test"."k", "test"."v", "rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "test"."k" AS "k", "test"."v" AS "v", rank() OVER (PARTITION BY "test"."k" ORDER BY "test"."v" ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM "test") AS "test" WHERE ("rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" = 1)"# + ); + + let unparser = Unparser::new(&UnparserMySqlDialect {}); + let sql = unparser.plan_to_sql(&plan)?; + assert_snapshot!( + sql, + @r#"SELECT `test`.`k`, `test`.`v`, `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM (SELECT `test`.`k` AS `k`, `test`.`v` AS `v`, rank() OVER (PARTITION BY `test`.`k` ORDER BY `test`.`v` ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM `test`) AS `test` WHERE (`rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` = 1)"# + ); + + let unparser = Unparser::new(&SqliteDialect {}); + let sql = unparser.plan_to_sql(&plan)?; + assert_snapshot!( + sql, + @r#"SELECT `test`.`k`, `test`.`v`, `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM (SELECT `test`.`k` AS `k`, `test`.`v` AS `v`, rank() OVER (PARTITION BY `test`.`k` ORDER BY `test`.`v` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` FROM `test`) AS `test` WHERE (`rank() PARTITION BY [test.k] ORDER BY [test.v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` = 1)"# + ); + + let unparser = Unparser::new(&DefaultDialect {}); + let sql = unparser.plan_to_sql(&plan)?; + assert_snapshot!( + sql, + @r#"SELECT test.k, test.v, rank() OVER (PARTITION BY test.k ORDER BY test.v ASC NULLS FIRST ROWS BETWEEN UNBOUNDED 
PRECEDING AND UNBOUNDED FOLLOWING) FROM test QUALIFY (rank() OVER (PARTITION BY test.k ORDER BY test.v ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) = 1)"# + ); + + // without table qualifier + let table = table_scan(Some("test"), &schema, Some(vec![0, 1]))?.build()?; + let table = LogicalPlanBuilder::from(table) + .project(vec![col("k").alias("k"), col("v").alias("v")])? + .build()?; + let plan = LogicalPlanBuilder::window_plan(table, vec![window_expr])?; + + let name = plan.schema().fields().last().unwrap().name().clone(); + let plan = LogicalPlanBuilder::from(plan) + .filter(col(name.clone()).eq(lit(1i64)))? + .project(vec![col("k"), col("v"), col(name)])? + .build()?; + + let unparser = Unparser::new(&UnparserPostgreSqlDialect {}); + let sql = unparser.plan_to_sql(&plan)?; + assert_snapshot!( + sql, + @r#"SELECT "k", "v", "rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "k" AS "k", "v" AS "v", rank() OVER (PARTITION BY "k" ORDER BY "v" ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" FROM (SELECT "test"."k" AS "k", "test"."v" AS "v" FROM "test") AS "derived_projection") AS "__qualify_subquery" WHERE ("rank() PARTITION BY [k] ORDER BY [v ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" = 1)"# + ); + + Ok(()) +} + #[test] fn test_like_filter() { let statement = generate_round_trip_statement( From ec2402aee9bf510d3a98927cb3580850914fcf27 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Sat, 18 Oct 2025 00:22:21 +0800 Subject: [PATCH 029/109] feat: Support configurable `EXPLAIN ANALYZE` detail level (#18098) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Which issue does this PR close? - Closes #. ## Rationale for this change `EXPLAIN ANALYZE` can be used for profiling and displays the results alongside the EXPLAIN plan. The issue is that it currently shows too many low-level details. It would provide a better user experience if only the most commonly used metrics were shown by default, with more detailed metrics available through specific configuration options. ### Example In `datafusion-cli`: ``` > CREATE EXTERNAL TABLE IF NOT EXISTS lineitem STORED AS parquet LOCATION '/Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem'; 0 row(s) fetched. Elapsed 0.000 seconds. 
explain analyze select * from lineitem where l_orderkey = 3000000; ``` The parquet reader includes a large number of low-level details: ``` metrics=[output_rows=19813, elapsed_compute=14ns, batches_split=0, bytes_scanned=2147308, file_open_errors=0, file_scan_errors=0, files_ranges_pruned_statistics=18, num_predicate_creation_errors=0, page_index_rows_matched=19813, page_index_rows_pruned=729088, predicate_cache_inner_records=0, predicate_cache_records=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, row_groups_matched_bloom_filter=0, row_groups_matched_statistics=1, row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0, bloom_filter_eval_time=21.997µs, metadata_load_time=273.83µs, page_index_eval_time=29.915µs, row_pushdown_eval_time=42ns, statistics_eval_time=76.248µs, time_elapsed_opening=4.02146ms, time_elapsed_processing=24.787461ms, time_elapsed_scanning_total=24.17671ms, time_elapsed_scanning_until_data=23.103665ms] ``` I believe only a subset of these is commonly used, for example `output_rows`, `metadata_load_time`, and how many files/row-groups/pages are pruned, and it would be better to display only the most common ones by default. ### Existing `VERBOSE` keyword There is an existing `VERBOSE` keyword in `EXPLAIN ANALYZE VERBOSE`; however, it turns on per-partition metrics instead of controlling the detail level. I think it would be hard to mix this partition control with the detail level introduced in this PR, so they're kept separate: the following config controls the detail level, and the semantics of `EXPLAIN ANALYZE VERBOSE` remain unchanged. ### This PR: configurable explain analyze level 1. Introduced a new config option `datafusion.explain.analyze_level`. When set to `dev` (default value), all existing metrics will be shown. If set to `summary`, only `BaselineMetrics` will be displayed (i.e. `output_rows` and `elapsed_compute`). Note that for now we only include `BaselineMetrics` for simplicity; in follow-up PRs we can figure out which metrics are commonly used for each operator, add them to the `summary` analyze level, and finally make `summary` the default. 2. Add a `MetricType` field associated with `Metric` to indicate the detail level (or potentially a category in the future). For each configuration, the corresponding `MetricType` set will be shown. #### Demo ``` -- continuing the above example > set datafusion.explain.analyze_level = summary; 0 row(s) fetched. Elapsed 0.000 seconds. 
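-- (note: 'summary' and 'dev' are the accepted levels and 'dev' is the default; 'summary' keeps only the BaselineMetrics such as output_rows and elapsed_compute, while 'dev' shows the full per-operator metric set)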
> explain analyze select * from lineitem where l_orderkey = 3000000; +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | plan_type | plan | +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | Plan with Metrics | CoalesceBatchesExec: target_batch_size=8192, metrics=[output_rows=5, elapsed_compute=25.339µs] | | | FilterExec: l_orderkey@0 = 3000000, metrics=[output_rows=5, elapsed_compute=81.221µs] | | | DataSourceExec: file_groups={14 groups: [[Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-0.parquet:0..11525426], [Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-0.parquet:11525426..20311205, Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-1.parquet:0..2739647], [Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-1.parquet:2739647..14265073], [Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-1.parquet:14265073..20193593, Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-2.parquet:0..5596906], [Users/yongting/Code/datafusion/benchmarks/data/tpch_sf1/lineitem/part-2.parquet:5596906..17122332], ...]}, 
projection=[l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate, l_commitdate, l_receiptdate, l_shipinstruct, l_shipmode, l_comment], file_type=parquet, predicate=l_orderkey@0 = 3000000, pruning_predicate=l_orderkey_null_count@2 != row_count@3 AND l_orderkey_min@0 <= 3000000 AND 3000000 <= l_orderkey_max@1, required_guarantees=[l_orderkey in (3000000)], metrics=[output_rows=19813, elapsed_compute=14ns] | | | | +-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ 1 row(s) fetched. Elapsed 0.025 seconds. ``` Only `BaselineMetrics` are shown. ## What changes are included in this PR? ## Are these changes tested? UT ## Are there any user-facing changes? 
No --------- Co-authored-by: Andrew Lamb --- datafusion/common/src/config.rs | 7 ++- datafusion/common/src/format.rs | 45 ++++++++++++++++ .../src/datasource/physical_plan/parquet.rs | 5 +- datafusion/core/src/physical_planner.rs | 8 +++ datafusion/core/tests/sql/explain_analyze.rs | 35 ++++++++++++ datafusion/physical-plan/src/analyze.rs | 19 ++++++- datafusion/physical-plan/src/display.rs | 33 ++++++++++++ .../physical-plan/src/metrics/baseline.rs | 12 +++-- .../physical-plan/src/metrics/builder.rs | 21 +++++++- datafusion/physical-plan/src/metrics/mod.rs | 54 ++++++++++++++++++- datafusion/proto/src/physical_plan/mod.rs | 2 + .../tests/cases/roundtrip_physical_plan.rs | 2 + .../test_files/information_schema.slt | 2 + .../test_files/spark/aggregate/avg.slt | 2 +- .../test_files/spark/string/concat.slt | 2 +- .../test_files/spark/string/format_string.slt | 8 +-- docs/source/user-guide/configs.md | 1 + docs/source/user-guide/sql/explain.md | 11 ++-- 18 files changed, 251 insertions(+), 18 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 126935a1de90b..52e35985698f0 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -22,7 +22,7 @@ use arrow_ipc::CompressionType; #[cfg(feature = "parquet_encryption")] use crate::encryption::{FileDecryptionProperties, FileEncryptionProperties}; use crate::error::_config_err; -use crate::format::ExplainFormat; +use crate::format::{ExplainAnalyzeLevel, ExplainFormat}; use crate::parsers::CompressionTypeVariant; use crate::utils::get_available_parallelism; use crate::{DataFusionError, Result}; @@ -991,6 +991,11 @@ config_namespace! { /// (format=tree only) Maximum total width of the rendered tree. /// When set to 0, the tree will have no width limit. pub tree_maximum_render_width: usize, default = 240 + + /// Verbosity level for "EXPLAIN ANALYZE". Default is "dev" + /// "summary" shows common metrics for high-level insights. + /// "dev" provides deep operator-level introspection for developers. + pub analyze_level: ExplainAnalyzeLevel, default = ExplainAnalyzeLevel::Dev } } diff --git a/datafusion/common/src/format.rs b/datafusion/common/src/format.rs index 06ec519ef356c..764190e1189bf 100644 --- a/datafusion/common/src/format.rs +++ b/datafusion/common/src/format.rs @@ -205,3 +205,48 @@ impl ConfigField for ExplainFormat { Ok(()) } } + +/// Verbosity levels controlling how `EXPLAIN ANALYZE` renders metrics +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ExplainAnalyzeLevel { + /// Show a compact view containing high-level metrics + Summary, + /// Show a developer-focused view with per-operator details + Dev, + // When adding new enum, update the error message in `from_str()` accordingly. +} + +impl FromStr for ExplainAnalyzeLevel { + type Err = DataFusionError; + + fn from_str(level: &str) -> Result { + match level.to_lowercase().as_str() { + "summary" => Ok(ExplainAnalyzeLevel::Summary), + "dev" => Ok(ExplainAnalyzeLevel::Dev), + other => Err(DataFusionError::Configuration(format!( + "Invalid explain analyze level. Expected 'summary' or 'dev'. 
Got '{other}'" + ))), + } + } +} + +impl Display for ExplainAnalyzeLevel { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let s = match self { + ExplainAnalyzeLevel::Summary => "summary", + ExplainAnalyzeLevel::Dev => "dev", + }; + write!(f, "{s}") + } +} + +impl ConfigField for ExplainAnalyzeLevel { + fn visit(&self, v: &mut V, key: &str, description: &'static str) { + v.some(key, self, description) + } + + fn set(&mut self, _: &str, value: &str) -> Result<()> { + *self = ExplainAnalyzeLevel::from_str(value)?; + Ok(()) + } +} diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index d0774e57174ee..10a475c1cc9a6 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -64,7 +64,9 @@ mod tests { use datafusion_physical_expr::planner::logical2physical; use datafusion_physical_plan::analyze::AnalyzeExec; use datafusion_physical_plan::collect; - use datafusion_physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; + use datafusion_physical_plan::metrics::{ + ExecutionPlanMetricsSet, MetricType, MetricsSet, + }; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use chrono::{TimeZone, Utc}; @@ -238,6 +240,7 @@ mod tests { let analyze_exec = Arc::new(AnalyzeExec::new( false, false, + vec![MetricType::SUMMARY, MetricType::DEV], // use a new ParquetSource to avoid sharing execution metrics self.build_parquet_exec( Arc::clone(table_schema), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index c28e56790e660..0fa17deea1295 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -62,6 +62,7 @@ use arrow::compute::SortOptions; use arrow::datatypes::Schema; use datafusion_catalog::ScanArgs; use datafusion_common::display::ToStringifiedPlan; +use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; use datafusion_common::TableReference; use datafusion_common::{ @@ -90,6 +91,7 @@ use datafusion_physical_expr::{ use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::empty::EmptyExec; use datafusion_physical_plan::execution_plan::InvariantLevel; +use datafusion_physical_plan::metrics::MetricType; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::recursive_query::RecursiveQueryExec; use datafusion_physical_plan::unnest::ListUnnest; @@ -2073,9 +2075,15 @@ impl DefaultPhysicalPlanner { let input = self.create_physical_plan(&a.input, session_state).await?; let schema = Arc::clone(a.schema.inner()); let show_statistics = session_state.config_options().explain.show_statistics; + let analyze_level = session_state.config_options().explain.analyze_level; + let metric_types = match analyze_level { + ExplainAnalyzeLevel::Summary => vec![MetricType::SUMMARY], + ExplainAnalyzeLevel::Dev => vec![MetricType::SUMMARY, MetricType::DEV], + }; Ok(Arc::new(AnalyzeExec::new( a.verbose, show_statistics, + metric_types, input, schema, ))) diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index e082cabaadaff..54a57ed901162 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -22,6 +22,7 @@ use rstest::rstest; use datafusion::config::ConfigOptions; use 
datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::metrics::Timestamp; +use datafusion_common::format::ExplainAnalyzeLevel; use object_store::path::Path; #[tokio::test] @@ -158,6 +159,40 @@ async fn explain_analyze_baseline_metrics() { fn nanos_from_timestamp(ts: &Timestamp) -> i64 { ts.value().unwrap().timestamp_nanos_opt().unwrap() } + +// Test different detail level for config `datafusion.explain.analyze_level` +#[tokio::test] +async fn explain_analyze_level() { + async fn collect_plan(level: ExplainAnalyzeLevel) -> String { + let mut config = SessionConfig::new(); + config.options_mut().explain.analyze_level = level; + let ctx = SessionContext::new_with_config(config); + let sql = "EXPLAIN ANALYZE \ + SELECT * \ + FROM generate_series(10) as t1(v1) \ + ORDER BY v1 DESC"; + let dataframe = ctx.sql(sql).await.unwrap(); + let batches = dataframe.collect().await.unwrap(); + arrow::util::pretty::pretty_format_batches(&batches) + .unwrap() + .to_string() + } + + for (level, needle, should_contain) in [ + (ExplainAnalyzeLevel::Summary, "spill_count", false), + (ExplainAnalyzeLevel::Summary, "output_rows", true), + (ExplainAnalyzeLevel::Dev, "spill_count", true), + (ExplainAnalyzeLevel::Dev, "output_rows", true), + ] { + let plan = collect_plan(level).await; + assert_eq!( + plan.contains(needle), + should_contain, + "plan for level {level:?} unexpected content: {plan}" + ); + } +} + #[tokio::test] async fn csv_explain_plans() { // This test verify the look of each plan in its full cycle plan creation diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index c095afe5e716e..c696cf5aa5e60 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -26,6 +26,7 @@ use super::{ SendableRecordBatchStream, }; use crate::display::DisplayableExecutionPlan; +use crate::metrics::MetricType; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; @@ -44,6 +45,8 @@ pub struct AnalyzeExec { verbose: bool, /// If statistics should be displayed show_statistics: bool, + /// Which metric categories should be displayed + metric_types: Vec, /// The input plan (the plan being analyzed) pub(crate) input: Arc, /// The output schema for RecordBatches of this exec node @@ -56,6 +59,7 @@ impl AnalyzeExec { pub fn new( verbose: bool, show_statistics: bool, + metric_types: Vec, input: Arc, schema: SchemaRef, ) -> Self { @@ -63,6 +67,7 @@ impl AnalyzeExec { AnalyzeExec { verbose, show_statistics, + metric_types, input, schema, cache, @@ -145,6 +150,7 @@ impl ExecutionPlan for AnalyzeExec { Ok(Arc::new(Self::new( self.verbose, self.show_statistics, + self.metric_types.clone(), children.pop().unwrap(), Arc::clone(&self.schema), ))) @@ -182,6 +188,7 @@ impl ExecutionPlan for AnalyzeExec { let captured_schema = Arc::clone(&self.schema); let verbose = self.verbose; let show_statistics = self.show_statistics; + let metric_types = self.metric_types.clone(); // future that gathers the results from all the tasks in the // JoinSet that computes the overall row count and final @@ -201,6 +208,7 @@ impl ExecutionPlan for AnalyzeExec { duration, captured_input, captured_schema, + &metric_types, ) }; @@ -219,6 +227,7 @@ fn create_output_batch( duration: std::time::Duration, input: Arc, schema: SchemaRef, + metric_types: &[MetricType], ) -> Result { let mut type_builder = StringBuilder::with_capacity(1, 1024); let mut 
plan_builder = StringBuilder::with_capacity(1, 1024); @@ -227,6 +236,7 @@ fn create_output_batch( type_builder.append_value("Plan with Metrics"); let annotated_plan = DisplayableExecutionPlan::with_metrics(input.as_ref()) + .set_metric_types(metric_types.to_vec()) .set_show_statistics(show_statistics) .indent(verbose) .to_string(); @@ -238,6 +248,7 @@ fn create_output_batch( type_builder.append_value("Plan with Full Metrics"); let annotated_plan = DisplayableExecutionPlan::with_full_metrics(input.as_ref()) + .set_metric_types(metric_types.to_vec()) .set_show_statistics(show_statistics) .indent(verbose) .to_string(); @@ -282,7 +293,13 @@ mod tests { let blocking_exec = Arc::new(BlockingExec::new(Arc::clone(&schema), 1)); let refs = blocking_exec.refs(); - let analyze_exec = Arc::new(AnalyzeExec::new(true, false, blocking_exec, schema)); + let analyze_exec = Arc::new(AnalyzeExec::new( + true, + false, + vec![MetricType::SUMMARY, MetricType::DEV], + blocking_exec, + schema, + )); let fut = collect(analyze_exec, task_ctx); let mut fut = fut.boxed(); diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 2420edfc743da..35ca0b65ae294 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -28,6 +28,7 @@ use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan}; use datafusion_expr::display_schema; use datafusion_physical_expr::LexOrdering; +use crate::metrics::MetricType; use crate::render_tree::RenderTree; use super::{accept, ExecutionPlan, ExecutionPlanVisitor}; @@ -120,11 +121,17 @@ pub struct DisplayableExecutionPlan<'a> { show_statistics: bool, /// If schema should be displayed. See [`Self::set_show_schema`] show_schema: bool, + /// Which metric categories should be included when rendering + metric_types: Vec, // (TreeRender) Maximum total width of the rendered tree tree_maximum_render_width: usize, } impl<'a> DisplayableExecutionPlan<'a> { + fn default_metric_types() -> Vec { + vec![MetricType::SUMMARY, MetricType::DEV] + } + /// Create a wrapper around an [`ExecutionPlan`] which can be /// pretty printed in a variety of ways pub fn new(inner: &'a dyn ExecutionPlan) -> Self { @@ -133,6 +140,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::None, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -146,6 +154,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Aggregated, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -159,6 +168,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics::Full, show_statistics: false, show_schema: false, + metric_types: Self::default_metric_types(), tree_maximum_render_width: 240, } } @@ -178,6 +188,12 @@ impl<'a> DisplayableExecutionPlan<'a> { self } + /// Specify which metric types should be rendered alongside the plan + pub fn set_metric_types(mut self, metric_types: Vec) -> Self { + self.metric_types = metric_types; + self + } + /// Set the maximum render width for the tree format pub fn set_tree_maximum_render_width(mut self, width: usize) -> Self { self.tree_maximum_render_width = width; @@ -206,6 +222,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics, show_statistics: bool, show_schema: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { @@ -216,6 
+233,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: &self.metric_types, }; accept(self.plan, &mut visitor) } @@ -226,6 +244,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: self.metric_types.clone(), } } @@ -245,6 +264,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: &'a dyn ExecutionPlan, show_metrics: ShowMetrics, show_statistics: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { @@ -255,6 +275,7 @@ impl<'a> DisplayableExecutionPlan<'a> { t, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + metric_types: &self.metric_types, graphviz_builder: GraphvizBuilder::default(), parents: Vec::new(), }; @@ -272,6 +293,7 @@ impl<'a> DisplayableExecutionPlan<'a> { plan: self.inner, show_metrics: self.show_metrics, show_statistics: self.show_statistics, + metric_types: self.metric_types.clone(), } } @@ -306,6 +328,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: ShowMetrics, show_statistics: bool, show_schema: bool, + metric_types: Vec, } impl fmt::Display for Wrapper<'_> { @@ -317,6 +340,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: &self.metric_types, }; visitor.pre_visit(self.plan)?; Ok(()) @@ -328,6 +352,7 @@ impl<'a> DisplayableExecutionPlan<'a> { show_metrics: self.show_metrics, show_statistics: self.show_statistics, show_schema: self.show_schema, + metric_types: self.metric_types.clone(), } } @@ -382,6 +407,8 @@ struct IndentVisitor<'a, 'b> { show_statistics: bool, /// If schema should be displayed show_schema: bool, + /// Which metric types should be rendered + metric_types: &'a [MetricType], } impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { @@ -394,6 +421,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { ShowMetrics::Aggregated => { if let Some(metrics) = plan.metrics() { let metrics = metrics + .filter_by_metric_types(self.metric_types) .aggregate_by_name() .sorted_for_display() .timestamps_removed(); @@ -405,6 +433,7 @@ impl ExecutionPlanVisitor for IndentVisitor<'_, '_> { } ShowMetrics::Full => { if let Some(metrics) = plan.metrics() { + let metrics = metrics.filter_by_metric_types(self.metric_types); write!(self.f, ", metrics=[{metrics}]")?; } else { write!(self.f, ", metrics=[]")?; @@ -441,6 +470,8 @@ struct GraphvizVisitor<'a, 'b> { show_metrics: ShowMetrics, /// If statistics should be displayed show_statistics: bool, + /// Which metric types should be rendered + metric_types: &'a [MetricType], graphviz_builder: GraphvizBuilder, /// Used to record parent node ids when visiting a plan. 
@@ -478,6 +509,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { ShowMetrics::Aggregated => { if let Some(metrics) = plan.metrics() { let metrics = metrics + .filter_by_metric_types(self.metric_types) .aggregate_by_name() .sorted_for_display() .timestamps_removed(); @@ -489,6 +521,7 @@ impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> { } ShowMetrics::Full => { if let Some(metrics) = plan.metrics() { + let metrics = metrics.filter_by_metric_types(self.metric_types); format!("metrics=[{metrics}]") } else { "metrics=[]".to_string() diff --git a/datafusion/physical-plan/src/metrics/baseline.rs b/datafusion/physical-plan/src/metrics/baseline.rs index 15efb8f90aa20..45cef58b5dd8c 100644 --- a/datafusion/physical-plan/src/metrics/baseline.rs +++ b/datafusion/physical-plan/src/metrics/baseline.rs @@ -62,9 +62,15 @@ impl BaselineMetrics { start_time.record(); Self { - end_time: MetricBuilder::new(metrics).end_timestamp(partition), - elapsed_compute: MetricBuilder::new(metrics).elapsed_compute(partition), - output_rows: MetricBuilder::new(metrics).output_rows(partition), + end_time: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .end_timestamp(partition), + elapsed_compute: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .elapsed_compute(partition), + output_rows: MetricBuilder::new(metrics) + .with_type(super::MetricType::SUMMARY) + .output_rows(partition), } } diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index dbda0a310ce52..74ba5a2a18343 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -19,6 +19,8 @@ use std::{borrow::Cow, sync::Arc}; +use crate::metrics::MetricType; + use super::{ Count, ExecutionPlanMetricsSet, Gauge, Label, Metric, MetricValue, Time, Timestamp, }; @@ -52,15 +54,23 @@ pub struct MetricBuilder<'a> { /// arbitrary name=value pairs identifying this metric labels: Vec