From 832cfe2326dc25a9704f819ed6b3a7913573d4ea Mon Sep 17 00:00:00 2001
From: wiedld
Date: Fri, 11 Oct 2024 12:31:34 -0700
Subject: [PATCH 1/6] fix: handle when the left side of the union has no fields (e.g. an empty projection)

---
 datafusion/physical-plan/src/union.rs | 39 +++++++++++++++++----------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs
index a41336ea6eb78..63696ac0d0948 100644
--- a/datafusion/physical-plan/src/union.rs
+++ b/datafusion/physical-plan/src/union.rs
@@ -499,31 +499,42 @@ pub fn can_interleave<T: Borrow<Arc<dyn ExecutionPlan>>>(
 }
 
 fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
-    let first_schema = inputs[0].schema();
+    // needs to handle n children, including children which have an empty projection or a different number of fields
+    let num_fields = inputs.iter().fold(0, |acc, input| {
+        std::cmp::max(acc, input.schema().fields().len())
+    });
 
-    let fields = (0..first_schema.fields().len())
+    let fields: Vec<Field> = (0..num_fields)
         .map(|i| {
-            inputs
-                .iter()
-                .enumerate()
-                .map(|(input_idx, input)| {
-                    let field = input.schema().field(i).clone();
-                    let mut metadata = field.metadata().clone();
+            // collect fields for i
+            let field_options_for_i =
+                inputs.iter().enumerate().filter_map(|(input_idx, input)| {
+                    let field = if input.schema().fields().len() <= i {
+                        return None;
+                    } else {
+                        input.schema().field(i).clone()
+                    };
+                    // merge field metadata
+                    let mut metadata = field.metadata().clone();
                     let other_metadatas = inputs
                         .iter()
                         .enumerate()
-                        .filter(|(other_idx, _)| *other_idx != input_idx)
+                        .filter(|(other_idx, other_input)| {
+                            *other_idx != input_idx
+                                && other_input.schema().fields().len() > i
+                        })
                         .flat_map(|(_, other_input)| {
                             other_input.schema().field(i).metadata().clone().into_iter()
                         });
-
                     metadata.extend(other_metadatas);
-                    field.with_metadata(metadata)
-                })
+                    Some(field.with_metadata(metadata))
+                });
+
+            // pick first nullable field (if exists)
+            field_options_for_i
                 .find_or_first(Field::is_nullable)
-                // We can unwrap this because if inputs was empty, this would've already panic'ed when we
-                // indexed into inputs[0].
+                // We can unwrap this because if inputs was empty, we would never have iterated with (0..num_fields)
                 .unwrap()
         })
         .collect::<Vec<_>>();

From 6ca49dae94ca7e9a3b7b6654e4c03dacb21509f7 Mon Sep 17 00:00:00 2001
From: wiedld
Date: Tue, 15 Oct 2024 12:51:48 -0700
Subject: [PATCH 2/6] chore: default=true for skip_physical_aggregate_schema_check, and add warn logging

---
 datafusion/common/src/config.rs                           | 2 +-
 datafusion/core/src/physical_planner.rs                   | 3 +++
 datafusion/sqllogictest/test_files/information_schema.slt | 4 ++--
 docs/source/user-guide/configs.md                         | 2 +-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 32b7213d952f5..3a3ed07ac9edf 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -305,7 +305,7 @@ config_namespace! {
     ///
     /// This is used to workaround bugs in the planner that are now caught by
     /// the new schema verification step.
-    pub skip_physical_aggregate_schema_check: bool, default = false
+    pub skip_physical_aggregate_schema_check: bool, default = true
 
     /// Specifies the reserved memory for each spillable sort operation to
     /// facilitate an in-memory merge.
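[Editor's note on PATCH 1/6 above: union_schema previously sized the output
schema from the first child alone (inputs[0]), which panicked when the first
child had an empty projection or fewer fields than a sibling. The fix sizes
the loop by the widest child and lets narrower children contribute nothing.
The merge logic can be illustrated standalone; the following is a minimal
sketch over plain schemas rather than ExecutionPlan inputs, with a
hypothetical function name, assuming arrow's Field/SchemaRef types and
itertools' find_or_first (which the patch itself uses):

    use std::collections::HashMap;

    use arrow::datatypes::{Field, SchemaRef};
    use itertools::Itertools;

    /// Sketch: compute union output fields while tolerating children whose
    /// schemas are narrower than the widest child (e.g. empty projections).
    fn union_fields_sketch(schemas: &[SchemaRef]) -> Vec<Field> {
        // The widest child determines the union arity.
        let num_fields = schemas.iter().map(|s| s.fields().len()).max().unwrap_or(0);

        (0..num_fields)
            .map(|i| {
                // Only children that actually have an i-th field contribute.
                let candidates: Vec<&Field> = schemas
                    .iter()
                    .filter(|s| i < s.fields().len())
                    .map(|s| s.field(i))
                    .collect();

                // Merge metadata from every contributing child.
                let mut metadata = HashMap::new();
                for f in &candidates {
                    metadata.extend(f.metadata().clone());
                }

                // Prefer a nullable variant so the union field is nullable
                // whenever any input is; fall back to the first candidate.
                candidates
                    .into_iter()
                    .cloned()
                    .map(|f| f.with_metadata(metadata.clone()))
                    .find_or_first(Field::is_nullable)
                    .expect("num_fields > 0 guarantees at least one candidate")
            })
            .collect()
    }

The unwrap in the patched code is safe for the same reason as the expect
here: the range (0..num_fields) is empty unless some child has fields.]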
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index 5f98ba4efcf02..9611c6940d621 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -693,6 +693,9 @@ impl DefaultPhysicalPlanner {
                     differences.push(format!("field nullability at index {} [{}]: (physical) {} vs (logical) {}", i, physical_field.name(), physical_field.is_nullable(), logical_field.is_nullable()));
                 }
             }
+
+            log::warn!("Physical input schema should be the same as the one converted from logical input schema, but did not match for logical plan:\n{}", input.display_indent());
+
             return internal_err!("Physical input schema should be the same as the one converted from logical input schema. Differences: {}", differences
                 .iter()
                 .map(|s| format!("\n\t- {}", s))

diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index 4653df4000803..08168caa03eb4 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -223,7 +223,7 @@ datafusion.execution.parquet.writer_version 1.0
 datafusion.execution.planning_concurrency 13
 datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8
 datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000
-datafusion.execution.skip_physical_aggregate_schema_check false
+datafusion.execution.skip_physical_aggregate_schema_check true
 datafusion.execution.soft_max_rows_per_output_file 50000000
 datafusion.execution.sort_in_place_threshold_bytes 1048576
 datafusion.execution.sort_spill_reservation_bytes 10485760
@@ -317,7 +317,7 @@ datafusion.execution.parquet.writer_version 1.0 (writing) Sets parquet writer ve
 datafusion.execution.planning_concurrency 13 Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system
 datafusion.execution.skip_partial_aggregation_probe_ratio_threshold 0.8 Aggregation ratio (number of distinct groups / number of input rows) threshold for skipping partial aggregation. If the value is greater then partial aggregation will skip aggregation for further input
 datafusion.execution.skip_partial_aggregation_probe_rows_threshold 100000 Number of input rows partial aggregation partition should process, before aggregation ratio check and trying to switch to skipping aggregation mode
-datafusion.execution.skip_physical_aggregate_schema_check false When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step.
+datafusion.execution.skip_physical_aggregate_schema_check true When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step.
 datafusion.execution.soft_max_rows_per_output_file 50000000 Target number of rows in output files when writing multiple. This is a soft max, so it can be exceeded slightly. There also will be one file smaller than the limit if the total number of rows written is not roughly divisible by the soft max
 datafusion.execution.sort_in_place_threshold_bytes 1048576 When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged.
 datafusion.execution.sort_spill_reservation_bytes 10485760 Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured).

diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index dd9ce759b28a3..ea0ecf95f3f82 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -80,7 +80,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.execution.parquet.maximum_parallel_row_group_writers | 1 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. |
 | datafusion.execution.parquet.maximum_buffered_record_batches_per_stream | 2 | (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. |
 | datafusion.execution.planning_concurrency | 0 | Fan-out during initial physical planning. This is mostly use to plan `UNION` children in parallel. Defaults to the number of CPU cores on the system |
-| datafusion.execution.skip_physical_aggregate_schema_check | false | When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. |
+| datafusion.execution.skip_physical_aggregate_schema_check | true | When set to true, skips verifying that the schema produced by planning the input of `LogicalPlan::Aggregate` exactly matches the schema of the input plan. When set to false, if the schema does not match exactly (including nullability and metadata), a planning error will be raised. This is used to workaround bugs in the planner that are now caught by the new schema verification step. |
 | datafusion.execution.sort_spill_reservation_bytes | 10485760 | Specifies the reserved memory for each spillable sort operation to facilitate an in-memory merge. When a sort operation spills to disk, the in-memory data must be sorted and merged before being written to a file. This setting reserves a specific amount of memory for that in-memory sort/merge process. Note: This setting is irrelevant if the sort operation cannot spill (i.e., if there's no `DiskManager` configured). |
 | datafusion.execution.sort_in_place_threshold_bytes | 1048576 | When sorting, below what size should data be concatenated and sorted in a single RecordBatch rather than sorted in batches and merged. |
 | datafusion.execution.meta_fetch_concurrency | 32 | Number of files to read in parallel when inferring schema and statistics |

From 3819810155059e83434b00661ed8ca4345b3bc05 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Tue, 16 Jul 2024 12:14:19 -0400
Subject: [PATCH 3/6] Test + workaround for SanityCheck plan

---
 .../physical-optimizer/src/sanity_checker.rs | 10 +++
 datafusion/sqllogictest/test_files/union.slt | 76 +++++++-------------
 2 files changed, 37 insertions(+), 49 deletions(-)

diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs b/datafusion/physical-optimizer/src/sanity_checker.rs
index 8edbb0f091140..24c1405b088e2 100644
--- a/datafusion/physical-optimizer/src/sanity_checker.rs
+++ b/datafusion/physical-optimizer/src/sanity_checker.rs
@@ -33,6 +33,8 @@ use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supp
 use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
 use datafusion_physical_plan::joins::SymmetricHashJoinExec;
 use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
+use datafusion_physical_plan::sorts::sort::SortExec;
+use datafusion_physical_plan::union::UnionExec;
 
 use crate::PhysicalOptimizerRule;
 use datafusion_physical_expr_common::sort_expr::format_physical_sort_requirement_list;
@@ -135,6 +137,14 @@ pub fn check_plan_sanity(
         plan.required_input_ordering(),
         plan.required_input_distribution(),
     ) {
+        // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
+        if child.as_any().downcast_ref::<SortExec>().is_some() {
+            continue;
+        }
+        if child.as_any().downcast_ref::<UnionExec>().is_some() {
+            continue;
+        }
+
         let child_eq_props = child.equivalence_properties();
         if let Some(sort_req) = sort_req {
             if !child_eq_props.ordering_satisfy_requirement(&sort_req) {

diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt
index 352c01ca295c9..fd85ff30c8ed6 100644
--- a/datafusion/sqllogictest/test_files/union.slt
+++ b/datafusion/sqllogictest/test_files/union.slt
@@ -538,6 +538,9 @@ physical_plan
 # Clean up after the test
 ########
 
+statement ok
+drop table t
+
 statement ok
 drop table t1;
 
@@ -778,61 +781,36 @@ select make_array(make_array(1)) x UNION ALL SELECT make_array(arrow_cast(make_a
 [[-1]]
 [[1]]
 
+###
+# Test for https://github.com/apache/datafusion/issues/11492
+###
+
+# Input data is
+# a,b,c
+# 1,2,3
+
 statement ok
-CREATE EXTERNAL TABLE aggregate_test_100 (
-  c1 VARCHAR NOT NULL,
-  c2 TINYINT NOT NULL,
-  c3 SMALLINT NOT NULL,
-  c4 SMALLINT,
-  c5 INT,
-  c6 BIGINT NOT NULL,
-  c7 SMALLINT NOT NULL,
-  c8 INT NOT NULL,
-  c9 BIGINT UNSIGNED NOT NULL,
-  c10 VARCHAR NOT NULL,
-  c11 FLOAT NOT NULL,
-  c12 DOUBLE NOT NULL,
-  c13 VARCHAR NOT NULL
+CREATE EXTERNAL TABLE t (
+  a INT,
+  b INT,
+  c INT
 )
 STORED AS CSV
-LOCATION '../../testing/data/csv/aggregate_test_100.csv'
+LOCATION '../core/tests/data/example.csv'
+WITH ORDER (a ASC)
 OPTIONS ('format.has_header' 'true');
 
-statement ok
-set datafusion.execution.batch_size = 2;
+query T
+SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a;
+----
+1
+bar
 
-# Constant value tracking across union
-query TT
-explain
-SELECT * FROM(
-(
-  SELECT * FROM aggregate_test_100 WHERE c1='a'
-)
-UNION ALL
-(
-  SELECT * FROM aggregate_test_100 WHERE c1='a'
-))
-ORDER BY c1
+query I
+SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a;
 ----
-logical_plan
-01)Sort: aggregate_test_100.c1 ASC NULLS LAST
-02)--Union
-03)----Filter: aggregate_test_100.c1 = Utf8("a")
-04)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
-05)----Filter: aggregate_test_100.c1 = Utf8("a")
-06)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
-physical_plan
-01)CoalescePartitionsExec
-02)--UnionExec
-03)----CoalesceBatchesExec: target_batch_size=2
-04)------FilterExec: c1@0 = a
-05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
-07)----CoalesceBatchesExec: target_batch_size=2
-08)------FilterExec: c1@0 = a
-09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
+1
+NULL
 
-# Clean up after the test
 statement ok
-drop table aggregate_test_100;
+drop table t

From 6762c7331ec9140d5d910b502ea398b2e950da07 Mon Sep 17 00:00:00 2001
From: wiedld
Date: Mon, 27 Jan 2025 16:10:38 -0800
Subject: [PATCH 4/6] chore: fmt fix

---
 datafusion/physical-optimizer/src/sanity_checker.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs b/datafusion/physical-optimizer/src/sanity_checker.rs
index 24c1405b088e2..ad01a0047eaa2 100644
--- a/datafusion/physical-optimizer/src/sanity_checker.rs
+++ b/datafusion/physical-optimizer/src/sanity_checker.rs
@@ -32,9 +32,9 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported};
 use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
 use datafusion_physical_plan::joins::SymmetricHashJoinExec;
-use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
 use datafusion_physical_plan::sorts::sort::SortExec;
 use datafusion_physical_plan::union::UnionExec;
+use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
 
 use crate::PhysicalOptimizerRule;
 use datafusion_physical_expr_common::sort_expr::format_physical_sort_requirement_list;

From 7b9263f6b23bd36cc98250cbff7a06180b917eb4 Mon Sep 17 00:00:00 2001
From: wiedld
Date: Mon, 3 Feb 2025 10:27:28 -0800
Subject: [PATCH 5/6] Revert "Test + workaround for SanityCheck plan"

This reverts commit 3819810155059e83434b00661ed8ca4345b3bc05.
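[Editor's note: this revert is the payoff of the series. With the
union_schema fix from PATCH 1/6 in place, UnionExec reports an output
schema the sanity checker can satisfy, so the temporary SortExec/UnionExec
skips added in PATCH 3/6 (and reformatted in PATCH 4/6) are no longer
needed and the strict plan check is restored. Downstreams that still hit
the aggregate schema mismatch guarded by PATCH 2/6 can opt back into
strict verification explicitly; a minimal sketch, assuming the
SessionConfig/SessionContext APIs re-exported by the datafusion crate's
prelude:

    use datafusion::prelude::*;

    fn strict_context() -> SessionContext {
        // The default is now `true` (skip the check); `false` re-enables
        // the strict physical-vs-logical aggregate schema comparison and
        // surfaces the internal_err from physical_planner.rs again.
        let config = SessionConfig::new().set_bool(
            "datafusion.execution.skip_physical_aggregate_schema_check",
            false,
        );
        SessionContext::new_with_config(config)
    }
]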
---
 .../physical-optimizer/src/sanity_checker.rs | 10 ---
 datafusion/sqllogictest/test_files/union.slt | 76 ++++++++++++-------
 2 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/datafusion/physical-optimizer/src/sanity_checker.rs b/datafusion/physical-optimizer/src/sanity_checker.rs
index ad01a0047eaa2..8edbb0f091140 100644
--- a/datafusion/physical-optimizer/src/sanity_checker.rs
+++ b/datafusion/physical-optimizer/src/sanity_checker.rs
@@ -32,8 +32,6 @@ use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
 use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported};
 use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType};
 use datafusion_physical_plan::joins::SymmetricHashJoinExec;
-use datafusion_physical_plan::sorts::sort::SortExec;
-use datafusion_physical_plan::union::UnionExec;
 use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties};
 
 use crate::PhysicalOptimizerRule;
 use datafusion_physical_expr_common::sort_expr::format_physical_sort_requirement_list;
@@ -137,14 +135,6 @@ pub fn check_plan_sanity(
         plan.required_input_ordering(),
         plan.required_input_distribution(),
     ) {
-        // TEMP HACK WORKAROUND https://github.com/apache/datafusion/issues/11492
-        if child.as_any().downcast_ref::<SortExec>().is_some() {
-            continue;
-        }
-        if child.as_any().downcast_ref::<UnionExec>().is_some() {
-            continue;
-        }
-
         let child_eq_props = child.equivalence_properties();
         if let Some(sort_req) = sort_req {
             if !child_eq_props.ordering_satisfy_requirement(&sort_req) {

diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt
index fd85ff30c8ed6..352c01ca295c9 100644
--- a/datafusion/sqllogictest/test_files/union.slt
+++ b/datafusion/sqllogictest/test_files/union.slt
@@ -538,9 +538,6 @@ physical_plan
 # Clean up after the test
 ########
 
-statement ok
-drop table t
-
 statement ok
 drop table t1;
 
@@ -781,36 +778,61 @@ select make_array(make_array(1)) x UNION ALL SELECT make_array(arrow_cast(make_a
 [[-1]]
 [[1]]
 
-###
-# Test for https://github.com/apache/datafusion/issues/11492
-###
-
-# Input data is
-# a,b,c
-# 1,2,3
-
 statement ok
-CREATE EXTERNAL TABLE t (
-  a INT,
-  b INT,
-  c INT
+CREATE EXTERNAL TABLE aggregate_test_100 (
+  c1 VARCHAR NOT NULL,
+  c2 TINYINT NOT NULL,
+  c3 SMALLINT NOT NULL,
+  c4 SMALLINT,
+  c5 INT,
+  c6 BIGINT NOT NULL,
+  c7 SMALLINT NOT NULL,
+  c8 INT NOT NULL,
+  c9 BIGINT UNSIGNED NOT NULL,
+  c10 VARCHAR NOT NULL,
+  c11 FLOAT NOT NULL,
+  c12 DOUBLE NOT NULL,
+  c13 VARCHAR NOT NULL
 )
 STORED AS CSV
-LOCATION '../core/tests/data/example.csv'
-WITH ORDER (a ASC)
+LOCATION '../../testing/data/csv/aggregate_test_100.csv'
 OPTIONS ('format.has_header' 'true');
 
-query T
-SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT 'bar' as a from t) ORDER BY a;
-----
-1
-bar
+statement ok
+set datafusion.execution.batch_size = 2;
 
-query I
-SELECT (SELECT a from t ORDER BY a) UNION ALL (SELECT NULL as a from t) ORDER BY a;
+# Constant value tracking across union
+query TT
+explain
+SELECT * FROM(
+(
+  SELECT * FROM aggregate_test_100 WHERE c1='a'
+)
+UNION ALL
+(
+  SELECT * FROM aggregate_test_100 WHERE c1='a'
+))
+ORDER BY c1
 ----
-1
-NULL
+logical_plan
+01)Sort: aggregate_test_100.c1 ASC NULLS LAST
+02)--Union
+03)----Filter: aggregate_test_100.c1 = Utf8("a")
+04)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
+05)----Filter: aggregate_test_100.c1 = Utf8("a")
+06)------TableScan: aggregate_test_100 projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], partial_filters=[aggregate_test_100.c1 = Utf8("a")]
+physical_plan
+01)CoalescePartitionsExec
+02)--UnionExec
+03)----CoalesceBatchesExec: target_batch_size=2
+04)------FilterExec: c1@0 = a
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
+07)----CoalesceBatchesExec: target_batch_size=2
+08)------FilterExec: c1@0 = a
+09)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+10)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13], has_header=true
 
+# Clean up after the test
 statement ok
-drop table t
+drop table aggregate_test_100;

From 0e132da3b3f788a7a63b6e409f32b504ec3e9a98 Mon Sep 17 00:00:00 2001
From: wiedld
Date: Mon, 3 Feb 2025 11:29:32 -0800
Subject: [PATCH 6/6] fix: clippy warnings

---
 datafusion/core/src/datasource/physical_plan/arrow_file.rs | 2 ++
 datafusion/optimizer/src/unwrap_cast_in_comparison.rs      | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs
index 54344d55bbd11..ab39e673b8413 100644
--- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs
+++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs
@@ -311,6 +311,7 @@ impl FileOpener for ArrowOpener {
                     {
                         decoder.read_dictionary(
                             dict_block,
+                            #[allow(deprecated)]
                             &Buffer::from_bytes(dict_result.into()),
                         )?;
                     }
@@ -350,6 +351,7 @@ impl FileOpener for ArrowOpener {
                             decoder
                                 .read_record_batch(
                                     &block,
+                                    #[allow(deprecated)]
                                     &Buffer::from_bytes(data.into()),
                                 )
                                 .transpose()

diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
index 892d450ba85bc..c1b07097ac42f 100644
--- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
+++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs
@@ -25,6 +25,7 @@ use crate::optimizer::ApplyOrder;
 use crate::{OptimizerConfig, OptimizerRule};
 use crate::utils::NamePreserver;
 
+#[allow(deprecated)]
 use arrow::datatypes::{
     DataType, TimeUnit, MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION,
 };
@@ -369,7 +370,9 @@ fn try_cast_numeric_literal(
             // Different precision for decimal128 can store different range of value.
             // For example, the precision is 3, the max of value is `999` and the min
            // value is `-999`
+            #[allow(deprecated)]
             MIN_DECIMAL_FOR_EACH_PRECISION[*precision as usize - 1],
+            #[allow(deprecated)]
             MAX_DECIMAL_FOR_EACH_PRECISION[*precision as usize - 1],
         ),
         _ => return None,
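[Editor's note on PATCH 6/6: the clippy fixes silence deprecation warnings
(arrow-rs deprecated Buffer::from_bytes and the MIN/MAX decimal lookup
arrays) at individual expressions instead of turning the lint off for a
whole module. This works because Rust accepts outer attributes on items
such as `use` declarations and on call-argument expressions, as the patch
relies on. A self-contained illustration with hypothetical names:

    #[deprecated(note = "use shiny_new_api instead")]
    fn dusty_old_api() -> u32 {
        42
    }

    fn main() {
        // The attribute applies only to this one argument expression, so
        // deprecation warnings stay active everywhere else in the crate.
        let doubled = std::ops::Mul::mul(
            #[allow(deprecated)]
            dusty_old_api(),
            2u32,
        );
        println!("{doubled}");
    }

Scoping the allow this narrowly keeps the warning useful at any future
call sites while the deprecated API is still in use here.]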