Skip to content

Commit b6bc0bb

Browse files
committed
Merge remote-tracking branch 'upstream/main' into concat_batches_for_sort
2 parents ddc9773 + a0eaf51 commit b6bc0bb

File tree

28 files changed

+595
-340
lines changed

28 files changed

+595
-340
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ arrow-flight = { version = "55.1.0", features = [
9898
] }
9999
arrow-ipc = { version = "55.0.0", default-features = false, features = [
100100
"lz4",
101+
"zstd",
101102
] }
102103
arrow-ord = { version = "55.0.0", default-features = false }
103104
arrow-schema = { version = "55.0.0", default-features = false }

datafusion/common/src/config.rs

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
//! Runtime configuration, via [`ConfigOptions`]
1919
20+
use arrow_ipc::CompressionType;
21+
2022
use crate::error::_config_err;
2123
use crate::parsers::CompressionTypeVariant;
2224
use crate::utils::get_available_parallelism;
@@ -274,6 +276,61 @@ config_namespace! {
274276
}
275277
}
276278

279+
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
280+
pub enum SpillCompression {
281+
Zstd,
282+
Lz4Frame,
283+
#[default]
284+
Uncompressed,
285+
}
286+
287+
impl FromStr for SpillCompression {
288+
type Err = DataFusionError;
289+
290+
fn from_str(s: &str) -> Result<Self, Self::Err> {
291+
match s.to_ascii_lowercase().as_str() {
292+
"zstd" => Ok(Self::Zstd),
293+
"lz4_frame" => Ok(Self::Lz4Frame),
294+
"uncompressed" | "" => Ok(Self::Uncompressed),
295+
other => Err(DataFusionError::Configuration(format!(
296+
"Invalid Spill file compression type: {other}. Expected one of: zstd, lz4_frame, uncompressed"
297+
))),
298+
}
299+
}
300+
}
301+
302+
impl ConfigField for SpillCompression {
303+
fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
304+
v.some(key, self, description)
305+
}
306+
307+
fn set(&mut self, _: &str, value: &str) -> Result<()> {
308+
*self = SpillCompression::from_str(value)?;
309+
Ok(())
310+
}
311+
}
312+
313+
impl Display for SpillCompression {
314+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
315+
let str = match self {
316+
Self::Zstd => "zstd",
317+
Self::Lz4Frame => "lz4_frame",
318+
Self::Uncompressed => "uncompressed",
319+
};
320+
write!(f, "{str}")
321+
}
322+
}
323+
324+
impl From<SpillCompression> for Option<CompressionType> {
325+
fn from(c: SpillCompression) -> Self {
326+
match c {
327+
SpillCompression::Zstd => Some(CompressionType::ZSTD),
328+
SpillCompression::Lz4Frame => Some(CompressionType::LZ4_FRAME),
329+
SpillCompression::Uncompressed => None,
330+
}
331+
}
332+
}
333+
277334
config_namespace! {
278335
/// Options related to query execution
279336
///
@@ -294,8 +351,8 @@ config_namespace! {
294351

295352
/// Should DataFusion collect statistics when first creating a table.
296353
/// Has no effect after the table is created. Applies to the default
297-
/// `ListingTableProvider` in DataFusion. Defaults to false.
298-
pub collect_statistics: bool, default = false
354+
/// `ListingTableProvider` in DataFusion. Defaults to true.
355+
pub collect_statistics: bool, default = true
299356

300357
/// Number of partitions for query execution. Increasing partitions can increase
301358
/// concurrency.
@@ -330,6 +387,16 @@ config_namespace! {
330387
/// the new schema verification step.
331388
pub skip_physical_aggregate_schema_check: bool, default = false
332389

390+
/// Sets the compression codec used when spilling data to disk.
391+
///
392+
/// Since datafusion writes spill files using the Arrow IPC Stream format,
393+
/// only codecs supported by the Arrow IPC Stream Writer are allowed.
394+
/// Valid values are: uncompressed, lz4_frame, zstd.
395+
/// Note: lz4_frame offers faster (de)compression, but typically results in
396+
/// larger spill files. In contrast, zstd achieves
397+
/// higher compression ratios at the cost of slower (de)compression speed.
398+
pub spill_compression: SpillCompression, default = SpillCompression::Uncompressed
399+
333400
/// Specifies the reserved memory for each spillable sort operation to
334401
/// facilitate an in-memory merge.
335402
///

datafusion/core/src/execution/context/parquet.rs

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,12 @@ impl SessionContext {
3434
///
3535
/// # Note: Statistics
3636
///
37-
/// NOTE: by default, statistics are not collected when reading the Parquet
38-
/// files as this can slow down the initial DataFrame creation. However,
39-
/// collecting statistics can greatly accelerate queries with certain
40-
/// filters.
37+
/// NOTE: by default, statistics are collected when reading the Parquet
38+
/// files. This can slow down the initial DataFrame creation while
39+
/// greatly accelerating queries with certain filters.
4140
///
42-
/// To enable collect statistics, set the [config option]
43-
/// `datafusion.execution.collect_statistics` to `true`. See
41+
/// To disable statistics collection, set the [config option]
42+
/// `datafusion.execution.collect_statistics` to `false`. See
4443
/// [`ConfigOptions`] and [`ExecutionOptions::collect_statistics`] for more
4544
/// details.
4645
///
@@ -171,28 +170,28 @@ mod tests {
171170

172171
#[tokio::test]
173172
async fn register_parquet_respects_collect_statistics_config() -> Result<()> {
174-
// The default is false
173+
// The default is true
175174
let mut config = SessionConfig::new();
176175
config.options_mut().explain.physical_plan_only = true;
177176
config.options_mut().explain.show_statistics = true;
178177
let content = explain_query_all_with_config(config).await?;
179-
assert_contains!(content, "statistics=[Rows=Absent,");
178+
assert_contains!(content, "statistics=[Rows=Exact(");
180179

181-
// Explicitly set to false
180+
// Explicitly set to true
182181
let mut config = SessionConfig::new();
183182
config.options_mut().explain.physical_plan_only = true;
184183
config.options_mut().explain.show_statistics = true;
185-
config.options_mut().execution.collect_statistics = false;
184+
config.options_mut().execution.collect_statistics = true;
186185
let content = explain_query_all_with_config(config).await?;
187-
assert_contains!(content, "statistics=[Rows=Absent,");
186+
assert_contains!(content, "statistics=[Rows=Exact(");
188187

189-
// Explicitly set to true
188+
// Explicitly set to false
190189
let mut config = SessionConfig::new();
191190
config.options_mut().explain.physical_plan_only = true;
192191
config.options_mut().explain.show_statistics = true;
193-
config.options_mut().execution.collect_statistics = true;
192+
config.options_mut().execution.collect_statistics = false;
194193
let content = explain_query_all_with_config(config).await?;
195-
assert_contains!(content, "statistics=[Rows=Exact(10),");
194+
assert_contains!(content, "statistics=[Rows=Absent,");
196195

197196
Ok(())
198197
}

datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use datafusion_execution::memory_pool::{
3131
};
3232
use datafusion_expr::display_schema;
3333
use datafusion_physical_plan::spill::get_record_batch_memory_size;
34+
use itertools::Itertools;
3435
use std::time::Duration;
3536

3637
use datafusion_execution::{memory_pool::FairSpillPool, runtime_env::RuntimeEnvBuilder};
@@ -72,6 +73,43 @@ async fn sort_query_fuzzer_runner() {
7273
fuzzer.run().await.unwrap();
7374
}
7475

76+
/// Reproduce the bug with specific seeds from the
77+
/// [failing test case](https://github.com/apache/datafusion/issues/16452).
78+
#[tokio::test(flavor = "multi_thread")]
79+
async fn test_reproduce_sort_query_issue_16452() {
80+
// Seeds from the failing test case
81+
let init_seed = 10313160656544581998u64;
82+
let query_seed = 15004039071976572201u64;
83+
let config_seed_1 = 11807432710583113300u64;
84+
let config_seed_2 = 759937414670321802u64;
85+
86+
let random_seed = 1u64; // Use a fixed seed to ensure consistent behavior
87+
88+
let mut test_generator = SortFuzzerTestGenerator::new(
89+
2000,
90+
3,
91+
"sort_fuzz_table".to_string(),
92+
get_supported_types_columns(random_seed),
93+
false,
94+
random_seed,
95+
);
96+
97+
let mut results = vec![];
98+
99+
for config_seed in [config_seed_1, config_seed_2] {
100+
let r = test_generator
101+
.fuzzer_run(init_seed, query_seed, config_seed)
102+
.await
103+
.unwrap();
104+
105+
results.push(r);
106+
}
107+
108+
for (lhs, rhs) in results.iter().tuple_windows() {
109+
check_equality_of_batches(lhs, rhs).unwrap();
110+
}
111+
}
112+
75113
/// SortQueryFuzzer holds the runner configuration for executing sort query fuzz tests. The fuzzing details are managed inside `SortFuzzerTestGenerator`.
76114
///
77115
/// It defines:

datafusion/core/tests/memory_limit/mod.rs

Lines changed: 79 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ use arrow::compute::SortOptions;
2828
use arrow::datatypes::{Int32Type, SchemaRef};
2929
use arrow_schema::{DataType, Field, Schema};
3030
use datafusion::assert_batches_eq;
31+
use datafusion::config::SpillCompression;
3132
use datafusion::datasource::memory::MemorySourceConfig;
3233
use datafusion::datasource::source::DataSourceExec;
3334
use datafusion::datasource::{MemTable, TableProvider};
@@ -545,10 +546,11 @@ async fn test_external_sort_zero_merge_reservation() {
545546
// Tests for disk limit (`max_temp_directory_size` in `DiskManager`)
546547
// ------------------------------------------------------------------
547548

548-
// Create a new `SessionContext` with speicified disk limit and memory pool limit
549+
// Create a new `SessionContext` with specified disk limit, memory pool limit, and spill compression codec
549550
async fn setup_context(
550551
disk_limit: u64,
551552
memory_pool_limit: usize,
553+
spill_compression: SpillCompression,
552554
) -> Result<SessionContext> {
553555
let disk_manager = DiskManagerBuilder::default()
554556
.with_mode(DiskManagerMode::OsTmpDirectory)
@@ -570,6 +572,7 @@ async fn setup_context(
570572
let config = SessionConfig::new()
571573
.with_sort_spill_reservation_bytes(64 * 1024) // 256KB
572574
.with_sort_in_place_threshold_bytes(0)
575+
.with_spill_compression(spill_compression)
573576
.with_batch_size(64) // To reduce test memory usage
574577
.with_target_partitions(1);
575578

@@ -580,7 +583,8 @@ async fn setup_context(
580583
/// (specified by `max_temp_directory_size` in `DiskManager`)
581584
#[tokio::test]
582585
async fn test_disk_spill_limit_reached() -> Result<()> {
583-
let ctx = setup_context(1024 * 1024, 1024 * 1024).await?; // 1MB disk limit, 1MB memory limit
586+
let spill_compression = SpillCompression::Uncompressed;
587+
let ctx = setup_context(1024 * 1024, 1024 * 1024, spill_compression).await?; // 1MB disk limit, 1MB memory limit
584588

585589
let df = ctx
586590
.sql("select * from generate_series(1, 1000000000000) as t1(v1) order by v1")
@@ -602,7 +606,8 @@ async fn test_disk_spill_limit_reached() -> Result<()> {
602606
#[tokio::test]
603607
async fn test_disk_spill_limit_not_reached() -> Result<()> {
604608
let disk_spill_limit = 1024 * 1024; // 1MB
605-
let ctx = setup_context(disk_spill_limit, 128 * 1024).await?; // 1MB disk limit, 128KB memory limit
609+
let spill_compression = SpillCompression::Uncompressed;
610+
let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit
606611

607612
let df = ctx
608613
.sql("select * from generate_series(1, 10000) as t1(v1) order by v1")
@@ -630,6 +635,77 @@ async fn test_disk_spill_limit_not_reached() -> Result<()> {
630635
Ok(())
631636
}
632637

638+
/// External query should succeed using zstd as spill compression codec and
639+
/// and all temporary spill files are properly cleaned up after execution.
640+
/// Note: This test does not inspect file contents (e.g. magic number),
641+
/// as spill files are automatically deleted on drop.
642+
#[tokio::test]
643+
async fn test_spill_file_compressed_with_zstd() -> Result<()> {
644+
let disk_spill_limit = 1024 * 1024; // 1MB
645+
let spill_compression = SpillCompression::Zstd;
646+
let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit, zstd
647+
648+
let df = ctx
649+
.sql("select * from generate_series(1, 100000) as t1(v1) order by v1")
650+
.await
651+
.unwrap();
652+
let plan = df.create_physical_plan().await.unwrap();
653+
654+
let task_ctx = ctx.task_ctx();
655+
let _ = collect_batches(Arc::clone(&plan), task_ctx)
656+
.await
657+
.expect("Query execution failed");
658+
659+
let spill_count = plan.metrics().unwrap().spill_count().unwrap();
660+
let spilled_bytes = plan.metrics().unwrap().spilled_bytes().unwrap();
661+
662+
println!("spill count {spill_count}");
663+
assert!(spill_count > 0);
664+
assert!((spilled_bytes as u64) < disk_spill_limit);
665+
666+
// Verify that all temporary files have been properly cleaned up by checking
667+
// that the total disk usage tracked by the disk manager is zero
668+
let current_disk_usage = ctx.runtime_env().disk_manager.used_disk_space();
669+
assert_eq!(current_disk_usage, 0);
670+
671+
Ok(())
672+
}
673+
674+
/// External query should succeed using lz4_frame as spill compression codec and
675+
/// all temporary spill files are properly cleaned up after execution.
676+
/// Note: This test does not inspect file contents (e.g. magic number),
677+
/// as spill files are automatically deleted on drop.
678+
#[tokio::test]
679+
async fn test_spill_file_compressed_with_lz4_frame() -> Result<()> {
680+
let disk_spill_limit = 1024 * 1024; // 1MB
681+
let spill_compression = SpillCompression::Lz4Frame;
682+
let ctx = setup_context(disk_spill_limit, 128 * 1024, spill_compression).await?; // 1MB disk limit, 128KB memory limit, lz4_frame
683+
684+
let df = ctx
685+
.sql("select * from generate_series(1, 100000) as t1(v1) order by v1")
686+
.await
687+
.unwrap();
688+
let plan = df.create_physical_plan().await.unwrap();
689+
690+
let task_ctx = ctx.task_ctx();
691+
let _ = collect_batches(Arc::clone(&plan), task_ctx)
692+
.await
693+
.expect("Query execution failed");
694+
695+
let spill_count = plan.metrics().unwrap().spill_count().unwrap();
696+
let spilled_bytes = plan.metrics().unwrap().spilled_bytes().unwrap();
697+
698+
println!("spill count {spill_count}");
699+
assert!(spill_count > 0);
700+
assert!((spilled_bytes as u64) < disk_spill_limit);
701+
702+
// Verify that all temporary files have been properly cleaned up by checking
703+
// that the total disk usage tracked by the disk manager is zero
704+
let current_disk_usage = ctx.runtime_env().disk_manager.used_disk_space();
705+
assert_eq!(current_disk_usage, 0);
706+
707+
Ok(())
708+
}
633709
/// Run the query with the specified memory limit,
634710
/// and verifies the expected errors are returned
635711
#[derive(Clone, Debug)]

datafusion/core/tests/parquet/row_group_pruning.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -421,7 +421,7 @@ macro_rules! int_tests {
421421
.with_query(&format!("SELECT * FROM t where i{} in (100)", $bits))
422422
.with_expected_errors(Some(0))
423423
.with_matched_by_stats(Some(0))
424-
.with_pruned_by_stats(Some(4))
424+
.with_pruned_by_stats(Some(0))
425425
.with_matched_by_bloom_filter(Some(0))
426426
.with_pruned_by_bloom_filter(Some(0))
427427
.with_expected_rows(0)
@@ -1316,7 +1316,7 @@ async fn test_row_group_with_null_values() {
13161316
.with_query("SELECT * FROM t WHERE \"i32\" > 7")
13171317
.with_expected_errors(Some(0))
13181318
.with_matched_by_stats(Some(0))
1319-
.with_pruned_by_stats(Some(3))
1319+
.with_pruned_by_stats(Some(0))
13201320
.with_expected_rows(0)
13211321
.with_matched_by_bloom_filter(Some(0))
13221322
.with_pruned_by_bloom_filter(Some(0))

0 commit comments

Comments
 (0)