From 6333d7913d2c2692522353535804174f2526952d Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Sat, 19 Oct 2024 10:15:04 -0700 Subject: [PATCH 01/68] preliminary changes --- arrow-arith/src/numeric.rs | 40 ++++ arrow-array/src/array/mod.rs | 2 + arrow-array/src/array/primitive_array.rs | 192 ++++++++++++++++++- arrow-array/src/builder/buffer_builder.rs | 4 + arrow-array/src/builder/primitive_builder.rs | 7 +- arrow-array/src/builder/struct_builder.rs | 6 + arrow-array/src/types.rs | 92 ++++++++- arrow-cast/src/cast/mod.rs | 25 +-- arrow-cast/src/display.rs | 2 +- arrow-data/src/data.rs | 4 +- arrow-data/src/decimal.rs | 159 ++++++++++++++- arrow-data/src/equal/mod.rs | 2 + arrow-data/src/transform/mod.rs | 8 +- arrow-integration-test/src/datatype.rs | 20 +- arrow-integration-test/src/lib.rs | 36 ++++ arrow-ipc/src/convert.rs | 42 ++-- arrow-schema/src/datatype.rs | 52 +++++ arrow-schema/src/datatype_parse.rs | 35 ++++ arrow-schema/src/ffi.rs | 16 +- arrow-schema/src/field.rs | 2 + arrow/benches/array_from_vec.rs | 46 +++++ arrow/benches/builder.rs | 40 +++- arrow/benches/cast_kernels.rs | 34 ++++ arrow/benches/decimal_validate.rs | 57 +++++- arrow/src/tensor.rs | 4 + parquet/src/arrow/arrow_reader/statistics.rs | 98 +++++++++- parquet/src/arrow/arrow_writer/levels.rs | 2 + parquet/src/arrow/arrow_writer/mod.rs | 55 ++++++ parquet/src/arrow/schema/mod.rs | 1 + 29 files changed, 1020 insertions(+), 63 deletions(-) diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs index b6af40f7d7c2..7d4a68ca3f2f 100644 --- a/arrow-arith/src/numeric.rs +++ b/arrow-arith/src/numeric.rs @@ -109,6 +109,20 @@ pub fn neg(array: &dyn Array) -> Result { Float16 => neg_wrapping!(Float16Type, array), Float32 => neg_wrapping!(Float32Type, array), Float64 => neg_wrapping!(Float64Type, array), + Decimal32(p, s) => { + let a = array + .as_primitive::() + .try_unary::<_, Decimal32Type, _>(|x| x.neg_checked())?; + + Ok(Arc::new(a.with_precision_and_scale(*p, *s)?)) + } + Decimal64(p, s) => { + let a = array + .as_primitive::() + .try_unary::<_, Decimal64Type, _>(|x| x.neg_checked())?; + + Ok(Arc::new(a.with_precision_and_scale(*p, *s)?)) + } Decimal128(p, s) => { let a = array .as_primitive::() @@ -232,6 +246,8 @@ fn arithmetic_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result interval_op::(op, l, l_scalar, r, r_scalar), (Date32, _) => date_op::(op, l, l_scalar, r, r_scalar), (Date64, _) => date_op::(op, l, l_scalar, r, r_scalar), + (Decimal32(_, _), Decimal32(_, _)) => decimal_op::(op, l, l_scalar, r, r_scalar), + (Decimal64(_, _), Decimal64(_, _)) => decimal_op::(op, l, l_scalar, r, r_scalar), (Decimal128(_, _), Decimal128(_, _)) => decimal_op::(op, l, l_scalar, r, r_scalar), (Decimal256(_, _), Decimal256(_, _)) => decimal_op::(op, l, l_scalar, r, r_scalar), (l_t, r_t) => match (l_t, r_t) { @@ -726,6 +742,8 @@ fn decimal_op( let r = r.as_primitive::(); let (p1, s1, p2, s2) = match (l.data_type(), r.data_type()) { + (DataType::Decimal32(p1, s1), DataType::Decimal32(p2, s2)) => (p1, s1, p2, s2), + (DataType::Decimal64(p1, s1), DataType::Decimal64(p2, s2)) => (p1, s1, p2, s2), (DataType::Decimal128(p1, s1), DataType::Decimal128(p2, s2)) => (p1, s1, p2, s2), (DataType::Decimal256(p1, s1), DataType::Decimal256(p2, s2)) => (p1, s1, p2, s2), _ => unreachable!(), @@ -914,6 +932,28 @@ mod tests { "Arithmetic overflow: Overflow happened on: - -9223372036854775808" ); + let a = Decimal32Array::from(vec![1, 3, -44, 2, 4]) + .with_precision_and_scale(9, 6) + .unwrap(); + + let r = neg(&a).unwrap(); + 
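// Negating a Decimal32 array should preserve the Decimal32(9, 6)
+        // data type and flip the sign of each value; nulls pass through.
+ 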
assert_eq!(r.data_type(), a.data_type()); + assert_eq!( + r.as_primitive::().values(), + &[-1, -3, 44, -2, -4] + ); + + let a = Decimal64Array::from(vec![1, 3, -44, 2, 4]) + .with_precision_and_scale(9, 6) + .unwrap(); + + let r = neg(&a).unwrap(); + assert_eq!(r.data_type(), a.data_type()); + assert_eq!( + r.as_primitive::().values(), + &[-1, -3, 44, -2, -4] + ); + let a = Decimal128Array::from(vec![1, 3, -44, 2, 4]) .with_precision_and_scale(9, 6) .unwrap(); diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 296f5ae721b3..6e7a6a3626a8 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -713,6 +713,8 @@ pub fn make_array(data: ArrayData) -> ArrayRef { dt => panic!("Unexpected data type for run_ends array {dt:?}"), }, DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef, + DataType::Decimal32(_, _) => Arc::new(Decimal32Array::from(data)) as ArrayRef, + DataType::Decimal64(_, _) => Arc::new(Decimal64Array::from(data)) as ArrayRef, DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef, DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef, dt => panic!("Unexpected data type {dt:?}"), diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 567fa00e7385..da40b7775f30 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -410,6 +410,44 @@ pub type DurationMicrosecondArray = PrimitiveArray; /// A [`PrimitiveArray`] of elapsed durations in nanoseconds pub type DurationNanosecondArray = PrimitiveArray; +/// A [`PrimitiveArray`] of 32-bit fixed point decimals +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Decimal32Array; +/// // Create from Vec> +/// let arr = Decimal32Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Decimal32Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Decimal32Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples +pub type Decimal32Array = PrimitiveArray; + +/// A [`PrimitiveArray`] of 64-bit fixed point decimals +/// +/// # Examples +/// +/// Construction +/// +/// ``` +/// # use arrow_array::Decimal64Array; +/// // Create from Vec> +/// let arr = Decimal64Array::from(vec![Some(1), None, Some(2)]); +/// // Create from Vec +/// let arr = Decimal64Array::from(vec![1, 2, 3]); +/// // Create iter/collect +/// let arr: Decimal64Array = std::iter::repeat(42).take(10).collect(); +/// ``` +/// +/// See [`PrimitiveArray`] for more information and examples +pub type Decimal64Array = PrimitiveArray; + /// A [`PrimitiveArray`] of 128-bit fixed point decimals /// /// # Examples @@ -418,7 +456,7 @@ pub type DurationNanosecondArray = PrimitiveArray; /// /// ``` /// # use arrow_array::Decimal128Array; -/// // Create from Vec> +/// // Create from Vec> /// let arr = Decimal128Array::from(vec![Some(1), None, Some(2)]); /// // Create from Vec /// let arr = Decimal128Array::from(vec![1, 2, 3]); @@ -672,6 +710,8 @@ impl PrimitiveArray { DataType::Timestamp(t1, _) => { matches!(data_type, DataType::Timestamp(t2, _) if &t1 == t2) } + DataType::Decimal32(_, _) => matches!(data_type, DataType::Decimal32(_, _)), + DataType::Decimal64(_, _) => matches!(data_type, DataType::Decimal64(_, _)), DataType::Decimal128(_, _) => matches!(data_type, DataType::Decimal128(_, _)), DataType::Decimal256(_, _) => matches!(data_type, 
DataType::Decimal256(_, _)), _ => T::DATA_TYPE.eq(data_type), @@ -1343,6 +1383,8 @@ def_from_for_primitive!(UInt64Type, u64); def_from_for_primitive!(Float16Type, f16); def_from_for_primitive!(Float32Type, f32); def_from_for_primitive!(Float64Type, f64); +def_from_for_primitive!(Decimal32Type, i32); +def_from_for_primitive!(Decimal64Type, i64); def_from_for_primitive!(Decimal128Type, i128); def_from_for_primitive!(Decimal256Type, i256); @@ -1455,6 +1497,8 @@ def_numeric_from_vec!(UInt64Type); def_numeric_from_vec!(Float16Type); def_numeric_from_vec!(Float32Type); def_numeric_from_vec!(Float64Type); +def_numeric_from_vec!(Decimal32Type); +def_numeric_from_vec!(Decimal64Type); def_numeric_from_vec!(Decimal128Type); def_numeric_from_vec!(Decimal256Type); @@ -1581,6 +1625,26 @@ impl PrimitiveArray { /// Returns the decimal precision of this array pub fn precision(&self) -> u8 { match T::BYTE_LENGTH { + 4 => { + if let DataType::Decimal32(p, _) = self.data_type() { + *p + } else { + unreachable!( + "Decimal32Array datatype is not DataType::Decimal32 but {}", + self.data_type() + ) + } + } + 8 => { + if let DataType::Decimal64(p, _) = self.data_type() { + *p + } else { + unreachable!( + "Decimal64Array datatype is not DataType::Decimal64 but {}", + self.data_type() + ) + } + } 16 => { if let DataType::Decimal128(p, _) = self.data_type() { *p @@ -1608,6 +1672,26 @@ impl PrimitiveArray { /// Returns the decimal scale of this array pub fn scale(&self) -> i8 { match T::BYTE_LENGTH { + 4 => { + if let DataType::Decimal32(_, s) = self.data_type() { + *s + } else { + unreachable!( + "Decimal32Array datatype is not DataType::Decimal32 but {}", + self.data_type() + ) + } + } + 8 => { + if let DataType::Decimal64(_, s) = self.data_type() { + *s + } else { + unreachable!( + "Decimal64Array datatype is not DataType::Decimal64 but {}", + self.data_type() + ) + } + } 16 => { if let DataType::Decimal128(_, s) = self.data_type() { *s @@ -1636,7 +1720,7 @@ impl PrimitiveArray { #[cfg(test)] mod tests { use super::*; - use crate::builder::{Decimal128Builder, Decimal256Builder}; + use crate::builder::{Decimal32Builder, Decimal64Builder, Decimal128Builder, Decimal256Builder}; use crate::cast::downcast_array; use crate::BooleanArray; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; @@ -2246,6 +2330,42 @@ mod tests { let _ = PrimitiveArray::::from(foo.into_data()); } + #[test] + fn test_decimal32() { + let values: Vec<_> = vec![0, 1, -1, i32::MIN, i32::MAX]; + let array: PrimitiveArray = + PrimitiveArray::from_iter(values.iter().copied()); + assert_eq!(array.values(), &values); + + let array: PrimitiveArray = + PrimitiveArray::from_iter_values(values.iter().copied()); + assert_eq!(array.values(), &values); + + let array = PrimitiveArray::::from(values.clone()); + assert_eq!(array.values(), &values); + + let array = PrimitiveArray::::from(array.to_data()); + assert_eq!(array.values(), &values); + } + + #[test] + fn test_decimal64() { + let values: Vec<_> = vec![0, 1, -1, i64::MIN, i64::MAX]; + let array: PrimitiveArray = + PrimitiveArray::from_iter(values.iter().copied()); + assert_eq!(array.values(), &values); + + let array: PrimitiveArray = + PrimitiveArray::from_iter_values(values.iter().copied()); + assert_eq!(array.values(), &values); + + let array = PrimitiveArray::::from(values.clone()); + assert_eq!(array.values(), &values); + + let array = PrimitiveArray::::from(array.to_data()); + assert_eq!(array.values(), &values); + } + #[test] fn test_decimal128() { let values: Vec<_> = vec![0, 1, -1, 
i128::MIN, i128::MAX]; @@ -2517,6 +2637,74 @@ mod tests { assert!(!array.is_null(2)); } + #[test] + fn test_decimal64_iter() { + let mut builder = Decimal64Builder::with_capacity(30); + let decimal1 = 12345; + builder.append_value(decimal1); + + builder.append_null(); + + let decimal2 = 56789; + builder.append_value(decimal2); + + let array: Decimal64Array = builder.finish().with_precision_and_scale(18, 4).unwrap(); + + let collected: Vec<_> = array.iter().collect(); + assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected); + } + + #[test] + fn test_from_iter_decimal64array() { + let value1 = 12345; + let value2 = 56789; + + let mut array: Decimal64Array = + vec![Some(value1), None, Some(value2)].into_iter().collect(); + array = array.with_precision_and_scale(18, 4).unwrap(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal64(18, 4)); + assert_eq!(value1, array.value(0)); + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert_eq!(value2, array.value(2)); + assert!(!array.is_null(2)); + } + + #[test] + fn test_decimal32_iter() { + let mut builder = Decimal32Builder::with_capacity(30); + let decimal1 = 12345; + builder.append_value(decimal1); + + builder.append_null(); + + let decimal2 = 56789; + builder.append_value(decimal2); + + let array: Decimal32Array = builder.finish().with_precision_and_scale(9, 2).unwrap(); + + let collected: Vec<_> = array.iter().collect(); + assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected); + } + + #[test] + fn test_from_iter_decimal32array() { + let value1 = 12345; + let value2 = 56789; + + let mut array: Decimal32Array = + vec![Some(value1), None, Some(value2)].into_iter().collect(); + array = array.with_precision_and_scale(9, 2).unwrap(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal32(9, 2)); + assert_eq!(value1, array.value(0)); + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert_eq!(value2, array.value(2)); + assert!(!array.is_null(2)); + } + #[test] fn test_unary_opt() { let array = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]); diff --git a/arrow-array/src/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs index ab67669febb8..64c9f8db1e50 100644 --- a/arrow-array/src/builder/buffer_builder.rs +++ b/arrow-array/src/builder/buffer_builder.rs @@ -43,6 +43,10 @@ pub type Float32BufferBuilder = BufferBuilder; /// Buffer builder for 64-bit floating point type. pub type Float64BufferBuilder = BufferBuilder; +/// Buffer builder for 32-bit decimal type. +pub type Decimal32BufferBuilder = BufferBuilder<::Native>; +/// Buffer builder for 64-bit decimal type. +pub type Decimal64BufferBuilder = BufferBuilder<::Native>; /// Buffer builder for 128-bit decimal type. pub type Decimal128BufferBuilder = BufferBuilder<::Native>; /// Buffer builder for 256-bit decimal type. diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index 3191fea6e407..0b987d5e8d0b 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -87,6 +87,10 @@ pub type DurationMicrosecondBuilder = PrimitiveBuilder; /// An elapsed time in nanoseconds array builder. 
pub type DurationNanosecondBuilder = PrimitiveBuilder; +/// A decimal 32 array builder +pub type Decimal32Builder = PrimitiveBuilder; +/// A decimal 64 array builder +pub type Decimal64Builder = PrimitiveBuilder; /// A decimal 128 array builder pub type Decimal128Builder = PrimitiveBuilder; /// A decimal 256 array builder @@ -175,7 +179,8 @@ impl PrimitiveBuilder { /// data type of the generated array. /// /// This method allows overriding the data type, to allow specifying timezones - /// for [`DataType::Timestamp`] or precision and scale for [`DataType::Decimal128`] and [`DataType::Decimal256`] + /// for [`DataType::Timestamp`] or precision and scale for [`DataType::Decimal32`], + /// [`DataType::Decimal64`], [`DataType::Decimal128`] and [`DataType::Decimal256`] /// /// # Panics /// diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index c0e49b939f2c..8b13c1080210 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -186,6 +186,12 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len)) } + DataType::Decimal32(p, s) => Box::new( + Decimal32Builder::with_capacity(capacity).with_data_type(DataType::Decimal32(*p, *s)), + ), + DataType::Decimal64(p, s) => Box::new( + Decimal64Builder::with_capacity(capacity).with_data_type(DataType::Decimal64(*p, *s)), + ), DataType::Decimal128(p, s) => Box::new( Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)), ), diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 92262fc04a57..cf1e91556378 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -25,13 +25,17 @@ use crate::timezone::Tz; use crate::{ArrowNativeTypeOp, OffsetSizeTrait}; use arrow_buffer::{i256, Buffer, OffsetBuffer}; use arrow_data::decimal::{ - is_validate_decimal256_precision, is_validate_decimal_precision, validate_decimal256_precision, - validate_decimal_precision, + is_validate_decimal256_precision, is_validate_decimal_precision, + is_validate_decimal64_precision, is_validate_decimal32_precision, + validate_decimal256_precision, validate_decimal_precision, + validate_decimal64_precision, validate_decimal32_precision, }; use arrow_data::{validate_binary_view, validate_string_view}; use arrow_schema::{ - ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, - DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, + ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, + DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL32_DEFAULT_SCALE, DECIMAL64_DEFAULT_SCALE, + DECIMAL_DEFAULT_SCALE, }; use chrono::{Duration, NaiveDate, NaiveDateTime}; use half::f16; @@ -1162,6 +1166,8 @@ mod decimal { use super::*; pub trait DecimalTypeSealed {} + impl DecimalTypeSealed for Decimal32Type {} + impl DecimalTypeSealed for Decimal64Type {} impl DecimalTypeSealed for Decimal128Type {} impl DecimalTypeSealed for Decimal256Type {} } @@ -1169,10 +1175,12 @@ mod decimal { /// A trait over the decimal types, used by [`PrimitiveArray`] to provide a generic /// implementation across the various decimal types /// -/// Implemented by [`Decimal128Type`] and [`Decimal256Type`] for [`Decimal128Array`] -/// and [`Decimal256Array`] respectively +/// Implemented by 
[`Decimal32Type`], [`Decimal64Type`], [`Decimal128Type`] and [`Decimal256Type`]
+/// for [`Decimal32Array`], [`Decimal64Array`], [`Decimal128Array`] and [`Decimal256Array`] respectively
 ///
 /// [`PrimitiveArray`]: crate::array::PrimitiveArray
+/// [`Decimal32Array`]: crate::array::Decimal32Array
+/// [`Decimal64Array`]: crate::array::Decimal64Array
 /// [`Decimal128Array`]: crate::array::Decimal128Array
 /// [`Decimal256Array`]: crate::array::Decimal256Array
 pub trait DecimalType:
@@ -1189,7 +1197,7 @@ pub trait DecimalType:
     /// Default values for [`DataType`]
     const DEFAULT_TYPE: DataType;
 
-    /// "Decimal128" or "Decimal256", for use in error messages
+    /// "Decimal32", "Decimal64", "Decimal128" or "Decimal256", for use in error messages
     const PREFIX: &'static str;
 
     /// Formats the decimal value with the provided precision and scale
@@ -1242,6 +1250,74 @@ pub fn validate_decimal_precision_and_scale(
     Ok(())
 }
 
+/// The decimal type for a Decimal32Array
+#[derive(Debug)]
+pub struct Decimal32Type {}
+
+impl DecimalType for Decimal32Type {
+    const BYTE_LENGTH: usize = 4;
+    const MAX_PRECISION: u8 = DECIMAL32_MAX_PRECISION;
+    const MAX_SCALE: i8 = DECIMAL32_MAX_SCALE;
+    const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal32;
+    const DEFAULT_TYPE: DataType =
+        DataType::Decimal32(DECIMAL32_MAX_PRECISION, DECIMAL32_DEFAULT_SCALE);
+    const PREFIX: &'static str = "Decimal32";
+
+    fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
+        format_decimal_str(&value.to_string(), precision as usize, scale)
+    }
+
+    fn validate_decimal_precision(num: i32, precision: u8) -> Result<(), ArrowError> {
+        validate_decimal32_precision(num, precision)
+    }
+
+    fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
+        is_validate_decimal32_precision(value, precision)
+    }
+}
+
+impl ArrowPrimitiveType for Decimal32Type {
+    type Native = i32;
+
+    const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
+}
+
+impl primitive::PrimitiveTypeSealed for Decimal32Type {}
+
+/// The decimal type for a Decimal64Array
+#[derive(Debug)]
+pub struct Decimal64Type {}
+
+impl DecimalType for Decimal64Type {
+    const BYTE_LENGTH: usize = 8;
+    const MAX_PRECISION: u8 = DECIMAL64_MAX_PRECISION;
+    const MAX_SCALE: i8 = DECIMAL64_MAX_SCALE;
+    const TYPE_CONSTRUCTOR: fn(u8, i8) -> DataType = DataType::Decimal64;
+    const DEFAULT_TYPE: DataType =
+        DataType::Decimal64(DECIMAL64_MAX_PRECISION, DECIMAL64_DEFAULT_SCALE);
+    const PREFIX: &'static str = "Decimal64";
+
+    fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String {
+        format_decimal_str(&value.to_string(), precision as usize, scale)
+    }
+
+    fn validate_decimal_precision(num: i64, precision: u8) -> Result<(), ArrowError> {
+        validate_decimal64_precision(num, precision)
+    }
+
+    fn is_valid_decimal_precision(value: Self::Native, precision: u8) -> bool {
+        is_validate_decimal64_precision(value, precision)
+    }
+}
+
+impl ArrowPrimitiveType for Decimal64Type {
+    type Native = i64;
+
+    const DATA_TYPE: DataType = <Self as DecimalType>::DEFAULT_TYPE;
+}
+
+impl primitive::PrimitiveTypeSealed for Decimal64Type {}
+
 /// The decimal type for a Decimal128Array
 #[derive(Debug)]
 pub struct Decimal128Type {}
@@ -1613,6 +1689,8 @@ mod tests {
         test_layout::();
         test_layout::();
         test_layout::();
+        test_layout::<Decimal32Type>();
+        test_layout::<Decimal64Type>();
         test_layout::();
         test_layout::();
         test_layout::();
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 7abadf5793b3..2b6881d6d5fa 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -168,
+168,25 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { _ => false }, // cast one decimal type to another decimal type - (Decimal128(_, _), Decimal128(_, _)) => true, - (Decimal256(_, _), Decimal256(_, _)) => true, - (Decimal128(_, _), Decimal256(_, _)) => true, - (Decimal256(_, _), Decimal128(_, _)) => true, + (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), + Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) | // unsigned integer to decimal - (UInt8 | UInt16 | UInt32 | UInt64, Decimal128(_, _)) | - (UInt8 | UInt16 | UInt32 | UInt64, Decimal256(_, _)) | + (UInt8 | UInt16 | UInt32 | UInt64, + Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) | // signed numeric to decimal - (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) | - (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal256(_, _)) | + (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, + Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) | // decimal to unsigned numeric - (Decimal128(_, _) | Decimal256(_, _), UInt8 | UInt16 | UInt32 | UInt64) | + (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), + UInt8 | UInt16 | UInt32 | UInt64) | // decimal to signed numeric - (Decimal128(_, _) | Decimal256(_, _), Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) => true, + (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), + Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) | // decimal to Utf8 - (Decimal128(_, _) | Decimal256(_, _), Utf8 | LargeUtf8) => true, + (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), + Utf8 | LargeUtf8) | // Utf8 to decimal - (Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true, + (Utf8 | LargeUtf8, Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) => true, (Struct(from_fields), Struct(to_fields)) => { from_fields.len() == to_fields.len() && from_fields.iter().zip(to_fields.iter()).all(|(f1, f2)| { diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index df96816ea23a..9ac5da644cf0 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -474,7 +474,7 @@ macro_rules! 
decimal_display {
     };
 }
 
-decimal_display!(Decimal128Type, Decimal256Type);
+decimal_display!(Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type);
 
 fn write_timestamp(
     f: &mut dyn Write,
diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 8af2a91cf159..e5e51a0e75b6 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -144,7 +144,7 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
     DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
         [empty_buffer, MutableBuffer::new(0)]
     }
-    DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
+    DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
         MutableBuffer::new(capacity * mem::size_of::()),
         empty_buffer,
     ],
@@ -1587,6 +1587,8 @@ pub fn layout(data_type: &DataType) -> DataTypeLayout {
             DataTypeLayout::new_fixed_width::()
         }
         DataType::Duration(_) => DataTypeLayout::new_fixed_width::(),
+        DataType::Decimal32(_, _) => DataTypeLayout::new_fixed_width::<i32>(),
+        DataType::Decimal64(_, _) => DataTypeLayout::new_fixed_width::<i64>(),
         DataType::Decimal128(_, _) => DataTypeLayout::new_fixed_width::(),
         DataType::Decimal256(_, _) => DataTypeLayout::new_fixed_width::(),
         DataType::FixedSizeBinary(size) => {
diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs
index fe19db641236..9c629cb49ff8 100644
--- a/arrow-data/src/decimal.rs
+++ b/arrow-data/src/decimal.rs
@@ -23,8 +23,9 @@ use arrow_buffer::i256;
 use arrow_schema::ArrowError;
 
 pub use arrow_schema::{
+    DECIMAL32_MAX_PRECISION, DECIMAL32_MAX_SCALE, DECIMAL64_MAX_PRECISION, DECIMAL64_MAX_SCALE,
     DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
-    DECIMAL_DEFAULT_SCALE,
+    DECIMAL32_DEFAULT_SCALE, DECIMAL64_DEFAULT_SCALE, DECIMAL_DEFAULT_SCALE,
 };
 
 /// MAX decimal256 value of little-endian format for each precision.
@@ -833,9 +834,161 @@ pub(crate) const MIN_DECIMAL_FOR_EACH_PRECISION_ONE_BASED: [i128; 39] = [
     -99999999999999999999999999999999999999,
 ];
 
-/// Validates that the specified `i128` value can be properly
+/// `MAX_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[p]` holds the maximum `i64` value that can
+/// be stored in a [arrow_schema::DataType::Decimal64] value of precision `p`.
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+pub(crate) const MAX_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED: [i64; 19] = [
+    0, // unused first element
+    9,
+    99,
+    999,
+    9999,
+    99999,
+    999999,
+    9999999,
+    99999999,
+    999999999,
+    9999999999,
+    99999999999,
+    999999999999,
+    9999999999999,
+    99999999999999,
+    999999999999999,
+    9999999999999999,
+    99999999999999999,
+    999999999999999999,
+];
+
+/// `MIN_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[p]` holds the minimum `i64` value that can
+/// be stored in a [arrow_schema::DataType::Decimal64] value of precision `p`.
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+pub(crate) const MIN_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED: [i64; 19] = [
+    0, // unused first element
+    -9,
+    -99,
+    -999,
+    -9999,
+    -99999,
+    -999999,
+    -9999999,
+    -99999999,
+    -999999999,
+    -9999999999,
+    -99999999999,
+    -999999999999,
+    -9999999999999,
+    -99999999999999,
+    -999999999999999,
+    -9999999999999999,
+    -99999999999999999,
+    -999999999999999999,
+];
+
+/// `MAX_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[p]` holds the maximum `i32` value that can
+/// be stored in a [arrow_schema::DataType::Decimal32] value of precision `p`.
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+pub(crate) const MAX_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED: [i32; 10] = [
+    0, // unused first element
+    9,
+    99,
+    999,
+    9999,
+    99999,
+    999999,
+    9999999,
+    99999999,
+    999999999,
+];
+
+/// `MIN_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[p]` holds the minimum `i32` value that can
+/// be stored in a [arrow_schema::DataType::Decimal32] value of precision `p`.
+/// The first element is unused and is inserted so that we can look up using
+/// precision as the index without the need to subtract 1 first.
+pub(crate) const MIN_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED: [i32; 10] = [
+    0, // unused first element
+    -9,
+    -99,
+    -999,
+    -9999,
+    -99999,
+    -999999,
+    -9999999,
+    -99999999,
+    -999999999,
+];
+
+/// Validates that the specified `i32` value can be properly
 /// interpreted as a Decimal32 number with precision `precision`
 #[inline]
+pub fn validate_decimal32_precision(value: i32, precision: u8) -> Result<(), ArrowError> {
+    if precision > DECIMAL32_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal32 is {DECIMAL32_MAX_PRECISION}, but got {precision}",
+        )));
+    }
+    if value > MAX_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize] {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{value} is too large to store in a Decimal32 of precision {precision}. Max is {}",
+            MAX_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
+        )))
+    } else if value < MIN_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize] {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{value} is too small to store in a Decimal32 of precision {precision}. Min is {}",
+            MIN_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
+        )))
+    } else {
+        Ok(())
+    }
+}
+
+/// Determines whether the specified `i32` value can be properly
+/// interpreted as a Decimal32 number with precision `precision`
+#[inline]
+pub fn is_validate_decimal32_precision(value: i32, precision: u8) -> bool {
+    precision <= DECIMAL32_MAX_PRECISION
+        && value >= MIN_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
+        && value <= MAX_DECIMAL32_FOR_EACH_PRECISION_ONE_BASED[precision as usize]
+}
+
+/// Validates that the specified `i64` value can be properly
+/// interpreted as a Decimal64 number with precision `precision`
+#[inline]
+pub fn validate_decimal64_precision(value: i64, precision: u8) -> Result<(), ArrowError> {
+    if precision > DECIMAL64_MAX_PRECISION {
+        return Err(ArrowError::InvalidArgumentError(format!(
+            "Max precision of a Decimal64 is {DECIMAL64_MAX_PRECISION}, but got {precision}",
+        )));
+    }
+    if value > MAX_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] {
+        Err(ArrowError::InvalidArgumentError(format!(
+            "{value} is too large to store in a Decimal64 of precision {precision}.
Max is {}", + MAX_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] + ))) + } else if value < MIN_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] { + Err(ArrowError::InvalidArgumentError(format!( + "{value} is too small to store in a Decimal64 of precision {precision}. Min is {}", + MIN_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] + ))) + } else { + Ok(()) + } +} + +/// Determines whether the specified `i64` value can be properly +/// interpreted as a Decimal64 number with precision `precision` +#[inline] +pub fn is_validate_decimal64_precision(value: i64, precision: u8) -> bool { + precision <= DECIMAL64_MAX_PRECISION + && value >= MIN_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] + && value <= MAX_DECIMAL64_FOR_EACH_PRECISION_ONE_BASED[precision as usize] +} + +/// Validates that the specified `i128` value can be properly +/// interpreted as a Decimal128 number with precision `precision` +#[inline] pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), ArrowError> { if precision > DECIMAL128_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( @@ -858,7 +1011,7 @@ pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), Arro } /// Determines whether the specified `i128` value can be properly -/// interpreted as a Decimal number with precision `precision` +/// interpreted as a Decimal128 number with precision `precision` #[inline] pub fn is_validate_decimal_precision(value: i128, precision: u8) -> bool { precision <= DECIMAL128_MAX_PRECISION diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs index f24179b61700..1c16ee2f8a14 100644 --- a/arrow-data/src/equal/mod.rs +++ b/arrow-data/src/equal/mod.rs @@ -78,6 +78,8 @@ fn equal_values( DataType::Int64 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Float32 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Float64 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Decimal32(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Decimal64(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Decimal128(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Decimal256(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index c74b0c43481a..f9338c5d8337 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -256,6 +256,8 @@ fn build_extend(array: &ArrayData) -> Extend { | DataType::Duration(_) | DataType::Interval(IntervalUnit::DayTime) => primitive::build_extend::(array), DataType::Interval(IntervalUnit::MonthDayNano) => primitive::build_extend::(array), + DataType::Decimal32(_, _) => primitive::build_extend::(array), + DataType::Decimal64(_, _) => primitive::build_extend::(array), DataType::Decimal128(_, _) => primitive::build_extend::(array), DataType::Decimal256(_, _) => primitive::build_extend::(array), DataType::Utf8 | DataType::Binary => variable_size::build_extend::(array), @@ -302,6 +304,8 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { | DataType::Duration(_) | DataType::Interval(IntervalUnit::DayTime) => primitive::extend_nulls::, DataType::Interval(IntervalUnit::MonthDayNano) => primitive::extend_nulls::, + DataType::Decimal32(_, _) => primitive::extend_nulls::, + 
DataType::Decimal64(_, _) => primitive::extend_nulls::, DataType::Decimal128(_, _) => primitive::extend_nulls::, DataType::Decimal256(_, _) => primitive::extend_nulls::, DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::, @@ -455,7 +459,9 @@ impl<'a> MutableArrayData<'a> { }; let child_data = match &data_type { - DataType::Decimal128(_, _) + DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) + | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) | DataType::Null | DataType::Boolean diff --git a/arrow-integration-test/src/datatype.rs b/arrow-integration-test/src/datatype.rs index e45e94c24e07..4c17fbe76be7 100644 --- a/arrow-integration-test/src/datatype.rs +++ b/arrow-integration-test/src/datatype.rs @@ -60,14 +60,14 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result { _ => 128, // Default bit width }; - if bit_width == 128 { - Ok(DataType::Decimal128(precision, scale)) - } else if bit_width == 256 { - Ok(DataType::Decimal256(precision, scale)) - } else { - Err(ArrowError::ParseError( + match bit_width { + 32 => Ok(DataType::Decimal32(precision, scale)), + 64 => Ok(DataType::Decimal64(precision, scale)), + 128 => Ok(DataType::Decimal128(precision, scale)), + 256 => Ok(DataType::Decimal256(precision, scale)), + _ => Err(ArrowError::ParseError( "Decimal bit_width invalid".to_string(), - )) + )), } } Some(s) if s == "floatingpoint" => match map.get("precision") { @@ -337,6 +337,12 @@ pub fn data_type_to_json(data_type: &DataType) -> serde_json::Value { TimeUnit::Nanosecond => "NANOSECOND", }}), DataType::Dictionary(_, _) => json!({ "name": "dictionary"}), + DataType::Decimal32(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 32}) + } + DataType::Decimal64(precision, scale) => { + json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 64}) + } DataType::Decimal128(precision, scale) => { json!({"name": "decimal", "precision": precision, "scale": scale, "bitWidth": 128}) } diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index ea5b545f2e81..2dff479d484b 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -812,6 +812,42 @@ pub fn array_from_json( ))), } } + DataType::Decimal32(precision, scale) => { + let mut b = Decimal32Builder::with_capacity(json_col.count); + for (is_valid, value) in json_col + .validity + .as_ref() + .unwrap() + .iter() + .zip(json_col.data.unwrap()) + { + match is_valid { + 1 => b.append_value(value.as_str().unwrap().parse::().unwrap()), + _ => b.append_null(), + }; + } + Ok(Arc::new( + b.finish().with_precision_and_scale(*precision, *scale)?, + )) + } + DataType::Decimal64(precision, scale) => { + let mut b = Decimal64Builder::with_capacity(json_col.count); + for (is_valid, value) in json_col + .validity + .as_ref() + .unwrap() + .iter() + .zip(json_col.data.unwrap()) + { + match is_valid { + 1 => b.append_value(value.as_str().unwrap().parse::().unwrap()), + _ => b.append_null(), + }; + } + Ok(Arc::new( + b.finish().with_precision_and_scale(*precision, *scale)?, + )) + } DataType::Decimal128(precision, scale) => { let mut b = Decimal128Builder::with_capacity(json_col.count); for (is_valid, value) in json_col diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index eef236529e10..1ed4c17e5bde 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -453,18 +453,14 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat 
crate::Type::Decimal => {
             let fsb = field.type_as_decimal().unwrap();
             let bit_width = fsb.bitWidth();
-            if bit_width == 128 {
-                DataType::Decimal128(
-                    fsb.precision().try_into().unwrap(),
-                    fsb.scale().try_into().unwrap(),
-                )
-            } else if bit_width == 256 {
-                DataType::Decimal256(
-                    fsb.precision().try_into().unwrap(),
-                    fsb.scale().try_into().unwrap(),
-                )
-            } else {
-                panic!("Unexpected decimal bit width {bit_width}")
+            let precision: u8 = fsb.precision().try_into().unwrap();
+            let scale: i8 = fsb.scale().try_into().unwrap();
+            match bit_width {
+                32 => DataType::Decimal32(precision, scale),
+                64 => DataType::Decimal64(precision, scale),
+                128 => DataType::Decimal128(precision, scale),
+                256 => DataType::Decimal256(precision, scale),
+                _ => panic!("Unexpected decimal bit width {bit_width}"),
             }
         }
         crate::Type::Union => {
@@ -830,6 +826,28 @@ pub(crate) fn get_fb_field_type<'a>(
             // type in the DictionaryEncoding metadata in the parent field
             get_fb_field_type(value_type, dictionary_tracker, fbb)
         }
+        Decimal32(precision, scale) => {
+            let mut builder = crate::DecimalBuilder::new(fbb);
+            builder.add_precision(*precision as i32);
+            builder.add_scale(*scale as i32);
+            builder.add_bitWidth(32);
+            FBFieldType {
+                type_type: crate::Type::Decimal,
+                type_: builder.finish().as_union_value(),
+                children: Some(fbb.create_vector(&empty_fields[..])),
+            }
+        }
+        Decimal64(precision, scale) => {
+            let mut builder = crate::DecimalBuilder::new(fbb);
+            builder.add_precision(*precision as i32);
+            builder.add_scale(*scale as i32);
+            builder.add_bitWidth(64);
+            FBFieldType {
+                type_type: crate::Type::Decimal,
+                type_: builder.finish().as_union_value(),
+                children: Some(fbb.create_vector(&empty_fields[..])),
+            }
+        }
         Decimal128(precision, scale) => {
             let mut builder = crate::DecimalBuilder::new(fbb);
             builder.add_precision(*precision as i32);
diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs
index ff5832dfa68c..1fa24847899c 100644
--- a/arrow-schema/src/datatype.rs
+++ b/arrow-schema/src/datatype.rs
@@ -331,6 +331,34 @@ pub enum DataType {
     /// This type mostly used to represent low cardinality string
     /// arrays or a limited set of primitive types as integers.
     Dictionary(Box<DataType>, Box<DataType>),
+    /// Exact 32-bit width decimal value with precision and scale
+    ///
+    /// * precision is the total number of digits
+    /// * scale is the number of digits past the decimal
+    ///
+    /// For example the number 123.45 has precision 5 and scale 2.
+    ///
+    /// In certain situations, scale could be a negative number. For
+    /// negative scale, it is the number of padding zeros to the right
+    /// of the digits.
+    ///
+    /// For example the number 12300 could be treated as a decimal
+    /// with precision 3 and scale -2.
+    Decimal32(u8, i8),
+    /// Exact 64-bit width decimal value with precision and scale
+    ///
+    /// * precision is the total number of digits
+    /// * scale is the number of digits past the decimal
+    ///
+    /// For example the number 123.45 has precision 5 and scale 2.
+    ///
+    /// In certain situations, scale could be a negative number. For
+    /// negative scale, it is the number of padding zeros to the right
+    /// of the digits.
+    ///
+    /// For example the number 12300 could be treated as a decimal
+    /// with precision 3 and scale -2.
+ Decimal64(u8, i8), /// Exact 128-bit width decimal value with precision and scale /// /// * precision is the total number of digits @@ -489,6 +517,8 @@ impl DataType { | Float16 | Float32 | Float64 + | Decimal32(_, _) + | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _) ) @@ -641,6 +671,8 @@ impl DataType { DataType::Interval(IntervalUnit::YearMonth) => Some(4), DataType::Interval(IntervalUnit::DayTime) => Some(8), DataType::Interval(IntervalUnit::MonthDayNano) => Some(16), + DataType::Decimal32(_, _) => Some(4), + DataType::Decimal64(_, _) => Some(8), DataType::Decimal128(_, _) => Some(16), DataType::Decimal256(_, _) => Some(32), DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => None, @@ -691,6 +723,8 @@ impl DataType { | DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + | DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => 0, DataType::Timestamp(_, s) => s.as_ref().map(|s| s.len()).unwrap_or_default(), @@ -764,6 +798,18 @@ impl DataType { } } +/// The maximum precision for [DataType::Decimal32] values +pub const DECIMAL32_MAX_PRECISION: u8 = 9; + +/// The maximum scale for [DataType::Decimal32] values +pub const DECIMAL32_MAX_SCALE: i8 = 9; + +/// The maximum precision for [DataType::Decimal64] values +pub const DECIMAL64_MAX_PRECISION: u8 = 18; + +/// The maximum scale for [DataType::Decimal64] values +pub const DECIMAL64_MAX_SCALE: i8 = 18; + /// The maximum precision for [DataType::Decimal128] values pub const DECIMAL128_MAX_PRECISION: u8 = 38; @@ -776,6 +822,12 @@ pub const DECIMAL256_MAX_PRECISION: u8 = 76; /// The maximum scale for [DataType::Decimal256] values pub const DECIMAL256_MAX_SCALE: i8 = 76; +/// The default scale for [DataType::Decimal32] values +pub const DECIMAL32_DEFAULT_SCALE: i8 = 2; + +/// The default scale for [DataType::Decimal64] values +pub const DECIMAL64_DEFAULT_SCALE: i8 = 6; + /// The default scale for [DataType::Decimal128] and [DataType::Decimal256] /// values pub const DECIMAL_DEFAULT_SCALE: i8 = 10; diff --git a/arrow-schema/src/datatype_parse.rs b/arrow-schema/src/datatype_parse.rs index 40d411ba27ca..0ea0153f43e3 100644 --- a/arrow-schema/src/datatype_parse.rs +++ b/arrow-schema/src/datatype_parse.rs @@ -72,6 +72,8 @@ impl<'a> Parser<'a> { Token::Duration => self.parse_duration(), Token::Interval => self.parse_interval(), Token::FixedSizeBinary => self.parse_fixed_size_binary(), + Token::Decimal32 => self.parse_decimal_32(), + Token::Decimal64 => self.parse_decimal_64(), Token::Decimal128 => self.parse_decimal_128(), Token::Decimal256 => self.parse_decimal_256(), Token::Dictionary => self.parse_dictionary(), @@ -259,6 +261,26 @@ impl<'a> Parser<'a> { Ok(DataType::FixedSizeBinary(length)) } + /// Parses the next Decimal32 (called after `Decimal32` has been consumed) + fn parse_decimal_32(&mut self) -> ArrowResult { + self.expect_token(Token::LParen)?; + let precision = self.parse_u8("Decimal32")?; + self.expect_token(Token::Comma)?; + let scale = self.parse_i8("Decimal32")?; + self.expect_token(Token::RParen)?; + Ok(DataType::Decimal32(precision, scale)) + } + + /// Parses the next Decimal64 (called after `Decimal64` has been consumed) + fn parse_decimal_64(&mut self) -> ArrowResult { + self.expect_token(Token::LParen)?; + let precision = self.parse_u8("Decimal64")?; + self.expect_token(Token::Comma)?; + let scale = self.parse_i8("Decimal64")?; + self.expect_token(Token::RParen)?; + Ok(DataType::Decimal64(precision, scale)) + } + /// Parses the next Decimal128 
(called after `Decimal128` has been consumed) fn parse_decimal_128(&mut self) -> ArrowResult { self.expect_token(Token::LParen)?; @@ -469,6 +491,9 @@ impl<'a> Tokenizer<'a> { "Dictionary" => Token::Dictionary, "FixedSizeBinary" => Token::FixedSizeBinary, + + "Decimal32" => Token::Decimal32, + "Decimal64" => Token::Decimal64, "Decimal128" => Token::Decimal128, "Decimal256" => Token::Decimal256, @@ -531,6 +556,8 @@ enum Token { Duration, Interval, FixedSizeBinary, + Decimal32, + Decimal64, Decimal128, Decimal256, Dictionary, @@ -568,6 +595,8 @@ impl Display for Token { Token::Some => write!(f, "Some"), Token::None => write!(f, "None"), Token::FixedSizeBinary => write!(f, "FixedSizeBinary"), + Token::Decimal32 => write!(f, "Decimal32"), + Token::Decimal64 => write!(f, "Decimal64"), Token::Decimal128 => write!(f, "Decimal128"), Token::Decimal256 => write!(f, "Decimal256"), Token::Dictionary => write!(f, "Dictionary"), @@ -658,6 +687,8 @@ mod test { DataType::Utf8, DataType::Utf8View, DataType::LargeUtf8, + DataType::Decimal32(7, 8), + DataType::Decimal64(6, 9), DataType::Decimal128(7, 12), DataType::Decimal256(6, 13), // --------- @@ -750,8 +781,12 @@ mod test { // too large for i32 ("FixedSizeBinary(4000000000), ", "Error converting 4000000000 into i32 for FixedSizeBinary: out of range integral type conversion attempted"), // can't have negative precision + ("Decimal32(-3, 5)", "Error converting -3 into u8 for Decimal32: out of range integral type conversion attempted"), + ("Decimal64(-3, 5)", "Error converting -3 into u8 for Decimal64: out of range integral type conversion attempted"), ("Decimal128(-3, 5)", "Error converting -3 into u8 for Decimal128: out of range integral type conversion attempted"), ("Decimal256(-3, 5)", "Error converting -3 into u8 for Decimal256: out of range integral type conversion attempted"), + ("Decimal32(3, 500)", "Error converting 500 into i8 for Decimal32: out of range integral type conversion attempted"), + ("Decimal64(3, 500)", "Error converting 500 into i8 for Decimal64: out of range integral type conversion attempted"), ("Decimal128(3, 500)", "Error converting 500 into i8 for Decimal128: out of range integral type conversion attempted"), ("Decimal256(3, 500)", "Error converting 500 into i8 for Decimal256: out of range integral type conversion attempted"), diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index e12c37da4898..dc49fb68bac5 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -504,9 +504,6 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { DataType::Decimal128(parsed_precision, parsed_scale) }, [precision, scale, bits] => { - if *bits != "128" && *bits != "256" { - return Err(ArrowError::CDataInterface("Only 128/256 bit wide decimal is supported in the Rust implementation".to_string())); - } let parsed_precision = precision.parse::().map_err(|_| { ArrowError::CDataInterface( "The decimal type requires an integer precision".to_string(), @@ -517,10 +514,13 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { "The decimal type requires an integer scale".to_string(), ) })?; - if *bits == "128" { - DataType::Decimal128(parsed_precision, parsed_scale) - } else { - DataType::Decimal256(parsed_precision, parsed_scale) + let parsed_bits = bits.parse::().unwrap_or(0); + match parsed_bits { + 32 => DataType::Decimal32(parsed_precision, parsed_scale), + 64 => DataType::Decimal64(parsed_precision, parsed_scale), + 128 => DataType::Decimal128(parsed_precision, parsed_scale), + 256 => DataType::Decimal256(parsed_precision, parsed_scale), 
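+                        // Any other reported bit width falls through to the error below.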
+ _ => return Err(ArrowError::CDataInterface("Only 32/64/128/256 bit wide decimals are supported in the Rust implementation".to_string())), } } _ => { @@ -703,6 +703,8 @@ fn get_format_string(dtype: &DataType) -> Result { DataType::LargeUtf8 => Ok("U".to_string()), DataType::FixedSizeBinary(num_bytes) => Ok(format!("w:{num_bytes}")), DataType::FixedSizeList(_, num_elems) => Ok(format!("+w:{num_elems}")), + DataType::Decimal32(precision, scale) => Ok(format!("d:{precision},{scale},32")), + DataType::Decimal64(precision, scale) => Ok(format!("d:{precision},{scale},64")), DataType::Decimal128(precision, scale) => Ok(format!("d:{precision},{scale}")), DataType::Decimal256(precision, scale) => Ok(format!("d:{precision},{scale},256")), DataType::Date32 => Ok("tdD".to_string()), diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index b532ea8616b6..f9545590966a 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -544,6 +544,8 @@ impl Field { | DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View + | DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { if from.data_type == DataType::Null { diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from_vec.rs index fd83ad5c2a10..86705eb7fb43 100644 --- a/arrow/benches/array_from_vec.rs +++ b/arrow/benches/array_from_vec.rs @@ -73,6 +73,28 @@ fn struct_array_from_vec( criterion::black_box(StructArray::try_from(vec![(field1, strings), (field2, ints)]).unwrap()); } +fn decimal32_array_from_vec(array: &[Option]) { + criterion::black_box( + array + .iter() + .copied() + .collect::() + .with_precision_and_scale(9, 2) + .unwrap(), + ); +} + +fn decimal64_array_from_vec(array: &[Option]) { + criterion::black_box( + array + .iter() + .copied() + .collect::() + .with_precision_and_scale(17, 2) + .unwrap(), + ); +} + fn decimal128_array_from_vec(array: &[Option]) { criterion::black_box( array @@ -96,6 +118,30 @@ fn decimal256_array_from_vec(array: &[Option]) { } fn decimal_benchmark(c: &mut Criterion) { + // bench decimal32 array + // create option array + let size: usize = 1 << 15; + let mut rng = rand::thread_rng(); + let mut array = vec![]; + for _ in 0..size { + array.push(Some(rng.gen_range::(0..99999999))); + } + c.bench_function("decimal32_array_from_vec 32768", |b| { + b.iter(|| decimal32_array_from_vec(array.as_slice())) + }); + + // bench decimal64 array + // create option array + let size: usize = 1 << 15; + let mut rng = rand::thread_rng(); + let mut array = vec![]; + for _ in 0..size { + array.push(Some(rng.gen_range::(0..9999999999))); + } + c.bench_function("decimal64_array_from_vec 32768", |b| { + b.iter(|| decimal64_array_from_vec(array.as_slice())) + }); + // bench decimal128 array // create option array let size: usize = 1 << 15; diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index 87a02e7ad1fd..312627972e54 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -107,6 +107,42 @@ fn bench_string(c: &mut Criterion) { group.finish(); } +fn bench_decimal32(c: &mut Criterion) { + c.bench_function("bench_decimal32_builder", |b| { + b.iter(|| { + let mut rng = rand::thread_rng(); + let mut decimal_builder = Decimal32Builder::with_capacity(BATCH_SIZE); + for _ in 0..BATCH_SIZE { + decimal_builder.append_value(rng.gen_range::(0..999999999)); + } + black_box( + decimal_builder + .finish() + .with_precision_and_scale(9, 0) + .unwrap(), + ); + }) + }); +} + +fn bench_decimal64(c: &mut Criterion) { + 
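+    // Same pattern as bench_decimal32 above, but with values of up to ten
+    // digits, which need Decimal64's wider 18-digit range.
+ 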
c.bench_function("bench_decimal64_builder", |b| { + b.iter(|| { + let mut rng = rand::thread_rng(); + let mut decimal_builder = Decimal64Builder::with_capacity(BATCH_SIZE); + for _ in 0..BATCH_SIZE { + decimal_builder.append_value(rng.gen_range::(0..9999999999)); + } + black_box( + decimal_builder + .finish() + .with_precision_and_scale(18, 0) + .unwrap(), + ); + }) + }); +} + fn bench_decimal128(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { @@ -126,7 +162,7 @@ fn bench_decimal128(c: &mut Criterion) { } fn bench_decimal256(c: &mut Criterion) { - c.bench_function("bench_decimal128_builder", |b| { + c.bench_function("bench_decimal256_builder", |b| { b.iter(|| { let mut rng = rand::thread_rng(); let mut decimal_builder = Decimal256Builder::with_capacity(BATCH_SIZE); @@ -150,6 +186,8 @@ criterion_group!( bench_primitive_nulls, bench_bool, bench_string, + bench_decimal32, + bench_decimal64, bench_decimal128, bench_decimal256, ); diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index ec7990d3d764..dab09a46284a 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -82,6 +82,36 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { Arc::new(builder.finish()) } +fn build_decimal32_array(size: usize, precision: u8, scale: i8) -> ArrayRef { + let mut rng = seedable_rng(); + let mut builder = Decimal32Builder::with_capacity(size); + + for _ in 0..size { + builder.append_value(rng.gen_range::(0..10000000)); + } + Arc::new( + builder + .finish() + .with_precision_and_scale(precision, scale) + .unwrap(), + ) +} + +fn build_decimal64_array(size: usize, precision: u8, scale: i8) -> ArrayRef { + let mut rng = seedable_rng(); + let mut builder = Decimal64Builder::with_capacity(size); + + for _ in 0..size { + builder.append_value(rng.gen_range::(0..1000000000)); + } + Arc::new( + builder + .finish() + .with_precision_and_scale(precision, scale) + .unwrap(), + ) +} + fn build_decimal128_array(size: usize, precision: u8, scale: i8) -> ArrayRef { let mut rng = seedable_rng(); let mut builder = Decimal128Builder::with_capacity(size); @@ -158,6 +188,8 @@ fn add_benchmark(c: &mut Criterion) { let utf8_date_array = build_utf8_date_array(512, true); let utf8_date_time_array = build_utf8_date_time_array(512, true); + let decimal32_array = build_decimal32_array(512, 9, 3); + let decimal64_array = build_decimal64_array(512, 10, 3); let decimal128_array = build_decimal128_array(512, 10, 3); let decimal256_array = build_decimal256_array(512, 50, 3); let string_array = build_string_array(512); @@ -247,6 +279,8 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64)) }); +// TODO: decimal32, decimal64 + c.bench_function("cast decimal128 to decimal128 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5))) }); diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index be812a225ca2..88086f9a8720 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -18,7 +18,12 @@ #[macro_use] extern crate criterion; -use arrow::array::{Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder}; +use arrow::array::{Array, + Decimal32Array, Decimal32Builder, + Decimal64Array, Decimal64Builder, + Decimal128Array, Decimal128Builder, + Decimal256Array, Decimal256Builder +}; use criterion::Criterion; use rand::Rng; @@ -26,6 +31,14 @@ extern crate arrow; use arrow_buffer::i256; 
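+
+// Each validate_* helper retags an already-built array with a different
+// precision, mirroring the Decimal128/Decimal256 validation benchmarks below.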
+fn validate_decimal32_array(array: Decimal32Array) { + array.with_precision_and_scale(8, 0).unwrap(); +} + +fn validate_decimal64_array(array: Decimal64Array) { + array.with_precision_and_scale(16, 0).unwrap(); +} + fn validate_decimal128_array(array: Decimal128Array) { array.with_precision_and_scale(35, 0).unwrap(); } @@ -34,6 +47,46 @@ fn validate_decimal256_array(array: Decimal256Array) { array.with_precision_and_scale(35, 0).unwrap(); } +fn validate_decimal32_benchmark(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let size: i32 = 20000; + let mut decimal_builder = Decimal32Builder::with_capacity(size as usize); + for _ in 0..size { + decimal_builder.append_value(rng.gen_range::(0..99999999)); + } + let decimal_array = decimal_builder + .finish() + .with_precision_and_scale(9, 0) + .unwrap(); + let data = decimal_array.into_data(); + c.bench_function("validate_decimal32_array 20000", |b| { + b.iter(|| { + let array = Decimal32Array::from(data.clone()); + validate_decimal32_array(array); + }) + }); +} + +fn validate_decimal64_benchmark(c: &mut Criterion) { + let mut rng = rand::thread_rng(); + let size: i64 = 20000; + let mut decimal_builder = Decimal64Builder::with_capacity(size as usize); + for _ in 0..size { + decimal_builder.append_value(rng.gen_range::(0..999999999999)); + } + let decimal_array = decimal_builder + .finish() + .with_precision_and_scale(18, 0) + .unwrap(); + let data = decimal_array.into_data(); + c.bench_function("validate_decimal64_array 20000", |b| { + b.iter(|| { + let array = Decimal64Array::from(data.clone()); + validate_decimal64_array(array); + }) + }); +} + fn validate_decimal128_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let size: i128 = 20000; @@ -78,6 +131,8 @@ fn validate_decimal256_benchmark(c: &mut Criterion) { criterion_group!( benches, + validate_decimal32_benchmark, + validate_decimal64_benchmark, validate_decimal128_benchmark, validate_decimal256_benchmark, ); diff --git a/arrow/src/tensor.rs b/arrow/src/tensor.rs index cd135a2f04df..3b65ea7b52f9 100644 --- a/arrow/src/tensor.rs +++ b/arrow/src/tensor.rs @@ -86,6 +86,10 @@ pub type BooleanTensor<'a> = Tensor<'a, BooleanType>; pub type Date32Tensor<'a> = Tensor<'a, Date32Type>; /// [Tensor] of type [Int16Type] pub type Date64Tensor<'a> = Tensor<'a, Date64Type>; +/// [Tensor] of type [Decimal32Type] +pub type Decimal32Tensor<'a> = Tensor<'a, Decimal32Type>; +/// [Tensor] of type [Decimal64Type] +pub type Decimal64Tensor<'a> = Tensor<'a, Decimal64Type>; /// [Tensor] of type [Decimal128Type] pub type Decimal128Tensor<'a> = Tensor<'a, Decimal128Type>; /// [Tensor] of type [Decimal256Type] diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs index 8a7511be2afe..eaa87353feca 100644 --- a/parquet/src/arrow/arrow_reader/statistics.rs +++ b/parquet/src/arrow/arrow_reader/statistics.rs @@ -33,9 +33,9 @@ use arrow_array::builder::{ }; use arrow_array::{ new_empty_array, new_null_array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Decimal256Array, Float16Array, Float32Array, Float64Array, Int16Array, - Int32Array, Int64Array, Int8Array, LargeBinaryArray, Time32MillisecondArray, Time32SecondArray, - Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, Float16Array, Float32Array, + Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, Time32MillisecondArray, + Time32SecondArray, 
Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; @@ -45,12 +45,24 @@ use half::f16; use paste::paste; use std::sync::Arc; -// Convert the bytes array to i128. +// Convert the bytes array to i32. // The endian of the input bytes array must be big-endian. -pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 { +pub(crate) fn from_bytes_to_i32(b: &[u8]) -> i32 { // The bytes array are from parquet file and must be the big-endian. // The endian is defined by parquet format, and the reference document // https://github.com/apache/parquet-format/blob/54e53e5d7794d383529dd30746378f19a12afd58/src/main/thrift/parquet.thrift#L66 + i32::from_be_bytes(sign_extend_be::<4>(b)) +} + +// Convert the bytes array to i64. +// The endian of the input bytes array must be big-endian. +pub(crate) fn from_bytes_to_i64(b: &[u8]) -> i64 { + i64::from_be_bytes(sign_extend_be::<8>(b)) +} + +// Convert the bytes array to i128. +// The endian of the input bytes array must be big-endian. +pub(crate) fn from_bytes_to_i128(b: &[u8]) -> i128 { i128::from_be_bytes(sign_extend_be::<16>(b)) } @@ -263,7 +275,7 @@ macro_rules! make_decimal_stats_iterator { s.$func().map(|x| $stat_value_type::from(*x)) } ParquetStatistics::Int64(s) => { - s.$func().map(|x| $stat_value_type::from(*x)) + s.$func().map(|x| $stat_value_type::try_from(*x).ok()).flatten() } ParquetStatistics::ByteArray(s) => s.$bytes_func().map($convert_func), ParquetStatistics::FixedLenByteArray(s) => { @@ -281,6 +293,34 @@ macro_rules! make_decimal_stats_iterator { }; } +make_decimal_stats_iterator!( + MinDecimal32StatsIterator, + min_opt, + min_bytes_opt, + i32, + from_bytes_to_i32 +); +make_decimal_stats_iterator!( + MaxDecimal32StatsIterator, + max_opt, + max_bytes_opt, + i32, + from_bytes_to_i32 +); +make_decimal_stats_iterator!( + MinDecimal64StatsIterator, + min_opt, + min_bytes_opt, + i64, + from_bytes_to_i64 +); +make_decimal_stats_iterator!( + MaxDecimal64StatsIterator, + max_opt, + max_bytes_opt, + i64, + from_bytes_to_i64 +); make_decimal_stats_iterator!( MinDecimal128StatsIterator, min_opt, @@ -474,6 +514,18 @@ macro_rules! get_statistics { } Ok(Arc::new(builder.finish())) }, + DataType::Decimal32(precision, scale) => { + let arr = Decimal32Array::from_iter( + [<$stat_type_prefix Decimal32StatsIterator>]::new($iterator) + ).with_precision_and_scale(*precision, *scale)?; + Ok(Arc::new(arr)) + }, + DataType::Decimal64(precision, scale) => { + let arr = Decimal64Array::from_iter( + [<$stat_type_prefix Decimal64StatsIterator>]::new($iterator) + ).with_precision_and_scale(*precision, *scale)?; + Ok(Arc::new(arr)) + }, DataType::Decimal128(precision, scale) => { let arr = Decimal128Array::from_iter( [<$stat_type_prefix Decimal128StatsIterator>]::new($iterator) @@ -727,7 +779,7 @@ macro_rules! get_decimal_page_stats_iterator { native_index .indexes .iter() - .map(|x| x.$func.and_then(|x| Some($stat_value_type::from(x)))) + .map(|x| x.$func.and_then(|x| $stat_value_type::try_from(x).ok())) .collect::>(), ), Index::BYTE_ARRAY(native_index) => Some( @@ -761,6 +813,34 @@ macro_rules! 
get_decimal_page_stats_iterator { }; } +get_decimal_page_stats_iterator!( + MinDecimal32DataPageStatsIterator, + min, + i32, + from_bytes_to_i32 +); + +get_decimal_page_stats_iterator!( + MaxDecimal32DataPageStatsIterator, + max, + i32, + from_bytes_to_i32 +); + +get_decimal_page_stats_iterator!( + MinDecimal64DataPageStatsIterator, + min, + i64, + from_bytes_to_i64 +); + +get_decimal_page_stats_iterator!( + MaxDecimal64DataPageStatsIterator, + max, + i64, + from_bytes_to_i64 +); + get_decimal_page_stats_iterator!( MinDecimal128DataPageStatsIterator, min, @@ -954,6 +1034,10 @@ macro_rules! get_data_page_statistics { ) ) ), + DataType::Decimal32(precision, scale) => Ok(Arc::new( + Decimal32Array::from_iter([<$stat_type_prefix Decimal32DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), + DataType::Decimal64(precision, scale) => Ok(Arc::new( + Decimal64Array::from_iter([<$stat_type_prefix Decimal64DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), DataType::Decimal128(precision, scale) => Ok(Arc::new( Decimal128Array::from_iter([<$stat_type_prefix Decimal128DataPageStatsIterator>]::new($iterator).flatten()).with_precision_and_scale(*precision, *scale)?)), DataType::Decimal256(precision, scale) => Ok(Arc::new( diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 3e828bbddd17..d5ff259044d0 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -87,6 +87,8 @@ fn is_leaf(data_type: &DataType) -> bool { | DataType::Binary | DataType::LargeBinary | DataType::BinaryView + | DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) | DataType::FixedSizeBinary(_) diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 3ec7a3dfea36..47da27decc16 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -827,6 +827,19 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result(); write_primitive(typed, array, levels) } + ArrowDataType::Decimal32(_, _) => { + let array = column + .as_primitive::() + .unary::<_, Int32Type>(|v| v as i32); + write_primitive(typed, array.values(), levels) + } + ArrowDataType::Decimal64(_, _) => { + // use the int32 to represent the decimal with low precision + let array = column + .as_primitive::() + .unary::<_, Int32Type>(|v| v as i32); + write_primitive(typed, array.values(), levels) + } ArrowDataType::Decimal128(_, _) => { // use the int32 to represent the decimal with low precision let array = column @@ -869,6 +882,12 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result(); write_primitive(typed, array, levels) } + ArrowDataType::Decimal64(_, _) => { + let array = column + .as_primitive::() + .unary::<_, Int64Type>(|v| v as i64); + write_primitive(typed, array.values(), levels) + } ArrowDataType::Decimal128(_, _) => { // use the int64 to represent the decimal with low precision let array = column @@ -936,6 +955,14 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result { + let array = column.as_primitive::(); + get_decimal_32_array_slice(array, indices) + } + ArrowDataType::Decimal64(_, _) => { + let array = column.as_primitive::(); + get_decimal_64_array_slice(array, indices) + } ArrowDataType::Decimal128(_, _) => { let array = column.as_primitive::(); 
get_decimal_128_array_slice(array, indices) @@ -1019,6 +1046,34 @@ fn get_interval_dt_array_slice( values } +fn get_decimal_32_array_slice( + array: &arrow_array::Decimal32Array, + indices: &[usize], +) -> Vec { + let mut values = Vec::with_capacity(indices.len()); + let size = decimal_length_from_precision(array.precision()); + for i in indices { + let as_be_bytes = array.value(*i).to_be_bytes(); + let resized_value = as_be_bytes[(4 - size)..].to_vec(); + values.push(FixedLenByteArray::from(ByteArray::from(resized_value))); + } + values +} + +fn get_decimal_64_array_slice( + array: &arrow_array::Decimal64Array, + indices: &[usize], +) -> Vec { + let mut values = Vec::with_capacity(indices.len()); + let size = decimal_length_from_precision(array.precision()); + for i in indices { + let as_be_bytes = array.value(*i).to_be_bytes(); + let resized_value = as_be_bytes[(8 - size)..].to_vec(); + values.push(FixedLenByteArray::from(ByteArray::from(resized_value))); + } + values +} + fn get_decimal_128_array_slice( array: &arrow_array::Decimal128Array, indices: &[usize], diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 3ed3bd24e0a8..6e6400b4b8a2 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -477,6 +477,7 @@ fn arrow_to_parquet_type(field: &Field) -> Result { .with_repetition(repetition) .with_id(id) .build(), + DataType::Decimal32(precision, scale) | DataType::Decimal64(precision, scale) | DataType::Decimal128(precision, scale) | DataType::Decimal256(precision, scale) => { // Decimal precision determines the Parquet physical type to use. // Following the: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal From b653b9b48a117b24e85c4932619c144a43756ccd Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Thu, 31 Oct 2024 07:47:39 -0700 Subject: [PATCH 02/68] small fixes --- arrow-data/src/decimal.rs | 2 +- arrow-json/src/reader/mod.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs index 9c629cb49ff8..e0979188db0d 100644 --- a/arrow-data/src/decimal.rs +++ b/arrow-data/src/decimal.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions. +//! Defines maximum and minimum values for `decimal256`, `decimal128`, `decimal64` and `decimal32` types for varying precisions. //! //! Also provides functions to validate if a given decimal value is within the valid range of the decimal type. 
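The validation functions mentioned in the doc comment above reduce to a digit-count range check. As a minimal self-contained sketch, assuming the usual rule that a value fits precision p when |v| <= 10^p - 1 (the function name below is illustrative, not the crate's API):

fn is_valid_decimal32(v: i32, precision: u8) -> bool {
    // Decimal32 holds at most 9 decimal digits; 10^9 - 1 still fits in an i32.
    assert!((1..=9).contains(&precision));
    let max = 10_i32.pow(precision as u32) - 1;
    (-max..=max).contains(&v)
}

fn main() {
    assert!(is_valid_decimal32(99_999_999, 8));   // 8 digits fit precision 8
    assert!(!is_valid_decimal32(100_000_000, 8)); // 9 digits overflow precision 8
}

The same shape of check applies to Decimal64 with a 10^18 - 1 bound, which is why the benchmarks above validate against precisions 8 and 16 respectively.
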
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index bcacf6f706b8..b38186b7a040 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -691,6 +691,8 @@ fn make_decoder( DataType::Time32(TimeUnit::Millisecond) => primitive_decoder!(Time32MillisecondType, data_type), DataType::Time64(TimeUnit::Microsecond) => primitive_decoder!(Time64MicrosecondType, data_type), DataType::Time64(TimeUnit::Nanosecond) => primitive_decoder!(Time64NanosecondType, data_type), + DataType::Decimal32(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), + DataType::Decimal64(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Decimal128(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Decimal256(p, s) => Ok(Box::new(DecimalArrayDecoder::::new(p, s))), DataType::Boolean => Ok(Box::::default()), @@ -1156,6 +1158,8 @@ mod tests { #[test] fn test_decimals() { + test_decimal::(DataType::Decimal32(8, 2)); + test_decimal::(DataType::Decimal64(10, 2)); test_decimal::(DataType::Decimal128(10, 2)); test_decimal::(DataType::Decimal256(10, 2)); } From 490bfc8a1ff993f4884bfc7d5a34592401805295 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 17 Dec 2024 18:44:06 -0800 Subject: [PATCH 03/68] more support --- arrow-csv/src/reader/mod.rs | 16 ++++++++++ arrow/benches/cast_kernels.rs | 5 +++- arrow/tests/array_cast.rs | 22 ++++++++++++-- .../array_reader/fixed_len_byte_array.rs | 30 +++++++++++++++++-- .../src/arrow/array_reader/primitive_array.rs | 12 +++++--- 5 files changed, 76 insertions(+), 9 deletions(-) diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index d3d518316397..3a99e651bf3b 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -652,6 +652,22 @@ fn parse( let field = &fields[i]; match field.data_type() { DataType::Boolean => build_boolean_array(line_number, rows, i, null_regex), + DataType::Decimal32(precision, scale) => build_decimal_array::( + line_number, + rows, + i, + *precision, + *scale, + null_regex, + ), + DataType::Decimal64(precision, scale) => build_decimal_array::( + line_number, + rows, + i, + *precision, + *scale, + null_regex, + ), DataType::Decimal128(precision, scale) => build_decimal_array::( line_number, rows, diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index 132bfb07d338..da929ae1da74 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -279,7 +279,10 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| cast_array(&utf8_date_time_array, DataType::Date64)) }); -// TODO: decimal32, decimal64 + // TODO: decimal32, decimal64 + c.bench_function("cast decimal32 to decimal32 512", |b| { + b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(8, 2))) + }); c.bench_function("cast decimal128 to decimal128 512", |b| { b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5))) diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index ef5ca6041700..bf9962b69f7b 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -18,8 +18,9 @@ use arrow_array::builder::{PrimitiveDictionaryBuilder, StringDictionaryBuilder, UnionBuilder}; use arrow_array::cast::AsArray; use arrow_array::types::{ - ArrowDictionaryKeyType, Decimal128Type, Decimal256Type, Int16Type, Int32Type, Int64Type, - Int8Type, TimestampMicrosecondType, UInt16Type, UInt32Type, UInt64Type, UInt8Type, + ArrowDictionaryKeyType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, + Int16Type, Int32Type, 
Int64Type, Int8Type, TimestampMicrosecondType, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, }; use arrow_array::{ Array, ArrayRef, ArrowPrimitiveType, BinaryArray, BooleanArray, Date32Array, Date64Array, @@ -262,6 +263,22 @@ fn get_arrays_of_all_types() -> Vec { Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])), Arc::new(DurationNanosecondArray::from(vec![1000, 2000])), Arc::new(create_decimal_array(vec![Some(1), Some(2), Some(3)], 38, 0).unwrap()), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), + make_dictionary_primitive::(vec![1, 2]), make_dictionary_primitive::(vec![1, 2]), make_dictionary_primitive::(vec![1, 2]), make_dictionary_primitive::(vec![1, 2]), @@ -501,6 +518,7 @@ fn get_all_types() -> Vec { Dictionary(Box::new(key_type.clone()), Box::new(LargeUtf8)), Dictionary(Box::new(key_type.clone()), Box::new(Binary)), Dictionary(Box::new(key_type.clone()), Box::new(LargeBinary)), + Dictionary(Box::new(key_type.clone()), Box::new(Decimal32(9, 0))), Dictionary(Box::new(key_type.clone()), Box::new(Decimal128(38, 0))), Dictionary(Box::new(key_type), Box::new(Decimal256(76, 0))), ] diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs index 6b437be943d4..6378cd991e2a 100644 --- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs +++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs @@ -27,8 +27,8 @@ use crate::column::reader::decoder::ColumnValueDecoder; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; use arrow_array::{ - ArrayRef, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, - IntervalDayTimeArray, IntervalYearMonthArray, + ArrayRef, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, + FixedSizeBinaryArray, Float16Array, IntervalDayTimeArray, IntervalYearMonthArray, }; use arrow_buffer::{i256, Buffer, IntervalDayTime}; use arrow_data::ArrayDataBuilder; @@ -64,6 +64,22 @@ pub fn make_fixed_len_byte_array_reader( }; match &data_type { ArrowType::FixedSizeBinary(_) => {} + ArrowType::Decimal32(_, _) => { + if byte_length > 4 { + return Err(general_err!( + "decimal 64 type too large, must be less then 4 bytes, got {}", + byte_length + )); + } + } + ArrowType::Decimal64(_, _) => { + if byte_length > 8 { + return Err(general_err!( + "decimal 32 type too large, must be less then 8 bytes, got {}", + byte_length + )); + } + } ArrowType::Decimal128(_, _) => { if byte_length > 16 { return Err(general_err!( @@ -168,6 +184,16 @@ impl ArrayReader for FixedLenByteArrayReader { // conversion lambdas are all infallible. This improves performance by avoiding a branch in // the inner loop (see docs for `PrimitiveArray::from_unary`). let array: ArrayRef = match &self.data_type { + ArrowType::Decimal32(p, s) => { + let f = |b: &[u8]| i32::from_be_bytes(sign_extend_be(b)); + Arc::new(Decimal32Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?) 
+ as ArrayRef + } + ArrowType::Decimal64(p, s) => { + let f = |b: &[u8]| i64::from_be_bytes(sign_extend_be(b)); + Arc::new(Decimal64Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?) + as ArrayRef + } ArrowType::Decimal128(p, s) => { let f = |b: &[u8]| i128::from_be_bytes(sign_extend_be(b)); Arc::new(Decimal128Array::from_unary(&binary, f).with_precision_and_scale(*p, *s)?) diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index a952e00e12ef..375db933b511 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -23,9 +23,9 @@ use crate::column::page::PageIterator; use crate::data_type::{DataType, Int96}; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; -use arrow_array::Decimal256Array; use arrow_array::{ - builder::TimestampNanosecondBufferBuilder, ArrayRef, BooleanArray, Decimal128Array, + builder::TimestampNanosecondBufferBuilder, ArrayRef, BooleanArray, + Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array, UInt64Array, }; @@ -144,7 +144,8 @@ where // follow C++ implementation and use overflow/reinterpret cast from i32 to u32 which will map // `i32::MIN..0` to `(i32::MAX as u32)..u32::MAX` ArrowType::UInt32 - } + }, + ArrowType::Decimal32(_, _) => target_type.clone(), _ => ArrowType::Int32, } } @@ -154,7 +155,8 @@ where // follow C++ implementation and use overflow/reinterpret cast from i64 to u64 which will map // `i64::MIN..0` to `(i64::MAX as u64)..u64::MAX` ArrowType::UInt64 - } + }, + ArrowType::Decimal64(_, _) => target_type.clone(), _ => ArrowType::Int64, } } @@ -185,11 +187,13 @@ where PhysicalType::INT32 => match array_data.data_type() { ArrowType::UInt32 => Arc::new(UInt32Array::from(array_data)), ArrowType::Int32 => Arc::new(Int32Array::from(array_data)), + ArrowType::Decimal32(_, _) => Arc::new(Decimal32Array::from(array_data)), _ => unreachable!(), }, PhysicalType::INT64 => match array_data.data_type() { ArrowType::UInt64 => Arc::new(UInt64Array::from(array_data)), ArrowType::Int64 => Arc::new(Int64Array::from(array_data)), + ArrowType::Decimal64(_, _) => Arc::new(Decimal64Array::from(array_data)), _ => unreachable!(), }, PhysicalType::FLOAT => Arc::new(Float32Array::from(array_data)), From 3e860ece306e25579567ed13d9c200bd59ad3df5 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Fri, 27 Dec 2024 19:15:34 -0800 Subject: [PATCH 04/68] More support for decimal32 and decimal64 --- arrow-array/src/cast.rs | 6 + arrow-array/src/record_batch.rs | 4 +- arrow-cast/src/cast/decimal.rs | 145 ++- arrow-cast/src/cast/dictionary.rs | 98 +- arrow-cast/src/cast/mod.rs | 932 ++++++++++++------ arrow-csv/src/reader/mod.rs | 48 + arrow-csv/src/writer.rs | 49 +- arrow-data/src/data.rs | 8 +- arrow-json/src/writer/encoder.rs | 2 +- arrow/benches/cast_kernels.rs | 3 + .../array_reader/fixed_len_byte_array.rs | 4 +- 11 files changed, 864 insertions(+), 435 deletions(-) diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index fc657f94c6a6..a06ca34a02e7 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -301,6 +301,12 @@ macro_rules! 
downcast_primitive { $crate::repeat_pat!($crate::cast::__private::DataType::Float64, $($data_type),+) => { $m!($crate::types::Float64Type $(, $args)*) } + $crate::repeat_pat!($crate::cast::__private::DataType::Decimal32(_, _), $($data_type),+) => { + $m!($crate::types::Decimal32Type $(, $args)*) + } + $crate::repeat_pat!($crate::cast::__private::DataType::Decimal64(_, _), $($data_type),+) => { + $m!($crate::types::Decimal64Type $(, $args)*) + } $crate::repeat_pat!($crate::cast::__private::DataType::Decimal128(_, _), $($data_type),+) => { $m!($crate::types::Decimal128Type $(, $args)*) } diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 8958ca6fae62..956b9dd2fcd4 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -64,7 +64,7 @@ pub trait RecordBatchWriter { /// Support for limited data types is available. The macro will return a compile error if an unsupported data type is used. /// Presently supported data types are: /// - `Boolean`, `Null` -/// - `Decimal128`, `Decimal256` +/// - `Decimal32`, `Decimal64`, `Decimal128`, `Decimal256` /// - `Float16`, `Float32`, `Float64` /// - `Int8`, `Int16`, `Int32`, `Int64` /// - `UInt8`, `UInt16`, `UInt32`, `UInt64` @@ -106,6 +106,8 @@ macro_rules! create_array { (@from DurationMillisecond) => { $crate::DurationMillisecondArray }; (@from DurationMicrosecond) => { $crate::DurationMicrosecondArray }; (@from DurationNanosecond) => { $crate::DurationNanosecondArray }; + (@from Decimal32) => { $crate::Decimal32Array }; + (@from Decimal64) => { $crate::Decimal64Array }; (@from Decimal128) => { $crate::Decimal128Array }; (@from Decimal256) => { $crate::Decimal256Array }; (@from TimestampSecond) => { $crate::TimestampSecondArray }; diff --git a/arrow-cast/src/cast/decimal.rs b/arrow-cast/src/cast/decimal.rs index ba82ca9040c7..e9216385a818 100644 --- a/arrow-cast/src/cast/decimal.rs +++ b/arrow-cast/src/cast/decimal.rs @@ -20,14 +20,80 @@ use crate::cast::*; /// A utility trait that provides checked conversions between /// decimal types inspired by [`NumCast`] pub(crate) trait DecimalCast: Sized { + fn to_i32(self) -> Option; + + fn to_i64(self) -> Option; + fn to_i128(self) -> Option; fn to_i256(self) -> Option; fn from_decimal(n: T) -> Option; + + fn from_f64(n: f64) -> Option; +} + +impl DecimalCast for i32 { + fn to_i32(self) -> Option { + Some(self) + } + + fn to_i64(self) -> Option { + Some(self as i64) + } + + fn to_i128(self) -> Option { + Some(self as i128) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self as i128)) + } + + fn from_decimal(n: T) -> Option { + n.to_i32() + } + + fn from_f64(n: f64) -> Option { + n.to_i32() + } +} + +impl DecimalCast for i64 { + fn to_i32(self) -> Option { + Some(self as i32) + } + + fn to_i64(self) -> Option { + Some(self) + } + + fn to_i128(self) -> Option { + Some(self as i128) + } + + fn to_i256(self) -> Option { + Some(i256::from_i128(self as i128)) + } + + fn from_decimal(n: T) -> Option { + n.to_i64() + } + + fn from_f64(n: f64) -> Option { + n.to_i64() + } } impl DecimalCast for i128 { + fn to_i32(self) -> Option { + Some(self as i32) + } + + fn to_i64(self) -> Option { + Some(self as i64) + } + fn to_i128(self) -> Option { Some(self) } @@ -39,9 +105,21 @@ impl DecimalCast for i128 { fn from_decimal(n: T) -> Option { n.to_i128() } + + fn from_f64(n: f64) -> Option { + n.to_i128() + } } impl DecimalCast for i256 { + fn to_i32(self) -> Option { + self.to_i128().map(|x| x as i32) + } + + fn to_i64(self) -> Option { + 
self.to_i128().map(|x| x as i64) + } + fn to_i128(self) -> Option { self.to_i128() } @@ -53,6 +131,10 @@ impl DecimalCast for i256 { fn from_decimal(n: T) -> Option { n.to_i256() } + + fn from_f64(n: f64) -> Option { + i256::from_f64(n) + } } pub(crate) fn cast_decimal_to_decimal_error( @@ -464,52 +546,7 @@ where Ok(Arc::new(result)) } -pub(crate) fn cast_floating_point_to_decimal128( - array: &PrimitiveArray, - precision: u8, - scale: i8, - cast_options: &CastOptions, -) -> Result -where - ::Native: AsPrimitive, -{ - let mul = 10_f64.powi(scale as i32); - - if cast_options.safe { - array - .unary_opt::<_, Decimal128Type>(|v| { - (mul * v.as_()) - .round() - .to_i128() - .filter(|v| Decimal128Type::is_valid_decimal_precision(*v, precision)) - }) - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } else { - array - .try_unary::<_, Decimal128Type, _>(|v| { - (mul * v.as_()) - .round() - .to_i128() - .ok_or_else(|| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal128Type::PREFIX, - precision, - scale, - v - )) - }) - .and_then(|v| { - Decimal128Type::validate_decimal_precision(v, precision).map(|_| v) - }) - })? - .with_precision_and_scale(precision, scale) - .map(|a| Arc::new(a) as ArrayRef) - } -} - -pub(crate) fn cast_floating_point_to_decimal256( +pub(crate) fn cast_floating_point_to_decimal( array: &PrimitiveArray, precision: u8, scale: i8, @@ -517,32 +554,34 @@ pub(crate) fn cast_floating_point_to_decimal256( ) -> Result where ::Native: AsPrimitive, + D: DecimalType + ArrowPrimitiveType, + M: ArrowNativeTypeOp + DecimalCast, { let mul = 10_f64.powi(scale as i32); if cast_options.safe { array - .unary_opt::<_, Decimal256Type>(|v| { - i256::from_f64((v.as_() * mul).round()) - .filter(|v| Decimal256Type::is_valid_decimal_precision(*v, precision)) + .unary_opt::<_, D>(|v| { + M::from_f64::((mul * v.as_()).round()) + .filter(|v| D::is_valid_decimal_precision(*v, precision)) }) .with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) } else { array - .try_unary::<_, Decimal256Type, _>(|v| { - i256::from_f64((v.as_() * mul).round()) + .try_unary::<_, D, _>(|v| { + M::from_f64::((mul * v.as_()).round()) .ok_or_else(|| { ArrowError::CastError(format!( "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal256Type::PREFIX, + D::PREFIX, precision, scale, v )) }) .and_then(|v| { - Decimal256Type::validate_decimal_precision(v, precision).map(|_| v) + D::validate_decimal_precision(v, precision).map(|_| v) }) })? 
.with_precision_and_scale(precision, scale) diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs index ec0ab346f997..4ea514375b61 100644 --- a/arrow-cast/src/cast/dictionary.rs +++ b/arrow-cast/src/cast/dictionary.rs @@ -214,49 +214,37 @@ pub(crate) fn cast_to_dictionary( UInt16 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), UInt32 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), UInt64 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + Decimal32(p, s) => { + pack_decimal_to_dictionary::( + array, + p, + s, + cast_options + ) + } + Decimal64(p, s) => { + pack_decimal_to_dictionary::( + array, + p, + s, + cast_options + ) + } Decimal128(p, s) => { - let dict = pack_numeric_to_dictionary::( + pack_decimal_to_dictionary::( array, - dict_value_type, - cast_options, - )?; - let dict = dict - .as_dictionary::() - .downcast_dict::() - .ok_or_else(|| { - ArrowError::ComputeError( - "Internal Error: Cannot cast dict to Decimal128Array".to_string(), - ) - })?; - let value = dict.values().clone(); - // Set correct precision/scale - let value = value.with_precision_and_scale(p, s)?; - Ok(Arc::new(DictionaryArray::::try_new( - dict.keys().clone(), - Arc::new(value), - )?)) + p, + s, + cast_options + ) } Decimal256(p, s) => { - let dict = pack_numeric_to_dictionary::( + pack_decimal_to_dictionary::( array, - dict_value_type, - cast_options, - )?; - let dict = dict - .as_dictionary::() - .downcast_dict::() - .ok_or_else(|| { - ArrowError::ComputeError( - "Internal Error: Cannot cast dict to Decimal256Array".to_string(), - ) - })?; - let value = dict.values().clone(); - // Set correct precision/scale - let value = value.with_precision_and_scale(p, s)?; - Ok(Arc::new(DictionaryArray::::try_new( - dict.keys().clone(), - Arc::new(value), - )?)) + p, + s, + cast_options + ) } Float16 => { pack_numeric_to_dictionary::(array, dict_value_type, cast_options) @@ -359,6 +347,40 @@ where Ok(Arc::new(b.finish())) } +pub(crate) fn pack_decimal_to_dictionary( + array: &dyn Array, + precision: u8, + scale: i8, + cast_options: &CastOptions, +) -> Result +where + K: ArrowDictionaryKeyType, + D: DecimalType + ArrowPrimitiveType, + M: ArrowNativeTypeOp + DecimalCast, +{ + let dict = pack_numeric_to_dictionary::( + array, + &D::DATA_TYPE, + cast_options, + )?; + let dict = dict + .as_dictionary::() + .downcast_dict::>() + .ok_or_else(|| { + ArrowError::ComputeError(format!( + "Internal Error: Cannot cast dict to {}", + D::PREFIX + )) + })?; + let value = dict.values().clone(); + // Set correct precision/scale + let value = value.with_precision_and_scale(precision, scale)?; + Ok(Arc::new(DictionaryArray::::try_new( + dict.keys().clone(), + Arc::new(value), + )?)) +} + pub(crate) fn string_view_to_dictionary( array: &dyn Array, ) -> Result diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 7c14126f36b6..391ffce90cbe 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -830,6 +830,71 @@ pub fn cast_with_options( (Map(_, ordered1), Map(_, ordered2)) if ordered1 == ordered2 => { cast_map_values(array.as_map(), to_type, cast_options, ordered1.to_owned()) } + (Decimal32(p1, s1), Decimal32(p2, s2)) => { + cast_decimal_to_decimal_same_type::( + array.as_primitive(), + *p1, + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal32(_, s1), Decimal64(p2, s2)) => { + cast_decimal_to_decimal::( + array.as_primitive(), + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal32(_, s1), 
Decimal128(p2, s2)) => { + cast_decimal_to_decimal::( + array.as_primitive(), + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal32(_, s1), Decimal256(p2, s2)) => { + cast_decimal_to_decimal::( + array.as_primitive(), + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal64(p1, s1), Decimal64(p2, s2)) => { + cast_decimal_to_decimal_same_type::( + array.as_primitive(), + *p1, + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal64(_, s1), Decimal128(p2, s2)) => { + cast_decimal_to_decimal::( + array.as_primitive(), + *s1, + *p2, + *s2, + cast_options, + ) + } + (Decimal64(_, s1), Decimal256(p2, s2)) => { + cast_decimal_to_decimal::( + array.as_primitive(), + *s1, + *p2, + *s2, + cast_options, + ) + } (Decimal128(p1, s1), Decimal128(p2, s2)) => { cast_decimal_to_decimal_same_type::( array.as_primitive(), @@ -868,315 +933,93 @@ pub fn cast_with_options( cast_options, ) } + (Decimal32(_, scale), _) if !to_type.is_temporal() => { + cast_from_decimal::( + array, + 10_i32, + scale, + from_type, + to_type, + |x: i32| x as f64, + cast_options, + ) + } + (Decimal64(_, scale), _) if !to_type.is_temporal() => { + cast_from_decimal::( + array, + 10_i64, + scale, + from_type, + to_type, + |x: i64| x as f64, + cast_options, + ) + } (Decimal128(_, scale), _) if !to_type.is_temporal() => { - // cast decimal to other type - match to_type { - UInt8 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - UInt16 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - UInt32 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - UInt64 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - Int8 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - Int16 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - Int32 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - Int64 => cast_decimal_to_integer::( - array, - 10_i128, - *scale, - cast_options, - ), - Float32 => cast_decimal_to_float::(array, |x| { - (x as f64 / 10_f64.powi(*scale as i32)) as f32 - }), - Float64 => cast_decimal_to_float::(array, |x| { - x as f64 / 10_f64.powi(*scale as i32) - }), - Utf8View => value_to_string_view(array, cast_options), - Utf8 => value_to_string::(array, cast_options), - LargeUtf8 => value_to_string::(array, cast_options), - Null => Ok(new_null_array(to_type, array.len())), - _ => Err(ArrowError::CastError(format!( - "Casting from {from_type:?} to {to_type:?} not supported" - ))), - } + cast_from_decimal::( + array, + 10_i128, + scale, + from_type, + to_type, + |x: i128| x as f64, + cast_options, + ) } (Decimal256(_, scale), _) if !to_type.is_temporal() => { - // cast decimal to other type - match to_type { - UInt8 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - UInt16 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - UInt32 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - UInt64 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - Int8 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - Int16 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - Int32 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - 
cast_options, - ), - Int64 => cast_decimal_to_integer::( - array, - i256::from_i128(10_i128), - *scale, - cast_options, - ), - Float32 => cast_decimal_to_float::(array, |x| { - (x.to_f64().unwrap() / 10_f64.powi(*scale as i32)) as f32 - }), - Float64 => cast_decimal_to_float::(array, |x| { - x.to_f64().unwrap() / 10_f64.powi(*scale as i32) - }), - Utf8View => value_to_string_view(array, cast_options), - Utf8 => value_to_string::(array, cast_options), - LargeUtf8 => value_to_string::(array, cast_options), - Null => Ok(new_null_array(to_type, array.len())), - _ => Err(ArrowError::CastError(format!( - "Casting from {from_type:?} to {to_type:?} not supported" - ))), - } + cast_from_decimal::( + array, + i256::from_i128(10_i128), + scale, + from_type, + to_type, + |x: i256| x.to_f64().unwrap(), + cast_options, + ) + } + (_, Decimal32(precision, scale)) if !from_type.is_temporal() => { + cast_to_decimal::( + array, + 10_i32, + precision, + scale, + from_type, + to_type, + cast_options, + ) + } + (_, Decimal64(precision, scale)) if !from_type.is_temporal() => { + cast_to_decimal::( + array, + 10_i64, + precision, + scale, + from_type, + to_type, + cast_options, + ) } (_, Decimal128(precision, scale)) if !from_type.is_temporal() => { - // cast data to decimal - match from_type { - UInt8 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - UInt16 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - UInt32 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - UInt64 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - Int8 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - Int16 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - Int32 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - Int64 => cast_integer_to_decimal::<_, Decimal128Type, _>( - array.as_primitive::(), - *precision, - *scale, - 10_i128, - cast_options, - ), - Float32 => cast_floating_point_to_decimal128( - array.as_primitive::(), - *precision, - *scale, - cast_options, - ), - Float64 => cast_floating_point_to_decimal128( - array.as_primitive::(), - *precision, - *scale, - cast_options, - ), - Utf8View | Utf8 => cast_string_to_decimal::( - array, - *precision, - *scale, - cast_options, - ), - LargeUtf8 => cast_string_to_decimal::( - array, - *precision, - *scale, - cast_options, - ), - Null => Ok(new_null_array(to_type, array.len())), - _ => Err(ArrowError::CastError(format!( - "Casting from {from_type:?} to {to_type:?} not supported" - ))), - } + cast_to_decimal::( + array, + 10_i128, + precision, + scale, + from_type, + to_type, + cast_options, + ) } (_, Decimal256(precision, scale)) if !from_type.is_temporal() => { - // cast data to decimal - match from_type { - UInt8 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - UInt16 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - 
cast_options, - ), - UInt32 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - UInt64 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - Int8 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - Int16 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - Int32 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - Int64 => cast_integer_to_decimal::<_, Decimal256Type, _>( - array.as_primitive::(), - *precision, - *scale, - i256::from_i128(10_i128), - cast_options, - ), - Float32 => cast_floating_point_to_decimal256( - array.as_primitive::(), - *precision, - *scale, - cast_options, - ), - Float64 => cast_floating_point_to_decimal256( - array.as_primitive::(), - *precision, - *scale, - cast_options, - ), - Utf8View | Utf8 => cast_string_to_decimal::( - array, - *precision, - *scale, - cast_options, - ), - LargeUtf8 => cast_string_to_decimal::( - array, - *precision, - *scale, - cast_options, - ), - Null => Ok(new_null_array(to_type, array.len())), - _ => Err(ArrowError::CastError(format!( - "Casting from {from_type:?} to {to_type:?} not supported" - ))), - } + cast_to_decimal::( + array, + i256::from_i128(10_i128), + precision, + scale, + from_type, + to_type, + cast_options, + ) } (Struct(_), Struct(to_fields)) => { let array = array.as_struct(); @@ -2192,6 +2035,198 @@ pub fn cast_with_options( } } +fn cast_from_decimal( + array: &dyn Array, + base: D::Native, + scale: &i8, + from_type: &DataType, + to_type: &DataType, + as_float: F, + cast_options: &CastOptions, +) -> Result +where + D: DecimalType + ArrowPrimitiveType, + ::Native: ArrowNativeTypeOp + ToPrimitive, + F: Fn(D::Native) -> f64, +{ + use DataType::*; + // cast decimal to other type + match to_type { + UInt8 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + UInt16 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + UInt32 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + UInt64 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + Int8 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + Int16 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + Int32 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + Int64 => cast_decimal_to_integer::( + array, + base, + *scale, + cast_options, + ), + Float32 => cast_decimal_to_float::(array, |x| { + (as_float(x) / 10_f64.powi(*scale as i32)) as f32 + }), + Float64 => cast_decimal_to_float::(array, |x| { + as_float(x) / 10_f64.powi(*scale as i32) + }), + Utf8View => value_to_string_view(array, cast_options), + Utf8 => value_to_string::(array, cast_options), + LargeUtf8 => value_to_string::(array, cast_options), + Null => Ok(new_null_array(to_type, array.len())), + _ => Err(ArrowError::CastError(format!( + "Casting from {from_type:?} to {to_type:?} not supported" + ))), + } +} + +fn cast_to_decimal( + array: &dyn Array, + base: M, + precision: &u8, + scale: &i8, + from_type: &DataType, + to_type: &DataType, + 
cast_options: &CastOptions, +) -> Result +where + D: DecimalType + ArrowPrimitiveType, + M: ArrowNativeTypeOp + DecimalCast, + u8: num::traits::AsPrimitive, + u16: num::traits::AsPrimitive, + u32: num::traits::AsPrimitive, + u64: num::traits::AsPrimitive, + i8: num::traits::AsPrimitive, + i16: num::traits::AsPrimitive, + i32: num::traits::AsPrimitive, + i64: num::traits::AsPrimitive, +{ + use DataType::*; + // cast data to decimal + match from_type { + UInt8 => cast_integer_to_decimal::<_, D, M>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + UInt16 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + UInt32 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + UInt64 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + Int8 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + Int16 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + Int32 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + Int64 => cast_integer_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + base, + cast_options, + ), + Float32 => cast_floating_point_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + cast_options, + ), + Float64 => cast_floating_point_to_decimal::<_, D, _>( + array.as_primitive::(), + *precision, + *scale, + cast_options, + ), + Utf8View | Utf8 => cast_string_to_decimal::( + array, + *precision, + *scale, + cast_options, + ), + LargeUtf8 => cast_string_to_decimal::( + array, + *precision, + *scale, + cast_options, + ), + Null => Ok(new_null_array(to_type, array.len())), + _ => Err(ArrowError::CastError(format!( + "Casting from {from_type:?} to {to_type:?} not supported" + ))), + } +} + /// Get the time unit as a multiple of a second const fn time_unit_multiple(unit: &TimeUnit) -> i64 { match unit { @@ -2527,6 +2562,28 @@ mod tests { }; } + fn create_decimal32_array( + array: Vec>, + precision: u8, + scale: i8, + ) -> Result { + array + .into_iter() + .collect::() + .with_precision_and_scale(precision, scale) + } + + fn create_decimal64_array( + array: Vec>, + precision: u8, + scale: i8, + ) -> Result { + array + .into_iter() + .collect::() + .with_precision_and_scale(precision, scale) + } + fn create_decimal_array( array: Vec>, precision: u8, @@ -2675,6 +2732,72 @@ mod tests { ); } + #[test] + fn test_cast_decimal32_to_decimal32() { + let input_type = DataType::Decimal32(9, 3); + let output_type = DataType::Decimal32(9, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal32_array(array, 9, 3).unwrap(); + generate_cast_test_case!( + &array, + Decimal32Array, + &output_type, + vec![ + Some(11234560_i32), + Some(21234560_i32), + Some(31234560_i32), + None + ] + ); + // negative test + let array = vec![Some(123456), None]; + let array = create_decimal32_array(array, 9, 0).unwrap(); + let result_safe = cast(&array, &DataType::Decimal32(2, 2)); + assert!(result_safe.is_ok()); + let options = CastOptions { + safe: false, + ..Default::default() + }; + + let result_unsafe = cast_with_options(&array, &DataType::Decimal32(2, 2), 
&options); + assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal32 of precision 2. Max is 99", + result_unsafe.unwrap_err().to_string()); + } + + #[test] + fn test_cast_decimal64_to_decimal64() { + let input_type = DataType::Decimal64(17, 3); + let output_type = DataType::Decimal64(17, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal64_array(array, 17, 3).unwrap(); + generate_cast_test_case!( + &array, + Decimal64Array, + &output_type, + vec![ + Some(11234560_i64), + Some(21234560_i64), + Some(31234560_i64), + None + ] + ); + // negative test + let array = vec![Some(123456), None]; + let array = create_decimal64_array(array, 9, 0).unwrap(); + let result_safe = cast(&array, &DataType::Decimal64(2, 2)); + assert!(result_safe.is_ok()); + let options = CastOptions { + safe: false, + ..Default::default() + }; + + let result_unsafe = cast_with_options(&array, &DataType::Decimal64(2, 2), &options); + assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal64 of precision 2. Max is 99", + result_unsafe.unwrap_err().to_string()); + } + #[test] fn test_cast_decimal128_to_decimal128() { let input_type = DataType::Decimal128(20, 3); @@ -2708,6 +2831,38 @@ mod tests { result_unsafe.unwrap_err().to_string()); } + #[test] + fn test_cast_decimal32_to_decimal32_dict() { + let p = 9; + let s = 3; + let input_type = DataType::Decimal32(p, s); + let output_type = DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Decimal32(p, s)), + ); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal32_array(array, p, s).unwrap(); + let cast_array = cast_with_options(&array, &output_type, &CastOptions::default()).unwrap(); + assert_eq!(cast_array.data_type(), &output_type); + } + + #[test] + fn test_cast_decimal64_to_decimal64_dict() { + let p = 15; + let s = 3; + let input_type = DataType::Decimal64(p, s); + let output_type = DataType::Dictionary( + Box::new(DataType::Int32), + Box::new(DataType::Decimal64(p, s)), + ); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal64_array(array, p, s).unwrap(); + let cast_array = cast_with_options(&array, &output_type, &CastOptions::default()).unwrap(); + assert_eq!(cast_array.data_type(), &output_type); + } + #[test] fn test_cast_decimal128_to_decimal128_dict() { let p = 20; @@ -2740,6 +2895,46 @@ mod tests { assert_eq!(cast_array.data_type(), &output_type); } + #[test] + fn test_cast_decimal32_to_decimal32_overflow() { + let input_type = DataType::Decimal32(9, 3); + let output_type = DataType::Decimal32(9, 9); + assert!(can_cast_types(&input_type, &output_type)); + + let array = vec![Some(i32::MAX)]; + let array = create_decimal32_array(array, 9, 3).unwrap(); + let result = cast_with_options( + &array, + &output_type, + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + assert_eq!("Cast error: Cannot cast to Decimal32(9, 9). 
Overflowing on 2147483647", + result.unwrap_err().to_string()); + } + + #[test] + fn test_cast_decimal64_to_decimal64_overflow() { + let input_type = DataType::Decimal64(18, 3); + let output_type = DataType::Decimal64(18, 18); + assert!(can_cast_types(&input_type, &output_type)); + + let array = vec![Some(i64::MAX)]; + let array = create_decimal64_array(array, 18, 3).unwrap(); + let result = cast_with_options( + &array, + &output_type, + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + assert_eq!("Cast error: Cannot cast to Decimal64(18, 18). Overflowing on 9223372036854775807", + result.unwrap_err().to_string()); + } + #[test] fn test_cast_decimal128_to_decimal128_overflow() { let input_type = DataType::Decimal128(38, 3); @@ -2780,6 +2975,44 @@ mod tests { result.unwrap_err().to_string()); } + #[test] + fn test_cast_decimal32_to_decimal256() { + let input_type = DataType::Decimal32(8, 3); + let output_type = DataType::Decimal256(20, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal32_array(array, 8, 3).unwrap(); + generate_cast_test_case!( + &array, + Decimal256Array, + &output_type, + vec![ + Some(i256::from_i128(11234560_i128)), + Some(i256::from_i128(21234560_i128)), + Some(i256::from_i128(31234560_i128)), + None + ] + ); + } + #[test] + fn test_cast_decimal64_to_decimal256() { + let input_type = DataType::Decimal64(12, 3); + let output_type = DataType::Decimal256(20, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let array = create_decimal64_array(array, 12, 3).unwrap(); + generate_cast_test_case!( + &array, + Decimal256Array, + &output_type, + vec![ + Some(i256::from_i128(11234560_i128)), + Some(i256::from_i128(21234560_i128)), + Some(i256::from_i128(31234560_i128)), + None + ] + ); + } #[test] fn test_cast_decimal128_to_decimal256() { let input_type = DataType::Decimal128(20, 3); @@ -2888,69 +3121,67 @@ mod tests { ); } - #[test] - fn test_cast_decimal_to_numeric() { - let value_array: Vec> = vec![Some(125), Some(225), Some(325), None, Some(525)]; - let array = create_decimal_array(value_array, 38, 2).unwrap(); + macro_rules! 
generate_decimal_to_numeric_cast_test_case { + ($INPUT_ARRAY: expr) => { // u8 generate_cast_test_case!( - &array, + $INPUT_ARRAY, UInt8Array, &DataType::UInt8, vec![Some(1_u8), Some(2_u8), Some(3_u8), None, Some(5_u8)] ); // u16 generate_cast_test_case!( - &array, + $INPUT_ARRAY, UInt16Array, &DataType::UInt16, vec![Some(1_u16), Some(2_u16), Some(3_u16), None, Some(5_u16)] ); // u32 generate_cast_test_case!( - &array, + $INPUT_ARRAY, UInt32Array, &DataType::UInt32, vec![Some(1_u32), Some(2_u32), Some(3_u32), None, Some(5_u32)] ); // u64 generate_cast_test_case!( - &array, + $INPUT_ARRAY, UInt64Array, &DataType::UInt64, vec![Some(1_u64), Some(2_u64), Some(3_u64), None, Some(5_u64)] ); // i8 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Int8Array, &DataType::Int8, vec![Some(1_i8), Some(2_i8), Some(3_i8), None, Some(5_i8)] ); // i16 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Int16Array, &DataType::Int16, vec![Some(1_i16), Some(2_i16), Some(3_i16), None, Some(5_i16)] ); // i32 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Int32Array, &DataType::Int32, vec![Some(1_i32), Some(2_i32), Some(3_i32), None, Some(5_i32)] ); // i64 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Int64Array, &DataType::Int64, vec![Some(1_i64), Some(2_i64), Some(3_i64), None, Some(5_i64)] ); // f32 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Float32Array, &DataType::Float32, vec![ @@ -2963,7 +3194,7 @@ mod tests { ); // f64 generate_cast_test_case!( - &array, + $INPUT_ARRAY, Float64Array, &DataType::Float64, vec![ @@ -2974,6 +3205,31 @@ mod tests { Some(5.25_f64) ] ); + } + } + + #[test] + fn test_cast_decimal32_to_numeric() { + let value_array: Vec> = vec![Some(125), Some(225), Some(325), None, Some(525)]; + let array = create_decimal32_array(value_array, 8, 2).unwrap(); + + generate_decimal_to_numeric_cast_test_case!(&array); + } + + #[test] + fn test_cast_decimal64_to_numeric() { + let value_array: Vec> = vec![Some(125), Some(225), Some(325), None, Some(525)]; + let array = create_decimal64_array(value_array, 8, 2).unwrap(); + + generate_decimal_to_numeric_cast_test_case!(&array); + } + + #[test] + fn test_cast_decimal_to_numeric() { + let value_array: Vec> = vec![Some(125), Some(225), Some(325), None, Some(525)]; + let array = create_decimal_array(value_array, 38, 2).unwrap(); + + generate_decimal_to_numeric_cast_test_case!(&array); // overflow test: out of range of max u8 let value_array: Vec> = vec![Some(51300)]; @@ -9226,6 +9482,14 @@ mod tests { #[test] fn test_cast_decimal_to_string() { + assert!(can_cast_types( + &DataType::Decimal32(9, 4), + &DataType::Utf8View + )); + assert!(can_cast_types( + &DataType::Decimal64(16, 4), + &DataType::Utf8View + )); assert!(can_cast_types( &DataType::Decimal128(10, 4), &DataType::Utf8View @@ -9270,7 +9534,7 @@ mod tests { } } - let array128: Vec> = vec![ + let array32: Vec> = vec![ Some(1123454), Some(2123456), Some(-3123453), @@ -9281,11 +9545,45 @@ mod tests { Some(-123456789), None, ]; + let array64: Vec> = array32 + .iter() + .map(|num| num.map(|x| x as i64)) + .collect(); + let array128: Vec> = array64 + .iter() + .map(|num| num.map(|x| x as i128)) + .collect(); let array256: Vec> = array128 .iter() .map(|num| num.map(i256::from_i128)) .collect(); + test_decimal_to_string::( + DataType::Utf8View, + create_decimal32_array(array32.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( + DataType::Utf8, + create_decimal32_array(array32.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( + DataType::LargeUtf8, + 
create_decimal32_array(array32, 7, 3).unwrap(), + ); + + test_decimal_to_string::( + DataType::Utf8View, + create_decimal64_array(array64.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( + DataType::Utf8, + create_decimal64_array(array64.clone(), 7, 3).unwrap(), + ); + test_decimal_to_string::( + DataType::LargeUtf8, + create_decimal64_array(array64, 7, 3).unwrap(), + ); + test_decimal_to_string::( DataType::Utf8View, create_decimal_array(array128.clone(), 7, 3).unwrap(), diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 3a99e651bf3b..a8ce3fcc5b1a 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -1325,6 +1325,54 @@ mod tests { assert_eq!("0.290472", lng.value_as_string(9)); } + #[test] + fn test_csv_reader_with_decimal_3264() { + let schema = Arc::new(Schema::new(vec![ + Field::new("city", DataType::Utf8, false), + Field::new("lat", DataType::Decimal32(9, 6), false), + Field::new("lng", DataType::Decimal64(16, 6), false), + ])); + + let file = File::open("test/data/decimal_test.csv").unwrap(); + + let mut csv = ReaderBuilder::new(schema).build(file).unwrap(); + let batch = csv.next().unwrap().unwrap(); + // access data from a primitive array + let lat = batch + .column(1) + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!("57.653484", lat.value_as_string(0)); + assert_eq!("53.002666", lat.value_as_string(1)); + assert_eq!("52.412811", lat.value_as_string(2)); + assert_eq!("51.481583", lat.value_as_string(3)); + assert_eq!("12.123456", lat.value_as_string(4)); + assert_eq!("50.760000", lat.value_as_string(5)); + assert_eq!("0.123000", lat.value_as_string(6)); + assert_eq!("123.000000", lat.value_as_string(7)); + assert_eq!("123.000000", lat.value_as_string(8)); + assert_eq!("-50.760000", lat.value_as_string(9)); + + let lng = batch + .column(2) + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!("-3.335724", lng.value_as_string(0)); + assert_eq!("-2.179404", lng.value_as_string(1)); + assert_eq!("-1.778197", lng.value_as_string(2)); + assert_eq!("-3.179090", lng.value_as_string(3)); + assert_eq!("-3.179090", lng.value_as_string(4)); + assert_eq!("0.290472", lng.value_as_string(5)); + assert_eq!("0.290472", lng.value_as_string(6)); + assert_eq!("0.290472", lng.value_as_string(7)); + assert_eq!("0.290472", lng.value_as_string(8)); + assert_eq!("0.290472", lng.value_as_string(9)); + } + #[test] fn test_csv_from_buf_reader() { let schema = Schema::new(vec![ diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index c5a0a0b76d59..211a107e2a1e 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -418,8 +418,8 @@ mod tests { use crate::ReaderBuilder; use arrow_array::builder::{ - BinaryBuilder, Decimal128Builder, Decimal256Builder, FixedSizeBinaryBuilder, - LargeBinaryBuilder, + BinaryBuilder, Decimal32Builder, Decimal64Builder, Decimal128Builder, Decimal256Builder, + FixedSizeBinaryBuilder, LargeBinaryBuilder, }; use arrow_array::types::*; use arrow_buffer::i256; @@ -496,25 +496,36 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo #[test] fn test_write_csv_decimal() { let schema = Schema::new(vec![ - Field::new("c1", DataType::Decimal128(38, 6), true), - Field::new("c2", DataType::Decimal256(76, 6), true), + Field::new("c1", DataType::Decimal32(9, 6), true), + Field::new("c2", DataType::Decimal64(17, 6), true), + Field::new("c3", DataType::Decimal128(38, 6), true), + Field::new("c4", DataType::Decimal256(76, 6), true), ]); - let mut c1_builder = 
Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6)); + let mut c1_builder = Decimal32Builder::new().with_data_type(DataType::Decimal32(9, 6)); c1_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]); let c1 = c1_builder.finish(); - let mut c2_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6)); - c2_builder.extend(vec![ + let mut c2_builder = Decimal64Builder::new().with_data_type(DataType::Decimal64(17, 6)); + c2_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]); + let c2 = c2_builder.finish(); + + let mut c3_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6)); + c3_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]); + let c3 = c3_builder.finish(); + + let mut c4_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6)); + c4_builder.extend(vec![ Some(i256::from_i128(-3335724)), Some(i256::from_i128(2179404)), None, Some(i256::from_i128(290472)), ]); - let c2 = c2_builder.finish(); + let c4 = c4_builder.finish(); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![ + Arc::new(c1), Arc::new(c2), Arc::new(c3), Arc::new(c4) + ]).unwrap(); let mut file = tempfile::tempfile().unwrap(); @@ -530,15 +541,15 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo let mut buffer: Vec = vec![]; file.read_to_end(&mut buffer).unwrap(); - let expected = r#"c1,c2 --3.335724,-3.335724 -2.179404,2.179404 -, -0.290472,0.290472 --3.335724,-3.335724 -2.179404,2.179404 -, -0.290472,0.290472 + let expected = r#"c1,c2,c3,c4 +-3.335724,-3.335724,-3.335724,-3.335724 +2.179404,2.179404,2.179404,2.179404 +,,, +0.290472,0.290472,0.290472,0.290472 +-3.335724,-3.335724,-3.335724,-3.335724 +2.179404,2.179404,2.179404,2.179404 +,,, +0.290472,0.290472,0.290472,0.290472 "#; assert_eq!(expected, str::from_utf8(&buffer).unwrap()); } diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 4c21d9a7632a..6f016d213675 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -83,6 +83,10 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff | DataType::Float16 | DataType::Float32 | DataType::Float64 + | DataType::Decimal32(_, _) + | DataType::Decimal64(_, _) + | DataType::Decimal128(_, _) + | DataType::Decimal256(_, _) | DataType::Date32 | DataType::Time32(_) | DataType::Date64 @@ -139,10 +143,6 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => { [empty_buffer, MutableBuffer::new(0)] } - DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [ - MutableBuffer::new(capacity * mem::size_of::()), - empty_buffer, - ], DataType::Union(_, mode) => { let type_ids = MutableBuffer::new(capacity * mem::size_of::()); match mode { diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs index ed430fe6a1ec..d4c11e07ecd4 100644 --- a/arrow-json/src/writer/encoder.rs +++ b/arrow-json/src/writer/encoder.rs @@ -138,7 +138,7 @@ fn make_encoder_impl<'a>( }; (Box::new(encoder) as _, array.nulls().cloned()) } - DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { + DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => { let options = 
FormatOptions::new().with_display_error(true);
            let formatter = ArrayFormatter::try_new(array, &options)?;
            (Box::new(RawArrayFormatter(formatter)) as _, array.nulls().cloned())
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index da929ae1da74..e95e342122f0 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -283,6 +283,9 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("cast decimal32 to decimal32 512", |b| {
         b.iter(|| cast_array(&decimal32_array, DataType::Decimal32(8, 2)))
     });
+    c.bench_function("cast decimal64 to decimal64 512", |b| {
+        b.iter(|| cast_array(&decimal64_array, DataType::Decimal64(16, 5)))
+    });
     c.bench_function("cast decimal128 to decimal128 512", |b| {
         b.iter(|| cast_array(&decimal128_array, DataType::Decimal128(30, 5)))
diff --git a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
index 6378cd991e2a..f04b6236a70b 100644
--- a/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
+++ b/parquet/src/arrow/array_reader/fixed_len_byte_array.rs
@@ -67,7 +67,7 @@ pub fn make_fixed_len_byte_array_reader(
         ArrowType::Decimal32(_, _) => {
             if byte_length > 4 {
                 return Err(general_err!(
-                    "decimal 64 type too large, must be less then 4 bytes, got {}",
+                    "decimal 32 type too large, must be less than 4 bytes, got {}",
                     byte_length
                 ));
             }
         ArrowType::Decimal64(_, _) => {
             if byte_length > 8 {
                 return Err(general_err!(
-                    "decimal 32 type too large, must be less then 8 bytes, got {}",
+                    "decimal 64 type too large, must be less than 8 bytes, got {}",
                     byte_length
                 ));
             }

From 20ec84eb1bc58b1e155c781e98623e46d0aee242 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Wed, 18 Dec 2024 04:52:44 -0500
Subject: [PATCH 05/68] Add Field::with_dict_is_ordered (#6885)

---
 arrow-schema/src/field.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs
index 7fb88d48aedb..e5ea92b689fa 100644
--- a/arrow-schema/src/field.rs
+++ b/arrow-schema/src/field.rs
@@ -426,6 +426,19 @@ impl Field {
     }

     /// Returns whether this `Field`'s dictionary is ordered, if this is a dictionary type.
+    ///
+    /// # Example
+    /// ```
+    /// # use arrow_schema::{DataType, Field};
+    /// // non dictionaries do not have a dict is ordered flag
+    /// let field = Field::new("c1", DataType::Int64, false);
+    /// assert_eq!(field.dict_is_ordered(), None);
+    /// // by default a dictionary is not ordered
+    /// let field = Field::new("c1", DataType::Dictionary(Box::new(DataType::Int64), Box::new(DataType::Utf8)), false);
+    /// assert_eq!(field.dict_is_ordered(), Some(false));
+    /// let field = field.with_dict_is_ordered(true);
+    /// assert_eq!(field.dict_is_ordered(), Some(true));
+    /// ```
     #[inline]
     pub const fn dict_is_ordered(&self) -> Option<bool> {
         match self.data_type {
@@ -434,6 +447,18 @@ impl Field {
         }
     }

+    /// Set whether this `Field`'s dictionary is ordered, if it is a dictionary type.
+    ///
+    /// Does nothing if this is not a dictionary type.
+    ///
+    /// See [`Field::dict_is_ordered`] for more information.
+    pub fn with_dict_is_ordered(mut self, dict_is_ordered: bool) -> Self {
+        if matches!(self.data_type, DataType::Dictionary(_, _)) {
+            self.dict_is_ordered = dict_is_ordered;
+        };
+        self
+    }
+
     /// Merge this field into self if it is compatible.
     ///
     /// Struct fields are merged recursively.
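A minimal usage sketch for the `Field::with_dict_is_ordered` setter added above, assuming only the `arrow_schema` API shown in this patch (the field names are illustrative):

```rust
use arrow_schema::{DataType, Field};

fn main() {
    // A dictionary field reports Some(false) until the flag is set
    let field = Field::new(
        "tags",
        DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
        true,
    );
    assert_eq!(field.dict_is_ordered(), Some(false));

    // The builder-style setter flips the flag on dictionary fields
    let field = field.with_dict_is_ordered(true);
    assert_eq!(field.dict_is_ordered(), Some(true));

    // On a non-dictionary field the setter is a no-op and the accessor returns None
    let field = Field::new("id", DataType::Int64, false).with_dict_is_ordered(true);
    assert_eq!(field.dict_is_ordered(), None);
}
```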
From 9daab3392f738ae39ee97a77bfdbcd0cc302813e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Dec 2024 05:52:15 -0500 Subject: [PATCH 06/68] Minor: make it easier to find fix instructions when `cargo fmt` on parquet fails (#6886) * Minor: make it easier to find instructions when fmt fails * purposely introduce a fmt issue * Revert "purposely introduce a fmt issue" This reverts commit 440e52079135df85128b15936425d2b5af488007. * Update .github/workflows/rust.yml Co-authored-by: Ed Seidl --------- Co-authored-by: Ed Seidl --- .github/workflows/rust.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ff5040fd2947..72a53263d330 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -101,12 +101,13 @@ jobs: - name: Format arrow run: cargo fmt --all -- --check - name: Format parquet - # Many modules in parquet are skipped, so check parquet separately. If this check fails, run: - # cargo fmt -p parquet -- --config skip_children=true `find ./parquet -name "*.rs" \! -name format.rs` - # from the top level arrow-rs directory and check in the result. + # Many modules in parquet are skipped, so check parquet separately # https://github.com/apache/arrow-rs/issues/6179 working-directory: parquet - run: cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs` + run: | + # if this fails, run this from the parquet directory: + # cargo fmt -p parquet -- --config skip_children=true `find . -name "*.rs" \! -name format.rs` + cargo fmt -p parquet -- --check --config skip_children=true `find . -name "*.rs" \! -name format.rs` - name: Format object_store working-directory: object_store run: cargo fmt --all -- --check From 9a4ccd1a00f4da2f9f0262a0a4453c6dd485da1e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Dec 2024 05:52:40 -0500 Subject: [PATCH 07/68] Minor: add comments explaining bad MSRV, output in json (#6857) * Minor: add comments explaining bad MSRV * purposely introduce msrv brek * output in JSON format * Revert "purposely introduce msrv brek" This reverts commit 61872b69a5a85748031fe852e48b8e3d3381d270. 
---
 .github/workflows/rust.yml | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 72a53263d330..044250b70435 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -113,7 +113,7 @@ jobs:
         run: cargo fmt --all -- --check

   msrv:
-    name: Verify MSRV
+    name: Verify MSRV (Minimum Supported Rust Version)
     runs-on: ubuntu-latest
     container:
       image: amd64/rust
@@ -127,13 +127,19 @@ jobs:
           cargo update -p ahash --precise 0.8.7
       - name: Check arrow
         working-directory: arrow
-        run: cargo msrv --log-target stdout verify
+        run: |
+          # run `cd arrow; cargo msrv verify` to see problematic dependencies
+          cargo msrv verify --output-format=json
       - name: Check parquet
         working-directory: parquet
-        run: cargo msrv --log-target stdout verify
+        run: |
+          # run `cd parquet; cargo msrv verify` to see problematic dependencies
+          cargo msrv verify --output-format=json
       - name: Check arrow-flight
         working-directory: arrow-flight
-        run: cargo msrv --log-target stdout verify
+        run: |
+          # run `cd arrow-flight; cargo msrv verify` to see problematic dependencies
+          cargo msrv verify --output-format=json
       - name: Downgrade object_store dependencies
         working-directory: object_store
         # Necessary because tokio 1.30.0 updates MSRV to 1.63
         run: |
           cargo update -p tokio --precise 1.29.1
           cargo update -p url --precise 2.5.0
       - name: Check object_store
         working-directory: object_store
-        run: cargo msrv --log-target stdout verify
+        run: |
+          # run `cd object_store; cargo msrv verify` to see problematic dependencies
+          cargo msrv verify --output-format=json

From 77e92b209b583fa607645d3fbb5ae8d7b1068a70 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Wed, 18 Dec 2024 07:25:59 -0500
Subject: [PATCH 08/68] Add 53.4.0 to release schedule (#6896)

* Add 53.4.0 to release schedule

* prettier

---
 README.md | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 57794b1d6a46..f995ff6ad478 100644
--- a/README.md
+++ b/README.md
@@ -63,13 +63,14 @@ is described in the [contributing] guide.
 ## Planned Release Schedule

-| Approximate Date | Version  | Notes                                   |
-| ---------------- | -------- | --------------------------------------- |
-| Nov 2024         | `53.3.0` | Minor, NO breaking API changes          |
-| Dec 2024         | `54.0.0` | Major, potentially breaking API changes |
-| Jan 2025         | `54.1.0` | Minor, NO breaking API changes          |
-| Feb 2025         | `54.2.0` | Minor, NO breaking API changes          |
-| Mar 2025         | `55.0.0` | Major, potentially breaking API changes |
+| Approximate Date | Version  | Notes                                      |
+| ---------------- | -------- | ------------------------------------------ |
+| Nov 2024         | `53.3.0` | Minor, NO breaking API changes             |
+| Dec 2024         | `54.0.0` | Major, potentially breaking API changes    |
+| Jan 2025         | `53.4.0` | Minor, NO breaking API changes (`53` line) |
+| Jan 2025         | `54.1.0` | Minor, NO breaking API changes             |
+| Feb 2025         | `54.2.0` | Minor, NO breaking API changes             |
+| Mar 2025         | `55.0.0` | Major, potentially breaking API changes    |

 [this ticket]: https://github.com/apache/arrow-rs/issues/5368
 [semantic versioning]: https://semver.org/

From 63f5d5e5fca38f209387891609149dd0f61680f1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Wed, 18 Dec 2024 07:49:51 -0500
Subject: [PATCH 09/68] Add deprecation / API removal policy (#6852)

* Add deprecation / API removal policy

* Increase proposal to 2 releases

* change from policy to guidelines, add flexibility

* prettier

* Make instructions more actionable

---
 README.md         | 27 +++++++++++++++++++++++++++
 arrow/README.md   |  2 +-
 parquet/README.md |  2 +-
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f995ff6ad478..723249ad29e5 100644
--- a/README.md
+++ b/README.md
@@ -83,6 +83,33 @@ versions approximately every 2 months.

 [`object_store`]: https://crates.io/crates/object_store

+### Deprecation Guidelines
+
+Minor releases may deprecate, but not remove APIs. Deprecating APIs allows
+downstream Rust programs to still compile, but generate compiler warnings. This
+gives downstream crates time to migrate prior to API removal.
+
+To deprecate an API:
+
+- Mark the API as deprecated using `#[deprecated]` and specify the exact arrow-rs version in which it was deprecated
+- Concisely describe the preferred API to help the user transition
+
+The deprecated version is the next version which will be released (please
+consult the list above). To mark the API as deprecated, use the
+`#[deprecated(since = "...", note = "...")]` attribute.
+
+For example:
+
+```rust
+#[deprecated(since = "51.0.0", note = "Use `date_part` instead")]
+```
+
+In general, deprecated APIs will remain in the codebase for at least two major releases after
+they were deprecated (typically 6 to 9 months later). For example, an API
+deprecated in `51.3.0` can be removed in `54.0.0` (or later). Deprecated APIs
+may be removed earlier or later than these guidelines at the discretion of the
+maintainers.
+
 ## Related Projects

 There are several related crates in different repositories

diff --git a/arrow/README.md b/arrow/README.md
index a1444005ec00..79aefaae9053 100644
--- a/arrow/README.md
+++ b/arrow/README.md
@@ -37,7 +37,7 @@ This crate is tested with the latest stable version of Rust. We do not currently

 The `arrow` crate follows the [SemVer standard] defined by Cargo and works well
 within the Rust crate ecosystem. See the [repository README] for more details on
-the release schedule and version.
+the release schedule, version and deprecation policy.
[SemVer standard]: https://doc.rust-lang.org/cargo/reference/semver.html [repository README]: https://github.com/apache/arrow-rs diff --git a/parquet/README.md b/parquet/README.md index e9f52ff279d5..9ff1d921d692 100644 --- a/parquet/README.md +++ b/parquet/README.md @@ -36,7 +36,7 @@ This crate is tested with the latest stable version of Rust. We do not currently The `parquet` crate follows the [SemVer standard] defined by Cargo and works well within the Rust crate ecosystem. See the [repository README] for more details on -the release schedule and version. +the release schedule, version and deprecation policy. [semver standard]: https://doc.rust-lang.org/cargo/reference/semver.html [repository readme]: https://github.com/apache/arrow-rs From b8cc13e9143cc7c005a739b26da642c5d356736f Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 18 Dec 2024 05:57:53 -0800 Subject: [PATCH 10/68] Enable string-based column projections from Parquet files (#6871) * add function to create ProjectionMask from column names * add some more tests --- parquet/src/arrow/arrow_reader/mod.rs | 68 ++++++++++ parquet/src/arrow/mod.rs | 178 +++++++++++++++++++++++++- parquet/src/arrow/schema/mod.rs | 11 ++ 3 files changed, 256 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 378884a1c430..6eba04c86f91 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -989,6 +989,21 @@ mod tests { assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]); } + #[test] + fn test_arrow_reader_single_column_by_name() { + let file = get_test_file("parquet/generated_simple_numerics/blogs.parquet"); + + let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); + let original_schema = Arc::clone(builder.schema()); + + let mask = ProjectionMask::columns(builder.parquet_schema(), ["blog_id"]); + let reader = builder.with_projection(mask).build().unwrap(); + + // Verify that the schema was correctly parsed + assert_eq!(1, reader.schema().fields().len()); + assert_eq!(original_schema.fields()[1], reader.schema().fields()[0]); + } + #[test] fn test_null_column_reader_test() { let mut file = tempfile::tempfile().unwrap(); @@ -2563,6 +2578,59 @@ mod tests { } } + #[test] + // same as test_read_structs but constructs projection mask via column names + fn test_read_structs_by_name() { + let testdata = arrow::util::test_util::parquet_test_data(); + let path = format!("{testdata}/nested_structs.rust.parquet"); + let file = File::open(&path).unwrap(); + let record_batch_reader = ParquetRecordBatchReader::try_new(file, 60).unwrap(); + + for batch in record_batch_reader { + batch.unwrap(); + } + + let file = File::open(&path).unwrap(); + let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); + + let mask = ProjectionMask::columns( + builder.parquet_schema(), + ["roll_num.count", "PC_CUR.mean", "PC_CUR.sum"], + ); + let projected_reader = builder + .with_projection(mask) + .with_batch_size(60) + .build() + .unwrap(); + + let expected_schema = Schema::new(vec![ + Field::new( + "roll_num", + ArrowDataType::Struct(Fields::from(vec![Field::new( + "count", + ArrowDataType::UInt64, + false, + )])), + false, + ), + Field::new( + "PC_CUR", + ArrowDataType::Struct(Fields::from(vec![ + Field::new("mean", ArrowDataType::Int64, false), + Field::new("sum", ArrowDataType::Int64, false), + ])), + false, + ), + ]); + + assert_eq!(&expected_schema, projected_reader.schema().as_ref()); + + for batch in 
projected_reader { + let batch = batch.unwrap(); + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + } + #[test] fn test_read_maps() { let testdata = arrow::util::test_util::parquet_test_data(); diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index d77436bc1ff7..6777e00fb05c 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -108,12 +108,14 @@ pub mod async_writer; mod record_reader; experimental!(mod schema); +use std::sync::Arc; + pub use self::arrow_writer::ArrowWriter; #[cfg(feature = "async")] pub use self::async_reader::ParquetRecordBatchStreamBuilder; #[cfg(feature = "async")] pub use self::async_writer::AsyncArrowWriter; -use crate::schema::types::SchemaDescriptor; +use crate::schema::types::{SchemaDescriptor, Type}; use arrow_schema::{FieldRef, Schema}; // continue to export deprecated methods until they are removed @@ -210,6 +212,71 @@ impl ProjectionMask { Self { mask: Some(mask) } } + // Given a starting point in the schema, do a DFS for that node adding leaf paths to `paths`. + fn find_leaves(root: &Arc, parent: Option<&String>, paths: &mut Vec) { + let path = parent + .map(|p| [p, root.name()].join(".")) + .unwrap_or(root.name().to_string()); + if root.is_group() { + for child in root.get_fields() { + Self::find_leaves(child, Some(&path), paths); + } + } else { + // Reached a leaf, add to paths + paths.push(path); + } + } + + /// Create a [`ProjectionMask`] which selects only the named columns + /// + /// All leaf columns that fall below a given name will be selected. For example, given + /// the schema + /// ```ignore + /// message schema { + /// OPTIONAL group a (MAP) { + /// REPEATED group key_value { + /// REQUIRED BYTE_ARRAY key (UTF8); // leaf index 0 + /// OPTIONAL group value (MAP) { + /// REPEATED group key_value { + /// REQUIRED INT32 key; // leaf index 1 + /// REQUIRED BOOLEAN value; // leaf index 2 + /// } + /// } + /// } + /// } + /// REQUIRED INT32 b; // leaf index 3 + /// REQUIRED DOUBLE c; // leaf index 4 + /// } + /// ``` + /// `["a.key_value.value", "c"]` would return leaf columns 1, 2, and 4. `["a"]` would return + /// columns 0, 1, and 2. + /// + /// Note: repeated or out of order indices will not impact the final mask. + /// + /// i.e. `["b", "c"]` will construct the same mask as `["c", "b", "c"]`. 
+ pub fn columns<'a>( + schema: &SchemaDescriptor, + names: impl IntoIterator, + ) -> Self { + // first make vector of paths for leaf columns + let mut paths: Vec = vec![]; + for root in schema.root_schema().get_fields() { + Self::find_leaves(root, None, &mut paths); + } + assert_eq!(paths.len(), schema.num_columns()); + + let mut mask = vec![false; schema.num_columns()]; + for name in names { + for idx in 0..schema.num_columns() { + if paths[idx].starts_with(name) { + mask[idx] = true; + } + } + } + + Self { mask: Some(mask) } + } + /// Returns true if the leaf column `leaf_idx` is included by the mask pub fn leaf_included(&self, leaf_idx: usize) -> bool { self.mask.as_ref().map(|m| m[leaf_idx]).unwrap_or(true) @@ -246,10 +313,14 @@ mod test { use crate::arrow::ArrowWriter; use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader, ParquetMetaDataWriter}; use crate::file::properties::{EnabledStatistics, WriterProperties}; + use crate::schema::parser::parse_message_type; + use crate::schema::types::SchemaDescriptor; use arrow_array::{ArrayRef, Int32Array, RecordBatch}; use bytes::Bytes; use std::sync::Arc; + use super::ProjectionMask; + #[test] // Reproducer for https://github.com/apache/arrow-rs/issues/6464 fn test_metadata_read_write_partial_offset() { @@ -375,4 +446,109 @@ mod test { .unwrap(); Bytes::from(buf) } + + #[test] + fn test_mask_from_column_names() { + let message_type = " + message test_schema { + OPTIONAL group a (MAP) { + REPEATED group key_value { + REQUIRED BYTE_ARRAY key (UTF8); + OPTIONAL group value (MAP) { + REPEATED group key_value { + REQUIRED INT32 key; + REQUIRED BOOLEAN value; + } + } + } + } + REQUIRED INT32 b; + REQUIRED DOUBLE c; + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + + let mask = ProjectionMask::columns(&schema, ["foo", "bar"]); + assert_eq!(mask.mask.unwrap(), vec![false; 5]); + + let mask = ProjectionMask::columns(&schema, []); + assert_eq!(mask.mask.unwrap(), vec![false; 5]); + + let mask = ProjectionMask::columns(&schema, ["a", "c"]); + assert_eq!(mask.mask.unwrap(), [true, true, true, false, true]); + + let mask = ProjectionMask::columns(&schema, ["a.key_value.key", "c"]); + assert_eq!(mask.mask.unwrap(), [true, false, false, false, true]); + + let mask = ProjectionMask::columns(&schema, ["a.key_value.value", "b"]); + assert_eq!(mask.mask.unwrap(), [false, true, true, true, false]); + + let message_type = " + message test_schema { + OPTIONAL group a (LIST) { + REPEATED group list { + OPTIONAL group element (LIST) { + REPEATED group list { + OPTIONAL group element (LIST) { + REPEATED group list { + OPTIONAL BYTE_ARRAY element (UTF8); + } + } + } + } + } + } + REQUIRED INT32 b; + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + + let mask = ProjectionMask::columns(&schema, ["a", "b"]); + assert_eq!(mask.mask.unwrap(), [true, true]); + + let mask = ProjectionMask::columns(&schema, ["a.list.element", "b"]); + assert_eq!(mask.mask.unwrap(), [true, true]); + + let mask = + ProjectionMask::columns(&schema, ["a.list.element.list.element.list.element", "b"]); + assert_eq!(mask.mask.unwrap(), [true, true]); + + let mask = ProjectionMask::columns(&schema, ["b"]); + assert_eq!(mask.mask.unwrap(), [false, true]); + + let message_type = " + message test_schema { + OPTIONAL INT32 a; + OPTIONAL INT32 b; + OPTIONAL INT32 c; + OPTIONAL INT32 d; + OPTIONAL 
INT32 e; + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + + let mask = ProjectionMask::columns(&schema, ["a", "b"]); + assert_eq!(mask.mask.unwrap(), [true, true, false, false, false]); + + let mask = ProjectionMask::columns(&schema, ["d", "b", "d"]); + assert_eq!(mask.mask.unwrap(), [false, true, false, true, false]); + + let message_type = " + message test_schema { + OPTIONAL INT32 a; + OPTIONAL INT32 b; + OPTIONAL INT32 a; + OPTIONAL INT32 d; + OPTIONAL INT32 e; + } + "; + let parquet_group_type = parse_message_type(message_type).unwrap(); + let schema = SchemaDescriptor::new(Arc::new(parquet_group_type)); + + let mask = ProjectionMask::columns(&schema, ["a", "e"]); + assert_eq!(mask.mask.unwrap(), [true, false, true, false, true]); + } } diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index be7fa9a00d31..c9051062204d 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -1400,6 +1400,17 @@ mod tests { for i in 0..arrow_fields.len() { assert_eq!(&arrow_fields[i], converted_fields[i].as_ref()); } + + let mask = + ProjectionMask::columns(&parquet_schema, ["group2.leaf4", "group1.leaf1", "leaf5"]); + let converted_arrow_schema = + parquet_to_arrow_schema_by_columns(&parquet_schema, mask, None).unwrap(); + let converted_fields = converted_arrow_schema.fields(); + + assert_eq!(arrow_fields.len(), converted_fields.len()); + for i in 0..arrow_fields.len() { + assert_eq!(&arrow_fields[i], converted_fields[i].as_ref()); + } } #[test] From 1e582ad38a2e7c45d7a4e75a5867c93f48fe2583 Mon Sep 17 00:00:00 2001 From: xxchan Date: Wed, 18 Dec 2024 21:58:19 +0800 Subject: [PATCH 11/68] doc: add comment for timezone string (#6899) * doc: add comment for timezone string Signed-off-by: xxchan * Update arrow-schema/src/datatype.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --------- Signed-off-by: xxchan Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- arrow-schema/src/datatype.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index 1964fc317a7b..eb5ea0c7cb3b 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -196,6 +196,14 @@ pub enum DataType { /// DataType::Timestamp(TimeUnit::Second, Some("literal".into())); /// DataType::Timestamp(TimeUnit::Second, Some("string".to_string().into())); /// ``` + /// + /// Timezone string parsing + /// ----------------------- + /// When feature `chrono-tz` is not enabled, allowed timezone strings are fixed offsets of the form "+09:00", "-09" or "+0930". + /// + /// When feature `chrono-tz` is enabled, additional strings supported by [chrono_tz](https://docs.rs/chrono-tz/latest/chrono_tz/) + /// are also allowed, which include [IANA database](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + /// timezones. Timestamp(TimeUnit, Option>), /// A signed 32-bit date representing the elapsed time since UNIX epoch (1970-01-01) /// in days. 
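A minimal sketch of the timezone strings described by the doc comment above; the fixed-offset forms parse without optional features, while using an IANA name such as `"Asia/Tokyo"` assumes the `chrono-tz` feature is enabled:

```rust
use arrow_schema::{DataType, TimeUnit};

fn main() {
    // Fixed offsets are always accepted
    let _t1 = DataType::Timestamp(TimeUnit::Second, Some("+09:00".into()));
    let _t2 = DataType::Timestamp(TimeUnit::Second, Some("-09".into()));
    let _t3 = DataType::Timestamp(TimeUnit::Second, Some("+0930".into()));

    // IANA database names additionally require the `chrono-tz` feature
    // when the timezone string is actually parsed
    let _t4 = DataType::Timestamp(TimeUnit::Second, Some("Asia/Tokyo".into()));
}
```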
From 4c2b75b9f09d651a687479b385a4226cda286da1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb
Date: Thu, 19 Dec 2024 08:56:05 -0500
Subject: [PATCH 12/68] Update version to 54.0.0, add CHANGELOG (#6894)

* Update version to 54.0.0

* Update changelog

* update notes

* updates

* update

---
 CHANGELOG-old.md                 | 170 ++++++++++++++++++++++++++++++
 CHANGELOG.md                     | 173 +++++++++++++++++--------------
 Cargo.toml                       |  32 +++---
 arrow-flight/README.md           |   2 +-
 dev/release/update_change_log.sh |   4 +-
 5 files changed, 283 insertions(+), 98 deletions(-)

diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md
index 376da6277114..3fb17b390ac1 100644
--- a/CHANGELOG-old.md
+++ b/CHANGELOG-old.md
@@ -19,6 +19,176 @@
 # Historical Changelog
+
+## [53.3.0](https://github.com/apache/arrow-rs/tree/53.3.0) (2024-11-17)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/53.2.0...53.3.0)
+
+**Implemented enhancements:**
+
+- `PartialEq` of GenericByteViewArray \(StringViewArray / ByteViewArray\) that compares on equality rather than logical value [\#6679](https://github.com/apache/arrow-rs/issues/6679) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Need a mechanism to handle schema changes due to dictionary hydration in FlightSQL server implementations [\#6672](https://github.com/apache/arrow-rs/issues/6672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- Support encoding Utf8View columns to JSON [\#6642](https://github.com/apache/arrow-rs/issues/6642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement `append_n` for `BooleanBuilder` [\#6634](https://github.com/apache/arrow-rs/issues/6634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Some take optimizations [\#6621](https://github.com/apache/arrow-rs/issues/6621) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Error Instead of Panic On Attempting to Write More Than 32769 Row Groups [\#6591](https://github.com/apache/arrow-rs/issues/6591) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Make casting from a timestamp without timezone to a timestamp with timezone configurable [\#6555](https://github.com/apache/arrow-rs/issues/6555)
+- Add `record_batch!` macro for easy record batch creation [\#6553](https://github.com/apache/arrow-rs/issues/6553) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Support `Binary` --\> `Utf8View` casting [\#6531](https://github.com/apache/arrow-rs/issues/6531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- `downcast_primitive_array` and `downcast_dictionary_array` are not hygienic wrt imports [\#6400](https://github.com/apache/arrow-rs/issues/6400) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Implement interleave\_record\_batch [\#6731](https://github.com/apache/arrow-rs/pull/6731) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([waynexia](https://github.com/waynexia))
+- feat: `record_batch!` macro [\#6588](https://github.com/apache/arrow-rs/pull/6588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ByteBaker](https://github.com/ByteBaker))
+
+**Fixed bugs:**
+
+- Signed decimal e-notation parsing bug [\#6728](https://github.com/apache/arrow-rs/issues/6728) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add support for Utf8View -\> numeric in can\_cast\_types [\#6715](https://github.com/apache/arrow-rs/issues/6715)
+- IPC file writer produces incorrect footer when not preserving dict ID [\#6710](https://github.com/apache/arrow-rs/issues/6710) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- parquet from\_thrift\_helper incorrectly checks index [\#6693](https://github.com/apache/arrow-rs/issues/6693) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Primitive REPEATED fields not contained in LIST annotated groups aren't read as lists by record reader [\#6648](https://github.com/apache/arrow-rs/issues/6648) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- DictionaryHandling does not recurse into Map fields 
[\#6644](https://github.com/apache/arrow-rs/issues/6644) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Array writer output empty when no record is written [\#6613](https://github.com/apache/arrow-rs/issues/6613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Archery Integration Test with c\# failing on main [\#6577](https://github.com/apache/arrow-rs/issues/6577) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Potential unsoundness in `filter_run_end_array` [\#6569](https://github.com/apache/arrow-rs/issues/6569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet reader can generate incorrect validity buffer information for nested structures [\#6510](https://github.com/apache/arrow-rs/issues/6510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- arrow-array ffi: FFI\_ArrowArray.null\_count is always interpreted as unsigned and initialized during conversion from C to Rust. [\#6497](https://github.com/apache/arrow-rs/issues/6497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- Minor: Document pattern for accessing views in StringView [\#6673](https://github.com/apache/arrow-rs/pull/6673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve Array::is\_nullable documentation [\#6615](https://github.com/apache/arrow-rs/pull/6615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Minor: improve docs for ByteViewArray-\>ByteArray From impl [\#6610](https://github.com/apache/arrow-rs/pull/6610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) + +**Performance improvements:** + +- Speed up `filter_run_end_array` [\#6712](https://github.com/apache/arrow-rs/pull/6712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) + +**Closed issues:** + +- Incorrect like results for pattern starting/ending with `%` percent and containing escape characters [\#6702](https://github.com/apache/arrow-rs/issues/6702) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Merged pull requests:** + +- Fix signed decimal e-notation parsing [\#6729](https://github.com/apache/arrow-rs/pull/6729) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gruuya](https://github.com/gruuya)) +- Clean up some arrow-flight tests and duplicated code [\#6725](https://github.com/apache/arrow-rs/pull/6725) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime)) +- Update PR template section about API breaking changes [\#6723](https://github.com/apache/arrow-rs/pull/6723) ([findepi](https://github.com/findepi)) +- Support for casting `StringViewArray` to `DecimalArray` [\#6720](https://github.com/apache/arrow-rs/pull/6720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365)) +- File writer preserve dict bug [\#6711](https://github.com/apache/arrow-rs/pull/6711) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- Add filter\_kernel benchmark for run array [\#6706](https://github.com/apache/arrow-rs/pull/6706) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) +- 
Fix string view ILIKE checks with NULL values [\#6705](https://github.com/apache/arrow-rs/pull/6705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Implement logical\_null\_count for more array types [\#6704](https://github.com/apache/arrow-rs/pull/6704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Fix LIKE with escapes [\#6703](https://github.com/apache/arrow-rs/pull/6703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Speed up `filter_bytes` [\#6699](https://github.com/apache/arrow-rs/pull/6699) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Minor: fix misleading comment in byte view [\#6695](https://github.com/apache/arrow-rs/pull/6695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jayzhan211](https://github.com/jayzhan211)) +- minor fix on checking index [\#6694](https://github.com/apache/arrow-rs/pull/6694) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jp0317](https://github.com/jp0317)) +- Undo run end filter performance regression [\#6691](https://github.com/apache/arrow-rs/pull/6691) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) +- Reimplement `PartialEq` of `GenericByteViewArray` compares by logical value [\#6689](https://github.com/apache/arrow-rs/pull/6689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365)) +- feat: expose known\_schema from FlightDataEncoder [\#6688](https://github.com/apache/arrow-rs/pull/6688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc)) +- Update hashbrown requirement from 0.14.2 to 0.15.1 [\#6684](https://github.com/apache/arrow-rs/pull/6684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Support Duration in JSON Reader [\#6683](https://github.com/apache/arrow-rs/pull/6683) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([simonvandel](https://github.com/simonvandel)) +- Check predicate and values are the same length for run end array filter safety [\#6675](https://github.com/apache/arrow-rs/pull/6675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) +- \[ffi\] Fix arrow-array null\_count error during conversion from C to Rust [\#6674](https://github.com/apache/arrow-rs/pull/6674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adbmal](https://github.com/adbmal)) +- Support `Utf8View` for `bit_length` kernel [\#6671](https://github.com/apache/arrow-rs/pull/6671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([austin362667](https://github.com/austin362667)) +- Fix string view LIKE checks with NULL values [\#6662](https://github.com/apache/arrow-rs/pull/6662) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Improve documentation for `nullif` kernel [\#6658](https://github.com/apache/arrow-rs/pull/6658) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Improve test\_auth error message when contains\(\) fails 
[\#6657](https://github.com/apache/arrow-rs/pull/6657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- Let std::fmt::Debug for StructArray output Null/Validity info [\#6655](https://github.com/apache/arrow-rs/pull/6655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([XinyuZeng](https://github.com/XinyuZeng)) +- Include offending line number when processing CSV file fails [\#6653](https://github.com/apache/arrow-rs/pull/6653) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- feat: add write\_bytes for GenericBinaryBuilder [\#6652](https://github.com/apache/arrow-rs/pull/6652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tisonkun](https://github.com/tisonkun)) +- feat: Support Utf8View in JSON serialization [\#6651](https://github.com/apache/arrow-rs/pull/6651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonmmease](https://github.com/jonmmease)) +- fix: include chrono-tz in flight sql cli [\#6650](https://github.com/apache/arrow-rs/pull/6650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- Handle primitive REPEATED field not contained in LIST annotated group [\#6649](https://github.com/apache/arrow-rs/pull/6649) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) +- Implement `append_n` for `BooleanBuilder` [\#6646](https://github.com/apache/arrow-rs/pull/6646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) +- fix: recurse into Map datatype when hydrating dictionaries [\#6645](https://github.com/apache/arrow-rs/pull/6645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc)) +- fix: enable TLS roots for flight CLI client [\#6640](https://github.com/apache/arrow-rs/pull/6640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- doc: Clarify take kernel semantics [\#6632](https://github.com/apache/arrow-rs/pull/6632) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Return error rather than panic when too many row groups are written [\#6629](https://github.com/apache/arrow-rs/pull/6629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Fix test feature selection so all feature combinations work as expected [\#6626](https://github.com/apache/arrow-rs/pull/6626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([itsjunetime](https://github.com/itsjunetime)) +- Add Parquet RowSelection benchmark [\#6623](https://github.com/apache/arrow-rs/pull/6623) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao)) +- Optimize `take_bits` to optimize `take_boolean` / `take_primitive` / `take_byte_view`: up to -25% [\#6622](https://github.com/apache/arrow-rs/pull/6622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Make downcast macros hygenic \(\#6400\) 
[\#6620](https://github.com/apache/arrow-rs/pull/6620) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update proc-macro2 requirement from =1.0.88 to =1.0.89 [\#6618](https://github.com/apache/arrow-rs/pull/6618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Fix arrow-json writer empty [\#6614](https://github.com/apache/arrow-rs/pull/6614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gwik](https://github.com/gwik)) +- Add `ParquetObjectReader::with_runtime` [\#6612](https://github.com/apache/arrow-rs/pull/6612) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([itsjunetime](https://github.com/itsjunetime)) +- Re-enable `C#` arrow flight integration test [\#6611](https://github.com/apache/arrow-rs/pull/6611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Add Array::logical\_null\_count for inspecting number of null values [\#6608](https://github.com/apache/arrow-rs/pull/6608) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Added casting from Binary/LargeBinary to Utf8View [\#6592](https://github.com/apache/arrow-rs/pull/6592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ngli-me](https://github.com/ngli-me)) +- Parquet AsyncReader: Don't panic when empty offset\_index is Some\(\[\]\) [\#6582](https://github.com/apache/arrow-rs/pull/6582) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jroddev](https://github.com/jroddev)) +- Skip writing down null buffers for non-nullable primitive arrays [\#6524](https://github.com/apache/arrow-rs/pull/6524) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([bkirwi](https://github.com/bkirwi)) ## [53.2.0](https://github.com/apache/arrow-rs/tree/53.2.0) (2024-10-21) [Full Changelog](https://github.com/apache/arrow-rs/compare/53.1.0...53.2.0) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b729360608b..a7f2a4ff34d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,101 +19,116 @@ # Changelog -## [53.3.0](https://github.com/apache/arrow-rs/tree/53.3.0) (2024-11-17) +## [54.0.0](https://github.com/apache/arrow-rs/tree/54.0.0) (2024-12-18) -[Full Changelog](https://github.com/apache/arrow-rs/compare/53.2.0...53.3.0) +[Full Changelog](https://github.com/apache/arrow-rs/compare/53.3.0...54.0.0) + +**Breaking changes:** + +- avoid redundant parsing of repeated value in RleDecoder [\#6834](https://github.com/apache/arrow-rs/pull/6834) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jp0317](https://github.com/jp0317)) +- Handling nullable DictionaryArray in CSV parser [\#6830](https://github.com/apache/arrow-rs/pull/6830) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([edmondop](https://github.com/edmondop)) +- fix\(flightsql\): remove Any encoding of DoPutUpdateResult [\#6825](https://github.com/apache/arrow-rs/pull/6825) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([davisp](https://github.com/davisp)) +- arrow-ipc: Default to not preserving dict IDs [\#6788](https://github.com/apache/arrow-rs/pull/6788) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) +- Remove some very old deprecated functions [\#6774](https://github.com/apache/arrow-rs/pull/6774) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- update to pyo3 0.23.0 [\#6745](https://github.com/apache/arrow-rs/pull/6745) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([psvri](https://github.com/psvri)) +- Remove APIs deprecated since v 4.4.0 [\#6722](https://github.com/apache/arrow-rs/pull/6722) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- Return `None` when Parquet page indexes are not present in file [\#6639](https://github.com/apache/arrow-rs/pull/6639) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add `ParquetError::NeedMoreData` mark `ParquetError` as `non_exhaustive` [\#6630](https://github.com/apache/arrow-rs/pull/6630) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Remove APIs deprecated since v 2.0.0 [\#6609](https://github.com/apache/arrow-rs/pull/6609) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) **Implemented enhancements:** -- `PartialEq` of GenericByteViewArray \(StringViewArray / ByteViewArray\) that compares on equality rather than logical value [\#6679](https://github.com/apache/arrow-rs/issues/6679) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Need a mechanism to handle schema changes due to dictionary hydration in FlightSQL server implementations [\#6672](https://github.com/apache/arrow-rs/issues/6672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] -- Support encoding Utf8View columns to JSON [\#6642](https://github.com/apache/arrow-rs/issues/6642) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement `append_n` for `BooleanBuilder` [\#6634](https://github.com/apache/arrow-rs/issues/6634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Some take optimizations [\#6621](https://github.com/apache/arrow-rs/issues/6621) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Error Instead of Panic On Attempting to Write More Than 32769 Row Groups [\#6591](https://github.com/apache/arrow-rs/issues/6591) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Make casting from a timestamp without timezone to a timestamp with timezone configurable [\#6555](https://github.com/apache/arrow-rs/issues/6555) -- Add `record_batch!` macro for easy record batch creation [\#6553](https://github.com/apache/arrow-rs/issues/6553) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `Binary` --\> `Utf8View` casting [\#6531](https://github.com/apache/arrow-rs/issues/6531) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `downcast_primitive_array` and `downcast_dictionary_array` are not hygienic wrt imports [\#6400](https://github.com/apache/arrow-rs/issues/6400) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Implement interleave\_record\_batch [\#6731](https://github.com/apache/arrow-rs/pull/6731) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([waynexia](https://github.com/waynexia)) -- feat: `record_batch!` macro [\#6588](https://github.com/apache/arrow-rs/pull/6588) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ByteBaker](https://github.com/ByteBaker)) +- Parquet schema hint doesn't support integer types upcasting [\#6891](https://github.com/apache/arrow-rs/issues/6891) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Parquet UTF-8 max statistics are overly pessimistic [\#6867](https://github.com/apache/arrow-rs/issues/6867) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Add builder support for Int8 keys [\#6844](https://github.com/apache/arrow-rs/issues/6844) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Formalize the name of the nested `Field` in a list [\#6784](https://github.com/apache/arrow-rs/issues/6784) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Allow disabling the writing of Parquet Offset Index [\#6778](https://github.com/apache/arrow-rs/issues/6778) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `parquet::record::make_row` is not exposed to users, leaving no option to users to manually create `Row` objects [\#6761](https://github.com/apache/arrow-rs/issues/6761) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Avoid `from_num_days_from_ce_opt` calls in `timestamp_s_to_datetime` if we don't need [\#6746](https://github.com/apache/arrow-rs/issues/6746) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Temporal -\> Utf8View casting [\#6734](https://github.com/apache/arrow-rs/issues/6734) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add Option To Coerce List Type on Parquet Write [\#6733](https://github.com/apache/arrow-rs/issues/6733) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Numeric -\> Utf8View casting [\#6714](https://github.com/apache/arrow-rs/issues/6714) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support Utf8View \<=\> boolean casting [\#6713](https://github.com/apache/arrow-rs/issues/6713) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] **Fixed bugs:** -- Signed decimal e-notation parsing bug [\#6728](https://github.com/apache/arrow-rs/issues/6728) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add support for Utf8View -\> numeric in can\_cast\_types [\#6715](https://github.com/apache/arrow-rs/issues/6715) -- IPC file writer produces incorrect footer when not preserving dict ID [\#6710](https://github.com/apache/arrow-rs/issues/6710) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- parquet from\_thrift\_helper incorrectly checks index [\#6693](https://github.com/apache/arrow-rs/issues/6693) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Primitive REPEATED fields not contained in LIST annotated groups aren't read as lists by record reader [\#6648](https://github.com/apache/arrow-rs/issues/6648) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- DictionaryHandling does not recurse into Map fields [\#6644](https://github.com/apache/arrow-rs/issues/6644) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] -- Array writer output empty when no record is written 
[\#6613](https://github.com/apache/arrow-rs/issues/6613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Archery Integration Test with c\# failing on main [\#6577](https://github.com/apache/arrow-rs/issues/6577) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Potential unsoundness in `filter_run_end_array` [\#6569](https://github.com/apache/arrow-rs/issues/6569) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Parquet reader can generate incorrect validity buffer information for nested structures [\#6510](https://github.com/apache/arrow-rs/issues/6510) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- arrow-array ffi: FFI\_ArrowArray.null\_count is always interpreted as unsigned and initialized during conversion from C to Rust. [\#6497](https://github.com/apache/arrow-rs/issues/6497) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `Buffer::bit_slice` loses length with byte-aligned offsets [\#6895](https://github.com/apache/arrow-rs/issues/6895) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet arrow writer doesn't track memory size correctly for fixed sized lists [\#6839](https://github.com/apache/arrow-rs/issues/6839) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Casting Decimal128 to Decimal128 with smaller precision produces incorrect results in some cases [\#6833](https://github.com/apache/arrow-rs/issues/6833) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Should empty nullable dictionary be parsed as null from arrow-csv? [\#6821](https://github.com/apache/arrow-rs/issues/6821) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Array take doesn't make fields nullable [\#6809](https://github.com/apache/arrow-rs/issues/6809) +- Arrow Flight Encodes a Slice's List Offsets If the slice offset is starts with zero [\#6803](https://github.com/apache/arrow-rs/issues/6803) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Parquet readers incorrectly interpret legacy nested lists [\#6756](https://github.com/apache/arrow-rs/issues/6756) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- filter\_bits under-allocates resulting boolean buffer [\#6750](https://github.com/apache/arrow-rs/issues/6750) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Multi-language support issues with Arrow FlightSQL client's execute\_update and execute\_ingest methods [\#6545](https://github.com/apache/arrow-rs/issues/6545) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] **Documentation updates:** -- Minor: Document pattern for accessing views in StringView [\#6673](https://github.com/apache/arrow-rs/pull/6673) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve Array::is\_nullable documentation [\#6615](https://github.com/apache/arrow-rs/pull/6615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Minor: improve docs for ByteViewArray-\>ByteArray From impl [\#6610](https://github.com/apache/arrow-rs/pull/6610) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) - -**Performance improvements:** - -- Speed up `filter_run_end_array` [\#6712](https://github.com/apache/arrow-rs/pull/6712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Should we document at what 
rate deprecated APIs are removed? [\#6851](https://github.com/apache/arrow-rs/issues/6851) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Fix docstring for `Format::with_header` in `arrow-csv` [\#6856](https://github.com/apache/arrow-rs/pull/6856) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kylebarron](https://github.com/kylebarron)) +- Add deprecation / API removal policy [\#6852](https://github.com/apache/arrow-rs/pull/6852) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Minor: add example for creating `SchemaDescriptor` [\#6841](https://github.com/apache/arrow-rs/pull/6841) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- chore: enrich panic context when BooleanBuffer fails to create [\#6810](https://github.com/apache/arrow-rs/pull/6810) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tisonkun](https://github.com/tisonkun)) **Closed issues:** -- Incorrect like results for pattern starting/ending with `%` percent and containing escape characters [\#6702](https://github.com/apache/arrow-rs/issues/6702) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[FlightSQL\] GetCatalogsBuilder does not sort the catalog names [\#6807](https://github.com/apache/arrow-rs/issues/6807) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Add a lint to automatically check for unused dependencies [\#6796](https://github.com/apache/arrow-rs/issues/6796) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] **Merged pull requests:** -- Fix signed decimal e-notation parsing [\#6729](https://github.com/apache/arrow-rs/pull/6729) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gruuya](https://github.com/gruuya)) -- Clean up some arrow-flight tests and duplicated code [\#6725](https://github.com/apache/arrow-rs/pull/6725) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime)) -- Update PR template section about API breaking changes [\#6723](https://github.com/apache/arrow-rs/pull/6723) ([findepi](https://github.com/findepi)) -- Support for casting `StringViewArray` to `DecimalArray` [\#6720](https://github.com/apache/arrow-rs/pull/6720) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365)) -- File writer preserve dict bug [\#6711](https://github.com/apache/arrow-rs/pull/6711) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Add filter\_kernel benchmark for run array [\#6706](https://github.com/apache/arrow-rs/pull/6706) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) -- Fix string view ILIKE checks with NULL values [\#6705](https://github.com/apache/arrow-rs/pull/6705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Implement logical\_null\_count for more array types [\#6704](https://github.com/apache/arrow-rs/pull/6704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Fix LIKE with 
escapes [\#6703](https://github.com/apache/arrow-rs/pull/6703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Speed up `filter_bytes` [\#6699](https://github.com/apache/arrow-rs/pull/6699) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Minor: fix misleading comment in byte view [\#6695](https://github.com/apache/arrow-rs/pull/6695) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jayzhan211](https://github.com/jayzhan211)) -- minor fix on checking index [\#6694](https://github.com/apache/arrow-rs/pull/6694) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jp0317](https://github.com/jp0317)) -- Undo run end filter performance regression [\#6691](https://github.com/apache/arrow-rs/pull/6691) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) -- Reimplement `PartialEq` of `GenericByteViewArray` compares by logical value [\#6689](https://github.com/apache/arrow-rs/pull/6689) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365)) -- feat: expose known\_schema from FlightDataEncoder [\#6688](https://github.com/apache/arrow-rs/pull/6688) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc)) -- Update hashbrown requirement from 0.14.2 to 0.15.1 [\#6684](https://github.com/apache/arrow-rs/pull/6684) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Support Duration in JSON Reader [\#6683](https://github.com/apache/arrow-rs/pull/6683) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([simonvandel](https://github.com/simonvandel)) -- Check predicate and values are the same length for run end array filter safety [\#6675](https://github.com/apache/arrow-rs/pull/6675) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) -- \[ffi\] Fix arrow-array null\_count error during conversion from C to Rust [\#6674](https://github.com/apache/arrow-rs/pull/6674) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adbmal](https://github.com/adbmal)) -- Support `Utf8View` for `bit_length` kernel [\#6671](https://github.com/apache/arrow-rs/pull/6671) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([austin362667](https://github.com/austin362667)) -- Fix string view LIKE checks with NULL values [\#6662](https://github.com/apache/arrow-rs/pull/6662) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Improve documentation for `nullif` kernel [\#6658](https://github.com/apache/arrow-rs/pull/6658) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Improve test\_auth error message when contains\(\) fails [\#6657](https://github.com/apache/arrow-rs/pull/6657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) -- Let std::fmt::Debug for StructArray output Null/Validity info [\#6655](https://github.com/apache/arrow-rs/pull/6655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([XinyuZeng](https://github.com/XinyuZeng)) -- Include 
offending line number when processing CSV file fails [\#6653](https://github.com/apache/arrow-rs/pull/6653) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- feat: add write\_bytes for GenericBinaryBuilder [\#6652](https://github.com/apache/arrow-rs/pull/6652) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tisonkun](https://github.com/tisonkun)) -- feat: Support Utf8View in JSON serialization [\#6651](https://github.com/apache/arrow-rs/pull/6651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonmmease](https://github.com/jonmmease)) -- fix: include chrono-tz in flight sql cli [\#6650](https://github.com/apache/arrow-rs/pull/6650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) -- Handle primitive REPEATED field not contained in LIST annotated group [\#6649](https://github.com/apache/arrow-rs/pull/6649) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) -- Implement `append_n` for `BooleanBuilder` [\#6646](https://github.com/apache/arrow-rs/pull/6646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([delamarch3](https://github.com/delamarch3)) -- fix: recurse into Map datatype when hydrating dictionaries [\#6645](https://github.com/apache/arrow-rs/pull/6645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([nathanielc](https://github.com/nathanielc)) -- fix: enable TLS roots for flight CLI client [\#6640](https://github.com/apache/arrow-rs/pull/6640) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) -- doc: Clarify take kernel semantics [\#6632](https://github.com/apache/arrow-rs/pull/6632) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Return error rather than panic when too many row groups are written [\#6629](https://github.com/apache/arrow-rs/pull/6629) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) -- Fix test feature selection so all feature combinations work as expected [\#6626](https://github.com/apache/arrow-rs/pull/6626) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([itsjunetime](https://github.com/itsjunetime)) -- Add Parquet RowSelection benchmark [\#6623](https://github.com/apache/arrow-rs/pull/6623) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao)) -- Optimize `take_bits` to optimize `take_boolean` / `take_primitive` / `take_byte_view`: up to -25% [\#6622](https://github.com/apache/arrow-rs/pull/6622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Make downcast macros hygenic \(\#6400\) [\#6620](https://github.com/apache/arrow-rs/pull/6620) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Update proc-macro2 requirement from =1.0.88 to =1.0.89 [\#6618](https://github.com/apache/arrow-rs/pull/6618) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) -- Fix arrow-json writer 
empty [\#6614](https://github.com/apache/arrow-rs/pull/6614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gwik](https://github.com/gwik)) -- Add `ParquetObjectReader::with_runtime` [\#6612](https://github.com/apache/arrow-rs/pull/6612) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([itsjunetime](https://github.com/itsjunetime)) -- Re-enable `C#` arrow flight integration test [\#6611](https://github.com/apache/arrow-rs/pull/6611) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add Array::logical\_null\_count for inspecting number of null values [\#6608](https://github.com/apache/arrow-rs/pull/6608) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) -- Added casting from Binary/LargeBinary to Utf8View [\#6592](https://github.com/apache/arrow-rs/pull/6592) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ngli-me](https://github.com/ngli-me)) -- Parquet AsyncReader: Don't panic when empty offset\_index is Some\(\[\]\) [\#6582](https://github.com/apache/arrow-rs/pull/6582) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jroddev](https://github.com/jroddev)) -- Skip writing down null buffers for non-nullable primitive arrays [\#6524](https://github.com/apache/arrow-rs/pull/6524) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([bkirwi](https://github.com/bkirwi)) +- doc: add comment for timezone string [\#6899](https://github.com/apache/arrow-rs/pull/6899) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xxchan](https://github.com/xxchan)) +- docs: fix typo [\#6890](https://github.com/apache/arrow-rs/pull/6890) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Minor: Fix deprecation notice for `arrow_to_parquet_schema` [\#6889](https://github.com/apache/arrow-rs/pull/6889) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add Field::with\_dict\_is\_ordered [\#6885](https://github.com/apache/arrow-rs/pull/6885) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Deprecate "max statistics size" property in `WriterProperties` [\#6884](https://github.com/apache/arrow-rs/pull/6884) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add deprecation warnings for everything related to `dict_id` [\#6873](https://github.com/apache/arrow-rs/pull/6873) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([brancz](https://github.com/brancz)) +- Enable matching temporal as from\_type to Utf8View [\#6872](https://github.com/apache/arrow-rs/pull/6872) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Kev1n8](https://github.com/Kev1n8)) +- Enable string-based column projections from Parquet files [\#6871](https://github.com/apache/arrow-rs/pull/6871) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Improvements to UTF-8 statistics truncation [\#6870](https://github.com/apache/arrow-rs/pull/6870) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] 
([etseidl](https://github.com/etseidl)) +- fix: make GetCatalogsBuilder sort catalog names [\#6864](https://github.com/apache/arrow-rs/pull/6864) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([niebayes](https://github.com/niebayes)) +- add buffered data\_pages to parquet column writer total bytes estimation [\#6862](https://github.com/apache/arrow-rs/pull/6862) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([onursatici](https://github.com/onursatici)) +- Update prost-build requirement from =0.13.3 to =0.13.4 [\#6860](https://github.com/apache/arrow-rs/pull/6860) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Minor: add comments explaining bad MSRV, output in json [\#6857](https://github.com/apache/arrow-rs/pull/6857) ([alamb](https://github.com/alamb)) +- perf: Use Cow in get\_format\_string in FFI\_ArrowSchema [\#6853](https://github.com/apache/arrow-rs/pull/6853) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) +- chore: add cast\_decimal benchmark [\#6850](https://github.com/apache/arrow-rs/pull/6850) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([andygrove](https://github.com/andygrove)) +- arrow-array::builder: support Int8, Int16 and Int64 keys [\#6845](https://github.com/apache/arrow-rs/pull/6845) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ajwerner](https://github.com/ajwerner)) +- Add `ArrowToParquetSchemaConverter`, deprecate `arrow_to_parquet_schema` [\#6840](https://github.com/apache/arrow-rs/pull/6840) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) +- Remove APIs deprecated in 50.0.0 [\#6838](https://github.com/apache/arrow-rs/pull/6838) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- fix: decimal conversion looses value on lower precision [\#6836](https://github.com/apache/arrow-rs/pull/6836) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([himadripal](https://github.com/himadripal)) +- Update sysinfo requirement from 0.32.0 to 0.33.0 [\#6835](https://github.com/apache/arrow-rs/pull/6835) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Optionally coerce names of maps and lists to match Parquet specification [\#6828](https://github.com/apache/arrow-rs/pull/6828) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Remove deprecated unary\_dyn and try\_unary\_dyn [\#6824](https://github.com/apache/arrow-rs/pull/6824) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Remove deprecated flight\_data\_from\_arrow\_batch [\#6823](https://github.com/apache/arrow-rs/pull/6823) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- \[arrow-cast\] Support cast boolean from/to string view [\#6822](https://github.com/apache/arrow-rs/pull/6822) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365)) +- Hook up Avro Decoder [\#6820](https://github.com/apache/arrow-rs/pull/6820) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix arrow-avro compilation without default features [\#6819](https://github.com/apache/arrow-rs/pull/6819) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Support shrink to empty [\#6817](https://github.com/apache/arrow-rs/pull/6817) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- \[arrow-cast\] Support cast numeric to string view \(alternate\) [\#6816](https://github.com/apache/arrow-rs/pull/6816) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Hide implicit optional dependency features in arrow-flight [\#6806](https://github.com/apache/arrow-rs/pull/6806) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- fix: Encoding of List offsets was incorrect when slice offsets begin with zero [\#6805](https://github.com/apache/arrow-rs/pull/6805) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([HawaiianSpork](https://github.com/HawaiianSpork)) +- Enable unused\_crate\_dependencies Rust lint, remove unused dependencies [\#6804](https://github.com/apache/arrow-rs/pull/6804) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- Minor: Fix docstrings for `ColumnProperties::statistics_enabled` property [\#6798](https://github.com/apache/arrow-rs/pull/6798) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add option to disable writing of Parquet offset index [\#6797](https://github.com/apache/arrow-rs/pull/6797) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Remove unused dependencies [\#6792](https://github.com/apache/arrow-rs/pull/6792) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([findepi](https://github.com/findepi)) +- Add `Array::shrink_to_fit(&mut self)` [\#6790](https://github.com/apache/arrow-rs/pull/6790) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([emilk](https://github.com/emilk)) +- Formalize the default nested list field name to `item` [\#6785](https://github.com/apache/arrow-rs/pull/6785) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([gruuya](https://github.com/gruuya)) +- Improve UnionArray logical\_nulls tests [\#6781](https://github.com/apache/arrow-rs/pull/6781) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gstvg](https://github.com/gstvg)) +- Improve list builder usage example in docs [\#6775](https://github.com/apache/arrow-rs/pull/6775) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi)) +- Update proc-macro2 requirement from =1.0.89 to =1.0.92 [\#6772](https://github.com/apache/arrow-rs/pull/6772) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Allow NullBuffer construction directly from array 
[\#6769](https://github.com/apache/arrow-rs/pull/6769) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Include license and notice files in published crates [\#6767](https://github.com/apache/arrow-rs/pull/6767) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([ankane](https://github.com/ankane))
+- fix: remove redundant `bit_util::ceil` [\#6766](https://github.com/apache/arrow-rs/pull/6766) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([miroim](https://github.com/miroim))
+- Remove 'make\_row', expose a 'Row::new' method instead. [\#6763](https://github.com/apache/arrow-rs/pull/6763) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94))
+- Read nested Parquet 2-level lists correctly [\#6757](https://github.com/apache/arrow-rs/pull/6757) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Split `timestamp_s_to_datetime` to `date` and `time` to avoid unnecessary computation [\#6755](https://github.com/apache/arrow-rs/pull/6755) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jayzhan211](https://github.com/jayzhan211))
+- More trivial implementation of `Box<dyn AsyncFileReader>` and `Box<dyn FileReader>` [\#6748](https://github.com/apache/arrow-rs/pull/6748) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([ethe](https://github.com/ethe))
+- Update cache action to v4 [\#6744](https://github.com/apache/arrow-rs/pull/6744) ([findepi](https://github.com/findepi))
+- Remove redundant implementation of `StringArrayType` [\#6743](https://github.com/apache/arrow-rs/pull/6743) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tlm365](https://github.com/tlm365))
+- Fix Dictionary logical nulls for RunArray/UnionArray Values [\#6740](https://github.com/apache/arrow-rs/pull/6740) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Allow reading Parquet maps that lack a `values` field [\#6730](https://github.com/apache/arrow-rs/pull/6730) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+- Improve default implementation of Array::is\_nullable [\#6721](https://github.com/apache/arrow-rs/pull/6721) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([findepi](https://github.com/findepi))
+- Fix Buffer::bit\_slice losing length with byte-aligned offsets [\#6707](https://github.com/apache/arrow-rs/pull/6707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([itsjunetime](https://github.com/itsjunetime))

diff --git a/Cargo.toml b/Cargo.toml
index 375a4efac551..75ba410f12a6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,7 +62,7 @@ exclude = [
 ]

 [workspace.package]
-version = "53.3.0"
+version = "54.0.0"
 homepage = "https://github.com/apache/arrow-rs"
 repository = "https://github.com/apache/arrow-rs"
 authors = ["Apache Arrow "]
@@ -77,20 +77,20 @@ edition = "2021"
 rust-version = "1.62"

 [workspace.dependencies]
-arrow = { version = "53.3.0", path = "./arrow", default-features = false }
-arrow-arith = { version = "53.3.0", path = "./arrow-arith" }
-arrow-array = { version = "53.3.0", path = "./arrow-array" }
-arrow-buffer = { version =
"53.3.0", path = "./arrow-buffer" } -arrow-cast = { version = "53.3.0", path = "./arrow-cast" } -arrow-csv = { version = "53.3.0", path = "./arrow-csv" } -arrow-data = { version = "53.3.0", path = "./arrow-data" } -arrow-ipc = { version = "53.3.0", path = "./arrow-ipc" } -arrow-json = { version = "53.3.0", path = "./arrow-json" } -arrow-ord = { version = "53.3.0", path = "./arrow-ord" } -arrow-row = { version = "53.3.0", path = "./arrow-row" } -arrow-schema = { version = "53.3.0", path = "./arrow-schema" } -arrow-select = { version = "53.3.0", path = "./arrow-select" } -arrow-string = { version = "53.3.0", path = "./arrow-string" } -parquet = { version = "53.3.0", path = "./parquet", default-features = false } +arrow = { version = "54.0.0", path = "./arrow", default-features = false } +arrow-arith = { version = "54.0.0", path = "./arrow-arith" } +arrow-array = { version = "54.0.0", path = "./arrow-array" } +arrow-buffer = { version = "54.0.0", path = "./arrow-buffer" } +arrow-cast = { version = "54.0.0", path = "./arrow-cast" } +arrow-csv = { version = "54.0.0", path = "./arrow-csv" } +arrow-data = { version = "54.0.0", path = "./arrow-data" } +arrow-ipc = { version = "54.0.0", path = "./arrow-ipc" } +arrow-json = { version = "54.0.0", path = "./arrow-json" } +arrow-ord = { version = "54.0.0", path = "./arrow-ord" } +arrow-row = { version = "54.0.0", path = "./arrow-row" } +arrow-schema = { version = "54.0.0", path = "./arrow-schema" } +arrow-select = { version = "54.0.0", path = "./arrow-select" } +arrow-string = { version = "54.0.0", path = "./arrow-string" } +parquet = { version = "54.0.0", path = "./parquet", default-features = false } chrono = { version = "0.4.34", default-features = false, features = ["clock"] } diff --git a/arrow-flight/README.md b/arrow-flight/README.md index 661abfc58691..3ffc8780c2f8 100644 --- a/arrow-flight/README.md +++ b/arrow-flight/README.md @@ -31,7 +31,7 @@ Add this to your Cargo.toml: ```toml [dependencies] -arrow-flight = "53.3.0" +arrow-flight = "54.0.0" ``` Apache Arrow Flight is a gRPC based protocol for exchanging Arrow data between processes. See the blog post [Introducing Apache Arrow Flight: A Framework for Fast Data Transport](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for more information. diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index d00cc498625f..4a2f5e3f1987 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="53.2.0" -FUTURE_RELEASE="53.3.0" +SINCE_TAG="53.3.0" +FUTURE_RELEASE="54.0.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" From 5f01ed410f07c711caab20662fb1ca96314b0f66 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 19 Dec 2024 14:51:30 -0500 Subject: [PATCH 13/68] [object store] Add planned release schedule (#6904) --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 723249ad29e5..ed42f630514b 100644 --- a/README.md +++ b/README.md @@ -83,6 +83,13 @@ versions approximately every 2 months. [`object_store`]: https://crates.io/crates/object_store +Planned Release Schedule + +| Approximate Date | Version | Notes | +| ---------------- | -------- | --------------------------------------- | +| Dec 2024 | `0.11.2` | Minor, NO breaking API changes | +| Feb 2025 | `0.12.0` | Major, potentially breaking API changes | + ### Deprecation Guidelines Minor releases may deprecate, but not remove APIs. 
Deprecating APIs allows

From f6eaca587c93afc2df262bc4b48c3dce13dc4793 Mon Sep 17 00:00:00 2001
From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com>
Date: Thu, 19 Dec 2024 21:52:18 +0200
Subject: [PATCH 14/68] add `extend_dictionary` in dictionary builder for
 improved performance (#6875)

* add `extend_dictionary` in dictionary builder for improved performance

* fix extends all nulls

* support null in mapped value

* adding comment

* run `clippy` and `fmt`

* fix ci

* Apply suggestions from code review

Co-authored-by: Andrew Lamb

---------

Co-authored-by: Andrew Lamb
---
 .../generic_bytes_dictionary_builder.rs       | 187 ++++++++++++++++-
 .../builder/primitive_dictionary_builder.rs   | 198 +++++++++++++++++-
 2 files changed, 379 insertions(+), 6 deletions(-)

diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index bb0fb5e91be2..ead151d5ceea 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -17,7 +17,7 @@ use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder};
 use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType};
-use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray};
+use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray, TypedDictionaryArray};
 use arrow_buffer::ArrowNativeType;
 use arrow_schema::{ArrowError, DataType};
 use hashbrown::HashTable;
@@ -305,6 +305,63 @@ where
         };
     }

+    /// Extends builder with an existing dictionary array.
+    ///
+    /// This is the same as [`Self::extend`] but is faster as it translates
+    /// the dictionary values once rather than doing a lookup for each item in the iterator
+    ///
+    /// when dictionary values are null (the actual mapped values) the keys are null
+    ///
+    pub fn extend_dictionary(
+        &mut self,
+        dictionary: &TypedDictionaryArray<K, &GenericByteArray<T>>,
+    ) -> Result<(), ArrowError> {
+        let values = dictionary.values();
+
+        let v_len = values.len();
+        let k_len = dictionary.keys().len();
+        if v_len == 0 && k_len == 0 {
+            return Ok(());
+        }
+
+        // All nulls
+        if v_len == 0 {
+            self.append_nulls(k_len);
+            return Ok(());
+        }
+
+        if k_len == 0 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Dictionary keys should not be empty when values are not empty".to_string(),
+            ));
+        }
+
+        // Orphan values will be carried over to the new dictionary
+        let mapped_values = values
+            .iter()
+            // Dictionary values can technically be null, so we need to handle that
+            .map(|dict_value| {
+                dict_value
+                    .map(|dict_value| self.get_or_insert_key(dict_value))
+                    .transpose()
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // Just insert the keys without additional lookups
+        dictionary.keys().iter().for_each(|key| match key {
+            None => self.append_null(),
+            Some(original_dict_index) => {
+                let index = original_dict_index.as_usize().min(v_len - 1);
+                match mapped_values[index] {
+                    None => self.append_null(),
+                    Some(mapped_value) => self.keys_builder.append_value(mapped_value),
+                }
+            }
+        });
+
+        Ok(())
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
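    /// # Example
    ///
    /// A minimal usage sketch (illustrative only, not part of the upstream commit;
    /// the `Int32Type` key type and string values are assumptions that mirror the
    /// tests further below):
    ///
    /// ```
    /// # use arrow_array::builder::StringDictionaryBuilder;
    /// # use arrow_array::types::Int32Type;
    /// let mut source = StringDictionaryBuilder::<Int32Type>::new();
    /// source.extend(["a", "b", "a"].into_iter().map(Some));
    /// let source = source.finish();
    ///
    /// let mut builder = StringDictionaryBuilder::<Int32Type>::new();
    /// // `extend_dictionary` translates the source values once, then copies the keys
    /// builder.extend_dictionary(&source.downcast_dict().unwrap()).unwrap();
    /// let dict = builder.finish();
    /// assert_eq!(dict.keys().values(), &[0, 1, 0]);
    /// ```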
pub fn finish(&mut self) -> DictionaryArray { self.dedup.clear(); @@ -445,8 +502,9 @@ mod tests { use super::*; use crate::array::Int8Array; + use crate::cast::AsArray; use crate::types::{Int16Type, Int32Type, Int8Type, Utf8Type}; - use crate::{BinaryArray, StringArray}; + use crate::{ArrowPrimitiveType, BinaryArray, StringArray}; fn test_bytes_dictionary_builder(values: Vec<&T::Native>) where @@ -664,4 +722,129 @@ mod tests { assert_eq!(dict.keys().values(), &[0, 1, 2, 0, 1, 2, 2, 3, 0]); assert_eq!(dict.values().len(), 4); } + + #[test] + fn test_extend_dictionary() { + let some_dict = { + let mut builder = GenericByteDictionaryBuilder::::new(); + builder.extend(["a", "b", "c", "a", "b", "c"].into_iter().map(Some)); + builder.extend([None::<&str>]); + builder.extend(["c", "d", "a"].into_iter().map(Some)); + builder.append_null(); + builder.finish() + }; + + let mut builder = GenericByteDictionaryBuilder::::new(); + builder.extend(["e", "e", "f", "e", "d"].into_iter().map(Some)); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 6); + + let values = dict + .downcast_dict::>() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!( + values, + [ + Some("e"), + Some("e"), + Some("f"), + Some("e"), + Some("d"), + Some("a"), + Some("b"), + Some("c"), + Some("a"), + Some("b"), + Some("c"), + None, + Some("c"), + Some("d"), + Some("a"), + None + ] + ); + } + #[test] + fn test_extend_dictionary_with_null_in_mapped_value() { + let some_dict = { + let mut values_builder = GenericByteBuilder::::new(); + let mut keys_builder = PrimitiveBuilder::::new(); + + // Manually build a dictionary values that the mapped values have null + values_builder.append_null(); + keys_builder.append_value(0); + values_builder.append_value("I like worm hugs"); + keys_builder.append_value(1); + + let values = values_builder.finish(); + let keys = keys_builder.finish(); + + let data_type = DataType::Dictionary( + Box::new(Int32Type::DATA_TYPE), + Box::new(Utf8Type::DATA_TYPE), + ); + + let builder = keys + .into_data() + .into_builder() + .data_type(data_type) + .child_data(vec![values.into_data()]); + + DictionaryArray::from(unsafe { builder.build_unchecked() }) + }; + + let some_dict_values = some_dict.values().as_string::(); + assert_eq!( + some_dict_values.into_iter().collect::>(), + &[None, Some("I like worm hugs")] + ); + + let mut builder = GenericByteDictionaryBuilder::::new(); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 1); + + let values = dict + .downcast_dict::>() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!(values, [None, Some("I like worm hugs")]); + } + + #[test] + fn test_extend_all_null_dictionary() { + let some_dict = { + let mut builder = GenericByteDictionaryBuilder::::new(); + builder.append_nulls(2); + builder.finish() + }; + + let mut builder = GenericByteDictionaryBuilder::::new(); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 0); + + let values = dict + .downcast_dict::>() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!(values, [None, None]); + } } diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index ac40f8a469d3..282f0ae9d5b1 100644 --- 
a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -17,7 +17,9 @@ use crate::builder::{ArrayBuilder, PrimitiveBuilder};
 use crate::types::ArrowDictionaryKeyType;
-use crate::{Array, ArrayRef, ArrowPrimitiveType, DictionaryArray};
+use crate::{
+    Array, ArrayRef, ArrowPrimitiveType, DictionaryArray, PrimitiveArray, TypedDictionaryArray,
+};
 use arrow_buffer::{ArrowNativeType, ToByteSlice};
 use arrow_schema::{ArrowError, DataType};
 use std::any::Any;
@@ -44,7 +46,7 @@ impl PartialEq for Value {
 impl Eq for Value {}

-/// Builder for [`DictionaryArray`] of [`PrimitiveArray`](crate::array::PrimitiveArray)
+/// Builder for [`DictionaryArray`] of [`PrimitiveArray`]
 ///
 /// # Example:
 ///
@@ -303,6 +305,63 @@ where
         };
     }

+    /// Extends builder with dictionary
+    ///
+    /// This is the same as [`Self::extend`] but is faster as it translates
+    /// the dictionary values once rather than doing a lookup for each item in the iterator
+    ///
+    /// when dictionary values are null (the actual mapped values) the keys are null
+    ///
+    pub fn extend_dictionary(
+        &mut self,
+        dictionary: &TypedDictionaryArray<K, &PrimitiveArray<V>>,
+    ) -> Result<(), ArrowError> {
+        let values = dictionary.values();
+
+        let v_len = values.len();
+        let k_len = dictionary.keys().len();
+        if v_len == 0 && k_len == 0 {
+            return Ok(());
+        }
+
+        // All nulls
+        if v_len == 0 {
+            self.append_nulls(k_len);
+            return Ok(());
+        }
+
+        if k_len == 0 {
+            return Err(ArrowError::InvalidArgumentError(
+                "Dictionary keys should not be empty when values are not empty".to_string(),
+            ));
+        }
+
+        // Orphan values will be carried over to the new dictionary
+        let mapped_values = values
+            .iter()
+            // Dictionary values can technically be null, so we need to handle that
+            .map(|dict_value| {
+                dict_value
+                    .map(|dict_value| self.get_or_insert_key(dict_value))
+                    .transpose()
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // Just insert the keys without additional lookups
+        dictionary.keys().iter().for_each(|key| match key {
+            None => self.append_null(),
+            Some(original_dict_index) => {
+                let index = original_dict_index.as_usize().min(v_len - 1);
+                match mapped_values[index] {
+                    None => self.append_null(),
+                    Some(mapped_value) => self.keys_builder.append_value(mapped_value),
+                }
+            }
+        });
+
+        Ok(())
+    }
+
     /// Builds the `DictionaryArray` and reset this builder.
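    /// # Example
    ///
    /// A minimal usage sketch (illustrative only, not part of the upstream commit;
    /// the `Int32Type` key and value types are assumptions that mirror the tests
    /// further below):
    ///
    /// ```
    /// # use arrow_array::builder::PrimitiveDictionaryBuilder;
    /// # use arrow_array::types::Int32Type;
    /// let mut source = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
    /// source.extend([10, 20, 10].into_iter().map(Some));
    /// let source = source.finish();
    ///
    /// let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
    /// // `extend_dictionary` translates the source values once, then copies the keys
    /// builder.extend_dictionary(&source.downcast_dict().unwrap()).unwrap();
    /// let dict = builder.finish();
    /// assert_eq!(dict.keys().values(), &[0, 1, 0]);
    /// ```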
pub fn finish(&mut self) -> DictionaryArray { self.map.clear(); @@ -368,9 +427,9 @@ impl Extend> mod tests { use super::*; - use crate::array::UInt32Array; - use crate::array::UInt8Array; + use crate::array::{Int32Array, UInt32Array, UInt8Array}; use crate::builder::Decimal128Builder; + use crate::cast::AsArray; use crate::types::{Decimal128Type, Int32Type, UInt32Type, UInt8Type}; #[test] @@ -443,4 +502,135 @@ mod tests { ) ); } + + #[test] + fn test_extend_dictionary() { + let some_dict = { + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder.extend([1, 2, 3, 1, 2, 3, 1, 2, 3].into_iter().map(Some)); + builder.extend([None::]); + builder.extend([4, 5, 1, 3, 1].into_iter().map(Some)); + builder.append_null(); + builder.finish() + }; + + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder.extend([6, 6, 7, 6, 5].into_iter().map(Some)); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 7); + + let values = dict + .downcast_dict::() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!( + values, + [ + Some(6), + Some(6), + Some(7), + Some(6), + Some(5), + Some(1), + Some(2), + Some(3), + Some(1), + Some(2), + Some(3), + Some(1), + Some(2), + Some(3), + None, + Some(4), + Some(5), + Some(1), + Some(3), + Some(1), + None + ] + ); + } + + #[test] + fn test_extend_dictionary_with_null_in_mapped_value() { + let some_dict = { + let mut values_builder = PrimitiveBuilder::::new(); + let mut keys_builder = PrimitiveBuilder::::new(); + + // Manually build a dictionary values that the mapped values have null + values_builder.append_null(); + keys_builder.append_value(0); + values_builder.append_value(42); + keys_builder.append_value(1); + + let values = values_builder.finish(); + let keys = keys_builder.finish(); + + let data_type = DataType::Dictionary( + Box::new(Int32Type::DATA_TYPE), + Box::new(values.data_type().clone()), + ); + + let builder = keys + .into_data() + .into_builder() + .data_type(data_type) + .child_data(vec![values.into_data()]); + + DictionaryArray::from(unsafe { builder.build_unchecked() }) + }; + + let some_dict_values = some_dict.values().as_primitive::(); + assert_eq!( + some_dict_values.into_iter().collect::>(), + &[None, Some(42)] + ); + + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 1); + + let values = dict + .downcast_dict::() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!(values, [None, Some(42)]); + } + + #[test] + fn test_extend_all_null_dictionary() { + let some_dict = { + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder.append_nulls(2); + builder.finish() + }; + + let mut builder = PrimitiveDictionaryBuilder::::new(); + builder + .extend_dictionary(&some_dict.downcast_dict().unwrap()) + .unwrap(); + let dict = builder.finish(); + + assert_eq!(dict.values().len(), 0); + + let values = dict + .downcast_dict::() + .unwrap() + .into_iter() + .collect::>(); + + assert_eq!(values, [None, None]); + } } From 02377a0a1df41d5b25bb2d363ca86b185b148245 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 20 Dec 2024 16:18:55 -0500 Subject: [PATCH 15/68] [object_store]: Version and Changelog for 0.11.2 (#6908) * [object_store]: Version and Changelog for 0.11.2 * increment version * update script * changelog * tweaks * Update object_store/CHANGELOG.md Co-authored-by: 
Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- object_store/CHANGELOG-old.md | 39 ++++++++++++++ object_store/CHANGELOG.md | 51 ++++++++++--------- object_store/Cargo.toml | 2 +- object_store/dev/release/README.md | 5 +- object_store/dev/release/update_change_log.sh | 4 +- 5 files changed, 72 insertions(+), 29 deletions(-) diff --git a/object_store/CHANGELOG-old.md b/object_store/CHANGELOG-old.md index 28dbde4e7b7f..c42689240dd9 100644 --- a/object_store/CHANGELOG-old.md +++ b/object_store/CHANGELOG-old.md @@ -19,6 +19,45 @@ # Historical Changelog + +## [object_store_0.11.1](https://github.com/apache/arrow-rs/tree/object_store_0.11.1) (2024-10-15) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.0...object_store_0.11.1) + +**Implemented enhancements:** + +- There is no way to pass object store client options as environment variables [\#6333](https://github.com/apache/arrow-rs/issues/6333) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Better Document Backoff Algorithm [\#6324](https://github.com/apache/arrow-rs/issues/6324) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Add direction to `list_with_offset` [\#6274](https://github.com/apache/arrow-rs/issues/6274) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Support server-side encryption with customer-provided keys \(SSE-C\) [\#6229](https://github.com/apache/arrow-rs/issues/6229) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Fixed bugs:** + +- \[object-store\] Requested tokio version is too old - does not compile [\#6458](https://github.com/apache/arrow-rs/issues/6458) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] +- Azure SAS tokens are visible when retry errors are logged via object\_store [\#6322](https://github.com/apache/arrow-rs/issues/6322) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] + +**Merged pull requests:** + +- object\_store: fix typo in with\_connect\_timeout\_disabled that actually disabled non-connect timeouts [\#6563](https://github.com/apache/arrow-rs/pull/6563) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([adriangb](https://github.com/adriangb)) +- object\_store: Clarify what is a prefix in list\(\) documentation [\#6520](https://github.com/apache/arrow-rs/pull/6520) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([progval](https://github.com/progval)) +- object\_store: enable lint `unreachable_pub` [\#6512](https://github.com/apache/arrow-rs/pull/6512) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker)) +- \[object\_store\] Retry S3 requests with 200 response with "Error" in body [\#6508](https://github.com/apache/arrow-rs/pull/6508) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([PeterKeDer](https://github.com/PeterKeDer)) +- \[object-store\] Require tokio 1.29.0. 
[\#6459](https://github.com/apache/arrow-rs/pull/6459) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ashtuchkin](https://github.com/ashtuchkin)) +- feat: expose HTTP/2 max frame size in `object_store` [\#6442](https://github.com/apache/arrow-rs/pull/6442) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum)) +- Derive `Clone` for `object_store::aws::AmazonS3` [\#6414](https://github.com/apache/arrow-rs/pull/6414) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ethe](https://github.com/ethe)) +- object\_score: Support Azure Fabric OAuth Provider [\#6382](https://github.com/apache/arrow-rs/pull/6382) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([RobinLin666](https://github.com/RobinLin666)) +- `object_store::GetOptions` derive `Clone` [\#6361](https://github.com/apache/arrow-rs/pull/6361) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([samuelcolvin](https://github.com/samuelcolvin)) +- \[object\_store\] Propagate env vars as object store client options [\#6334](https://github.com/apache/arrow-rs/pull/6334) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ccciudatu](https://github.com/ccciudatu)) +- docs\[object\_store\]: clarify the backoff strategy that is actually implemented [\#6325](https://github.com/apache/arrow-rs/pull/6325) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([westonpace](https://github.com/westonpace)) +- fix: azure sas token visible in logs [\#6323](https://github.com/apache/arrow-rs/pull/6323) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- object\_store/delimited: Fix `TrailingEscape` condition [\#6265](https://github.com/apache/arrow-rs/pull/6265) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87)) +- fix\(object\_store\): only add encryption headers for SSE-C in get request [\#6260](https://github.com/apache/arrow-rs/pull/6260) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb)) +- docs: Add parquet\_opendal in related projects [\#6236](https://github.com/apache/arrow-rs/pull/6236) ([Xuanwo](https://github.com/Xuanwo)) +- feat\(object\_store\): add support for server-side encryption with customer-provided keys \(SSE-C\) [\#6230](https://github.com/apache/arrow-rs/pull/6230) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb)) +- feat: further TLS options on ClientOptions: \#5034 [\#6148](https://github.com/apache/arrow-rs/pull/6148) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker)) + + + ## [object_store_0.11.0](https://github.com/apache/arrow-rs/tree/object_store_0.11.0) (2024-08-12) [Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.10.2...object_store_0.11.0) diff --git a/object_store/CHANGELOG.md b/object_store/CHANGELOG.md index 95585983572c..0e834c5e2ef2 100644 --- a/object_store/CHANGELOG.md +++ b/object_store/CHANGELOG.md @@ -19,41 +19,42 @@ # Changelog -## [object_store_0.11.1](https://github.com/apache/arrow-rs/tree/object_store_0.11.1) (2024-10-15) +## [object_store_0.11.2](https://github.com/apache/arrow-rs/tree/object_store_0.11.2) (2024-12-20) -[Full 
Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.0...object_store_0.11.1)
+[Full Changelog](https://github.com/apache/arrow-rs/compare/object_store_0.11.1...object_store_0.11.2)
 
 **Implemented enhancements:**
 
-- There is no way to pass object store client options as environment variables [\#6333](https://github.com/apache/arrow-rs/issues/6333) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
-- Better Document Backoff Algorithm [\#6324](https://github.com/apache/arrow-rs/issues/6324) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
-- Add direction to `list_with_offset` [\#6274](https://github.com/apache/arrow-rs/issues/6274) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
-- Support server-side encryption with customer-provided keys \(SSE-C\) [\#6229](https://github.com/apache/arrow-rs/issues/6229) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object-store's AzureClient should protect against multiple streams performing put\_block in parallel for the same BLOB path [\#6868](https://github.com/apache/arrow-rs/issues/6868) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support S3 Put IfMatch [\#6799](https://github.com/apache/arrow-rs/issues/6799) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store Azure Government using OAuth [\#6759](https://github.com/apache/arrow-rs/issues/6759) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Support for AWS Requester Pays buckets [\#6716](https://github.com/apache/arrow-rs/issues/6716) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[object-store\]: Implement credential\_process support for S3 [\#6422](https://github.com/apache/arrow-rs/issues/6422) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store: Conditional put and rename\_if\_not\_exist on S3 [\#6285](https://github.com/apache/arrow-rs/issues/6285) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
 
 **Fixed bugs:**
 
-- \[object-store\] Requested tokio version is too old - does not compile [\#6458](https://github.com/apache/arrow-rs/issues/6458) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
-- Azure SAS tokens are visible when retry errors are logged via object\_store [\#6322](https://github.com/apache/arrow-rs/issues/6322) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- `object_store` errors when `reqwest` `gzip` feature is enabled [\#6842](https://github.com/apache/arrow-rs/issues/6842) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- Multi-part s3 uploads fail when using checksum [\#6793](https://github.com/apache/arrow-rs/issues/6793) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- `with_unsigned_payload` shouldn't generate payload hash [\#6697](https://github.com/apache/arrow-rs/issues/6697) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- \[Object\_store\] min\_ttl is too high for GKE tokens [\#6625](https://github.com/apache/arrow-rs/issues/6625) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- object\_store `test_private_bucket` fails - store: "S3", source: BucketNotFound { bucket: "bloxbender" } [\#6600](https://github.com/apache/arrow-rs/issues/6600) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
+- S3 endpoint and trailing slash result in weird/invalid requests [\#6580](https://github.com/apache/arrow-rs/issues/6580) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)]
 
 **Merged pull requests:**
 
-- object\_store: fix typo in with\_connect\_timeout\_disabled that actually disabled non-connect timeouts [\#6563](https://github.com/apache/arrow-rs/pull/6563) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([adriangb](https://github.com/adriangb))
-- object\_store: Clarify what is a prefix in list\(\) documentation [\#6520](https://github.com/apache/arrow-rs/pull/6520) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([progval](https://github.com/progval))
-- object\_store: enable lint `unreachable_pub` [\#6512](https://github.com/apache/arrow-rs/pull/6512) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker))
-- \[object\_store\] Retry S3 requests with 200 response with "Error" in body [\#6508](https://github.com/apache/arrow-rs/pull/6508) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([PeterKeDer](https://github.com/PeterKeDer))
-- \[object-store\] Require tokio 1.29.0. [\#6459](https://github.com/apache/arrow-rs/pull/6459) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ashtuchkin](https://github.com/ashtuchkin))
-- feat: expose HTTP/2 max frame size in `object_store` [\#6442](https://github.com/apache/arrow-rs/pull/6442) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
-- Derive `Clone` for `object_store::aws::AmazonS3` [\#6414](https://github.com/apache/arrow-rs/pull/6414) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ethe](https://github.com/ethe))
-- object\_score: Support Azure Fabric OAuth Provider [\#6382](https://github.com/apache/arrow-rs/pull/6382) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([RobinLin666](https://github.com/RobinLin666))
-- `object_store::GetOptions` derive `Clone` [\#6361](https://github.com/apache/arrow-rs/pull/6361) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([samuelcolvin](https://github.com/samuelcolvin))
-- \[object\_store\] Propagate env vars as object store client options [\#6334](https://github.com/apache/arrow-rs/pull/6334) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ccciudatu](https://github.com/ccciudatu))
-- docs\[object\_store\]: clarify the backoff strategy that is actually implemented [\#6325](https://github.com/apache/arrow-rs/pull/6325) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([westonpace](https://github.com/westonpace))
-- fix: azure sas token visible in logs [\#6323](https://github.com/apache/arrow-rs/pull/6323) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel))
-- object\_store/delimited: Fix `TrailingEscape` condition [\#6265](https://github.com/apache/arrow-rs/pull/6265) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([Turbo87](https://github.com/Turbo87))
-- fix\(object\_store\): only add encryption headers for SSE-C in get request [\#6260](https://github.com/apache/arrow-rs/pull/6260) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb))
-- docs: Add parquet\_opendal in related projects [\#6236](https://github.com/apache/arrow-rs/pull/6236) ([Xuanwo](https://github.com/Xuanwo))
-- feat\(object\_store\): add support for server-side encryption with customer-provided keys \(SSE-C\) [\#6230](https://github.com/apache/arrow-rs/pull/6230) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([jiachengdb](https://github.com/jiachengdb))
-- feat: further TLS options on ClientOptions: \#5034 [\#6148](https://github.com/apache/arrow-rs/pull/6148) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([ByteBaker](https://github.com/ByteBaker))
+- Use randomized content ID for Azure multipart uploads [\#6869](https://github.com/apache/arrow-rs/pull/6869) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([avarnon](https://github.com/avarnon))
+- Always explicitly disable `gzip` automatic decompression on reqwest client used by object\_store [\#6843](https://github.com/apache/arrow-rs/pull/6843) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([phillipleblanc](https://github.com/phillipleblanc))
+- object-store: remove S3ConditionalPut::ETagPutIfNotExists [\#6802](https://github.com/apache/arrow-rs/pull/6802) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([benesch](https://github.com/benesch))
+- Fix multipart uploads with checksums on object locked buckets [\#6794](https://github.com/apache/arrow-rs/pull/6794) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([avantgardnerio](https://github.com/avantgardnerio))
+- Add AuthorityHost to AzureConfigKey [\#6773](https://github.com/apache/arrow-rs/pull/6773) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([zadeluca](https://github.com/zadeluca))
+- object\_store: Add support for requester pays buckets [\#6768](https://github.com/apache/arrow-rs/pull/6768) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([kylebarron](https://github.com/kylebarron))
+- check sign\_payload instead of skip\_signature before computing checksum [\#6698](https://github.com/apache/arrow-rs/pull/6698) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mherrerarendon](https://github.com/mherrerarendon))
+- Update quick-xml requirement from 0.36.0 to 0.37.0 in /object\_store [\#6687](https://github.com/apache/arrow-rs/pull/6687) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([crepererum](https://github.com/crepererum))
+- Support native S3 conditional writes [\#6682](https://github.com/apache/arrow-rs/pull/6682) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([benesch](https://github.com/benesch))
+- \[object\_store\] fix S3 endpoint and trailing slash result in invalid requests [\#6641](https://github.com/apache/arrow-rs/pull/6641) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([adbmal](https://github.com/adbmal))
+- Lower GCP token min\_ttl to 4 minutes and add backoff to token refresh logic [\#6638](https://github.com/apache/arrow-rs/pull/6638) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([mwylde](https://github.com/mwylde))
+- Remove `test_private_bucket` object\_store test [\#6601](https://github.com/apache/arrow-rs/pull/6601) [[object-store](https://github.com/apache/arrow-rs/labels/object-store)] ([alamb](https://github.com/alamb))

diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml
index bcc8e0b92243..bf254b3a0bbd 100644
--- a/object_store/Cargo.toml
+++ b/object_store/Cargo.toml
@@ -17,7 +17,7 @@
 [package]
 name = "object_store"
-version = "0.11.1"
+version = "0.11.2"
 edition = "2021"
 license = "MIT/Apache-2.0"
 readme = "README.md"

diff --git a/object_store/dev/release/README.md b/object_store/dev/release/README.md
index 912ff4cd8bac..2dd1f6243c09 100644
--- a/object_store/dev/release/README.md
+++ b/object_store/dev/release/README.md
@@ -24,7 +24,10 @@
 
 This file documents the release process for the `object_store` crate.
 
-At the time of writing, we release a new version of `object_store` on demand rather than on a regular schedule.
+We release a new version of `object_store` according to the schedule listed in
+the [main README.md]
+
+[main README.md]: https://github.com/apache/arrow-rs?tab=readme-ov-file#object_store-crate
 
 As we are still in an early phase, we use the 0.x version scheme. If any code has
 been merged to main that has a breaking API change, as defined in [Rust RFC 1105]

diff --git a/object_store/dev/release/update_change_log.sh b/object_store/dev/release/update_change_log.sh
index 30724478ae1e..2797b62c0010 100755
--- a/object_store/dev/release/update_change_log.sh
+++ b/object_store/dev/release/update_change_log.sh
@@ -29,8 +29,8 @@
 
 set -e
 
-SINCE_TAG="object_store_0.11.0"
-FUTURE_RELEASE="object_store_0.11.1"
+SINCE_TAG="object_store_0.11.1"
+FUTURE_RELEASE="object_store_0.11.2"
 
 SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)"

From d9885da7c61e83cf23f8b9e66fab40391e49fa03 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Tue, 24 Dec 2024 22:22:33 +0800
Subject: [PATCH 16/68] feat(parquet): Add next_row_group API for
 ParquetRecordBatchStream (#6907)

* feat(parquet): Add next_row_group API for ParquetRecordBatchStream

Signed-off-by: Xuanwo

* chore: Returning error instead of using unreachable

Signed-off-by: Xuanwo

---------

Signed-off-by: Xuanwo
---
 parquet/src/arrow/async_reader/mod.rs | 132 ++++++++++++++++++++++++++
 1 file changed, 132 insertions(+)

diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs
index c408456df147..96715e1164b2 100644
--- a/parquet/src/arrow/async_reader/mod.rs
+++ b/parquet/src/arrow/async_reader/mod.rs
@@ -613,6 +613,9 @@ impl std::fmt::Debug for StreamState {
 
 /// An asynchronous [`Stream`](https://docs.rs/futures/latest/futures/stream/trait.Stream.html) of [`RecordBatch`]
 /// for a parquet file that can be constructed using [`ParquetRecordBatchStreamBuilder`].
+///
+/// `ParquetRecordBatchStream` also provides [`ParquetRecordBatchStream::next_row_group`] for fetching row groups,
+/// allowing users to decode record batches separately from I/O.
 pub struct ParquetRecordBatchStream<T> {
     metadata: Arc<ParquetMetaData>,
 
@@ -654,6 +657,70 @@ impl<T> ParquetRecordBatchStream<T> {
     }
 }
 
+impl<T> ParquetRecordBatchStream<T>
+where
+    T: AsyncFileReader + Unpin + Send + 'static,
+{
+    /// Fetches the next row group from the stream.
+    ///
+    /// Users can continue to call this function to get row groups and decode them concurrently.
+    ///
+    /// ## Notes
+    ///
+    /// ParquetRecordBatchStream should be used either as a `Stream` or with `next_row_group`; they should not be used simultaneously.
+    ///
+    /// ## Returns
+    ///
+    /// - `Ok(None)` if the stream has ended.
+    /// - `Err(error)` if the stream has errored. All subsequent calls will return `Ok(None)`.
+    /// - `Ok(Some(reader))` which holds all the data for the row group.
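+    ///
+    /// ## Example
+    ///
+    /// A minimal sketch of driving the stream one row group at a time; `builder`
+    /// is assumed to be an already-configured [`ParquetRecordBatchStreamBuilder`]:
+    ///
+    /// ```ignore
+    /// let mut stream = builder.build()?;
+    /// while let Some(reader) = stream.next_row_group().await? {
+    ///     // `reader` is a synchronous `ParquetRecordBatchReader`, so decoding
+    ///     // can proceed on another task while the next row group is fetched
+    ///     for batch in reader {
+    ///         println!("read {} rows", batch?.num_rows());
+    ///     }
+    /// }
+    /// ```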
+    pub async fn next_row_group(&mut self) -> Result<Option<ParquetRecordBatchReader>> {
+        loop {
+            match &mut self.state {
+                StreamState::Decoding(_) | StreamState::Reading(_) => {
+                    return Err(ParquetError::General(
+                        "Cannot combine the use of next_row_group with the Stream API".to_string(),
+                    ))
+                }
+                StreamState::Init => {
+                    let row_group_idx = match self.row_groups.pop_front() {
+                        Some(idx) => idx,
+                        None => return Ok(None),
+                    };
+
+                    let row_count = self.metadata.row_group(row_group_idx).num_rows() as usize;
+
+                    let selection = self.selection.as_mut().map(|s| s.split_off(row_count));
+
+                    let reader_factory = self.reader.take().expect("lost reader");
+
+                    let (reader_factory, maybe_reader) = reader_factory
+                        .read_row_group(
+                            row_group_idx,
+                            selection,
+                            self.projection.clone(),
+                            self.batch_size,
+                        )
+                        .await
+                        .map_err(|err| {
+                            self.state = StreamState::Error;
+                            err
+                        })?;
+                    self.reader = Some(reader_factory);
+
+                    if let Some(reader) = maybe_reader {
+                        return Ok(Some(reader));
+                    } else {
+                        // All rows skipped, read next row group
+                        continue;
+                    }
+                }
+                StreamState::Error => return Ok(None), // Ends the stream as error happens.
+            }
+        }
+    }
+}
+
 impl<T> Stream for ParquetRecordBatchStream<T>
 where
     T: AsyncFileReader + Unpin + Send + 'static,
@@ -1020,6 +1087,71 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn test_async_reader_with_next_row_group() {
+        let testdata = arrow::util::test_util::parquet_test_data();
+        let path = format!("{testdata}/alltypes_plain.parquet");
+        let data = Bytes::from(std::fs::read(path).unwrap());
+
+        let metadata = ParquetMetaDataReader::new()
+            .parse_and_finish(&data)
+            .unwrap();
+        let metadata = Arc::new(metadata);
+
+        assert_eq!(metadata.num_row_groups(), 1);
+
+        let async_reader = TestReader {
+            data: data.clone(),
+            metadata: metadata.clone(),
+            requests: Default::default(),
+        };
+
+        let requests = async_reader.requests.clone();
+        let builder = ParquetRecordBatchStreamBuilder::new(async_reader)
+            .await
+            .unwrap();
+
+        let mask = ProjectionMask::leaves(builder.parquet_schema(), vec![1, 2]);
+        let mut stream = builder
+            .with_projection(mask.clone())
+            .with_batch_size(1024)
+            .build()
+            .unwrap();
+
+        let mut readers = vec![];
+        while let Some(reader) = stream.next_row_group().await.unwrap() {
+            readers.push(reader);
+        }
+
+        let async_batches: Vec<_> = readers
+            .into_iter()
+            .flat_map(|r| r.map(|v| v.unwrap()).collect::<Vec<_>>())
+            .collect();
+
+        let sync_batches = ParquetRecordBatchReaderBuilder::try_new(data)
+            .unwrap()
+            .with_projection(mask)
+            .with_batch_size(104)
+            .build()
+            .unwrap()
+            .collect::<Result<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(async_batches, sync_batches);
+
+        let requests = requests.lock().unwrap();
+        let (offset_1, length_1) = metadata.row_group(0).column(1).byte_range();
+        let (offset_2, length_2) = metadata.row_group(0).column(2).byte_range();
+
+        assert_eq!(
+            &requests[..],
+            &[
+                offset_1 as usize..(offset_1 + length_1) as usize,
+                offset_2 as usize..(offset_2 + length_2) as usize
+            ]
+        );
+    }
+
     #[tokio::test]
     async fn test_async_reader_with_index() {
         let testdata = arrow::util::test_util::parquet_test_data();

From 7ef432b277ef09a94e6a6898aeef8c402d863231 Mon Sep 17 00:00:00 2001
From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com>
Date: Thu, 26 Dec 2024 16:49:36 +0200
Subject: [PATCH 17/68] chore(arrow-ord): move `can_rank` to the `rank` file
 (#6910)

---
 arrow-ord/src/rank.rs |  9 +++++++++
 arrow-ord/src/sort.rs | 11 +----------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs
index ecc693bab4e4..e61cebef38ec 100644
--- a/arrow-ord/src/rank.rs
+++ b/arrow-ord/src/rank.rs
@@ -24,6 +24,15 @@ use arrow_buffer::NullBuffer;
 use arrow_schema::{ArrowError, DataType, SortOptions};
 use std::cmp::Ordering;
 
+/// Whether `arrow_ord::rank` can rank an array of given data type.
+pub(crate) fn can_rank(data_type: &DataType) -> bool {
+    data_type.is_primitive()
+        || matches!(
+            data_type,
+            DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary
+        )
+}
+
 /// Assigns a rank to each value in `array` based on its position in the sorted order
 ///
 /// Where values are equal, they will be assigned the highest of their ranks,

diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 60fc4a918525..51a6659e631b 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -30,7 +30,7 @@ use arrow_select::take::take;
 use std::cmp::Ordering;
 use std::sync::Arc;
 
-use crate::rank::rank;
+use crate::rank::{can_rank, rank};
 pub use arrow_schema::SortOptions;
 
 /// Sort the `ArrayRef` using `SortOptions`.
@@ -190,15 +190,6 @@ fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
     }
 }
 
-/// Whether `arrow_ord::rank` can rank an array of given data type.
-fn can_rank(data_type: &DataType) -> bool {
-    data_type.is_primitive()
-        || matches!(
-            data_type,
-            DataType::Utf8 | DataType::LargeUtf8 | DataType::Binary | DataType::LargeBinary
-        )
-}
-
 /// Whether `sort_to_indices` can sort an array of given data type.
 fn can_sort_to_indices(data_type: &DataType) -> bool {
     data_type.is_primitive()

From df87b132b22ac8d0a8352aeaf8298414705220da Mon Sep 17 00:00:00 2001
From: Curt Hagenlocher
Date: Sat, 19 Oct 2024 10:15:04 -0700
Subject: [PATCH 18/68] preliminary changes

---
 arrow-cast/src/cast/mod.rs | 4 ++--
 arrow-data/src/data.rs     | 4 ++++
 arrow-schema/src/ffi.rs    | 8 ++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 391ffce90cbe..d7edba261e6c 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -181,9 +181,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         UInt8 | UInt16 | UInt32 | UInt64) |
         // decimal to signed numeric
         (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
-        Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) |
+        Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64) => true,
         // decimal to string
-        (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), Utf8View | Utf8 | LargeUtf8) |
+        (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), Utf8View | Utf8 | LargeUtf8) => true,
         // string to decimal
         (Utf8View | Utf8 | LargeUtf8, Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) => true,
         (Struct(from_fields), Struct(to_fields)) => {

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 6f016d213675..7c5b9ea52ed4 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -143,6 +143,10 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
         DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
             [empty_buffer, MutableBuffer::new(0)]
         }
+        DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
+            MutableBuffer::new(capacity * mem::size_of::<i128>()),
+            empty_buffer,
+        ],
         DataType::Union(_, mode) => {
             let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
             match mode {

diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs
index e99ea8d67899..56bc93559ed8 100644
--- a/arrow-schema/src/ffi.rs
+++ b/arrow-schema/src/ffi.rs
@@ -709,12 +709,8 @@ fn get_format_string(dtype: &DataType) -> Result<Cow<'static, str>, ArrowError>
     DataType::LargeUtf8 => Ok("U".into()),
     DataType::FixedSizeBinary(num_bytes) => Ok(Cow::Owned(format!("w:{num_bytes}"))),
     DataType::FixedSizeList(_, num_elems) => Ok(Cow::Owned(format!("+w:{num_elems}"))),
-    DataType::Decimal32(precision, scale) => {
-        Ok(Cow::Owned(format!("d:{precision},{scale},32")))
-    }
-    DataType::Decimal64(precision, scale) => {
-        Ok(Cow::Owned(format!("d:{precision},{scale},64")))
-    }
+    DataType::Decimal32(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale},32"))),
+    DataType::Decimal64(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale},64"))),
     DataType::Decimal128(precision, scale) => Ok(Cow::Owned(format!("d:{precision},{scale}"))),
     DataType::Decimal256(precision, scale) => {
         Ok(Cow::Owned(format!("d:{precision},{scale},256")))

From 78c899fe2f8b30abc96067f857fc7ba5f7e709fd Mon Sep 17 00:00:00 2001
From: Curt Hagenlocher
Date: Sun, 12 Jan 2025 19:09:56 -0800
Subject: [PATCH 19/68] Decimal32/64 mostly done

---
 arrow-array/src/cast.rs                        |  12 +
 arrow-cast/src/cast/mod.rs                     | 117 +++++--
 arrow-data/src/data.rs                         |   4 -
 arrow-json/src/writer/mod.rs                   |  48 +++
 arrow-ord/src/comparison.rs                    | 208 ++++++++++++
 arrow-ord/src/ord.rs                           |  28 +-
 arrow-ord/src/sort.rs                          | 319 ++++++------
 arrow-row/src/lib.rs                           |  60 ++++
 arrow/tests/array_cast.rs                      |  48 ++-
 .../src/arrow/array_reader/primitive_array.rs  |  26 +-
 parquet/src/arrow/arrow_reader/mod.rs          |  79 ++++-
 parquet/src/arrow/schema/mod.rs                |   2 +
 parquet/src/arrow/schema/primitive.rs          |   4 +-
 parquet/tests/arrow_reader/mod.rs              |  85 ++++-
 parquet/tests/arrow_reader/statistics.rs       |  86 ++++-
 15 files changed, 840 insertions(+), 286 deletions(-)

diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs
index a06ca34a02e7..9947c36d4619 100644
--- a/arrow-array/src/cast.rs
+++ b/arrow-array/src/cast.rs
@@ -1014,6 +1014,18 @@ mod tests {
         assert!(!as_string_array(&array).is_empty())
     }
 
+    #[test]
+    fn test_decimal32array() {
+        let a = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+        assert!(!as_primitive_array::<Decimal32Type>(&a).is_empty());
+    }
+
+    #[test]
+    fn test_decimal64array() {
+        let a = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+        assert!(!as_primitive_array::<Decimal64Type>(&a).is_empty());
+    }
+
     #[test]
     fn test_decimal128array() {
         let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index d7edba261e6c..483680b1d39d 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -830,6 +830,7 @@ pub fn cast_with_options(
         (Map(_, ordered1), Map(_, ordered2)) if ordered1 == ordered2 => {
             cast_map_values(array.as_map(), to_type, cast_options, ordered1.to_owned())
         }
+        // Decimal to decimal, same width
         (Decimal32(p1, s1), Decimal32(p2, s2)) => {
             cast_decimal_to_decimal_same_type::<Decimal32Type>(
                 array.as_primitive(),
                 *p1,
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
+        (Decimal64(p1, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal_same_type::<Decimal64Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal128(p1, s1), Decimal128(p2, s2)) => {
+            cast_decimal_to_decimal_same_type::<Decimal128Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal256(p1, s1), Decimal256(p2, s2)) => {
+            cast_decimal_to_decimal_same_type::<Decimal256Type>(
+                array.as_primitive(),
+                *p1,
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        // Decimal to decimal, different width
         (Decimal32(_, s1), Decimal64(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal32Type, Decimal64Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
         (Decimal32(_, s1), Decimal128(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal32Type, Decimal128Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
-        (Decimal64(p1, s1), Decimal64(p2, s2)) => {
-            cast_decimal_to_decimal_same_type::<Decimal64Type>(
+        (Decimal64(_, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal64Type, Decimal32Type>(
                 array.as_primitive(),
-                *p1,
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
         (Decimal64(_, s1), Decimal128(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal64Type, Decimal128Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
         (Decimal64(_, s1), Decimal256(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal64Type, Decimal256Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
-        (Decimal128(p1, s1), Decimal128(p2, s2)) => {
-            cast_decimal_to_decimal_same_type::<Decimal128Type>(
+        (Decimal128(_, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal128Type, Decimal32Type>(
                 array.as_primitive(),
-                *p1,
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
-        (Decimal256(p1, s1), Decimal256(p2, s2)) => {
-            cast_decimal_to_decimal_same_type::<Decimal256Type>(
+        (Decimal128(_, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal128Type, Decimal64Type>(
                 array.as_primitive(),
-                *p1,
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
         (Decimal128(_, s1), Decimal256(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal128Type, Decimal256Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
+        (Decimal256(_, s1), Decimal32(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal256Type, Decimal32Type>(
+                array.as_primitive(),
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
+        (Decimal256(_, s1), Decimal64(p2, s2)) => {
+            cast_decimal_to_decimal::<Decimal256Type, Decimal64Type>(
+                array.as_primitive(),
+                *s1,
+                *p2,
+                *s2,
+                cast_options,
+            )
+        }
         (Decimal256(_, s1), Decimal128(p2, s2)) => {
             cast_decimal_to_decimal::<Decimal256Type, Decimal128Type>(
                 array.as_primitive(),
                 *s1,
                 *p2,
                 *s2,
                 cast_options,
             )
         }
+        // Decimal to non-decimal
         (Decimal32(_, scale), _) if !to_type.is_temporal() => {
             cast_from_decimal::<Decimal32Type, _>(
                 array,
                 scale,
                 from_type,
                 to_type,
                 cast_options,
             )
         }
+        (Decimal64(_, scale), _) if !to_type.is_temporal() => {
+            cast_from_decimal::<Decimal64Type, _>(
+                array,
+                scale,
+                from_type,
+                to_type,
+                cast_options,
+            )
+        }
         (Decimal128(_, scale), _) if !to_type.is_temporal() => {
             cast_from_decimal::<Decimal128Type, _>(
                 array,
                 scale,
                 from_type,
                 to_type,
                 cast_options,
             )
         }
         (Decimal256(_, scale), _) if !to_type.is_temporal() => {
             cast_from_decimal::<Decimal256Type, _>(
                 array,
                 scale,
                 from_type,
                 to_type,
                 cast_options,
             )
         }
+        // Non-decimal to decimal
         (_, Decimal32(precision, scale)) if !from_type.is_temporal() => {
             cast_to_decimal::<Decimal32Type, _>(
                 array,
                 precision,
                 scale,
                 from_type,
                 to_type,
                 cast_options,
             )
         }
@@ -2584,7 +2633,7 @@ mod tests {
             .with_precision_and_scale(precision, scale)
     }
 
-    fn create_decimal_array(
+    fn create_decimal128_array(
         array: Vec<Option<i128>>,
         precision: u8,
         scale: i8,
@@ -2653,7 +2702,7 @@ mod tests {
             Some(-3123456),
             None,
         ];
-        let array = create_decimal_array(array, 20, 4).unwrap();
+        let array = create_decimal128_array(array, 20, 4).unwrap();
         // decimal128 to decimal128
         let input_type = DataType::Decimal128(20, 4);
         let output_type = DataType::Decimal128(20, 3);
@@ -2804,7 +2853,7 @@ mod tests {
         let output_type = DataType::Decimal128(20, 4);
         assert!(can_cast_types(&input_type, &output_type));
         let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
-        let array = create_decimal_array(array, 20, 3).unwrap();
+        let array = create_decimal128_array(array, 20, 3).unwrap();
         generate_cast_test_case!(
             &array,
             Decimal128Array,
@@ -2818,7 +2867,7 @@ mod tests {
         );
         // negative test
         let array = vec![Some(123456), None];
-        let array = create_decimal_array(array, 10, 0).unwrap();
+        let array = create_decimal128_array(array, 10, 0).unwrap();
         let result_safe = cast(&array, &DataType::Decimal128(2, 2));
         assert!(result_safe.is_ok());
         let options = CastOptions {
@@ -2874,7 +2923,7 @@ mod tests {
        );
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
-       let array = create_decimal_array(array, p, s).unwrap();
+       let array = create_decimal128_array(array, p, s).unwrap();
        let cast_array = cast_with_options(&array, &output_type, &CastOptions::default()).unwrap();
        assert_eq!(cast_array.data_type(), &output_type);
    }
@@ -2890,7 +2939,7 @@ mod tests {
        );
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
-       let array = create_decimal_array(array, p, s).unwrap();
+       let array = create_decimal128_array(array, p, s).unwrap();
        let cast_array = cast_with_options(&array, &output_type, &CastOptions::default()).unwrap();
        assert_eq!(cast_array.data_type(), &output_type);
    }
@@ -2942,7 +2991,7 @@ mod tests {
        assert!(can_cast_types(&input_type, &output_type));
 
        let array = vec![Some(i128::MAX)];
-       let array = create_decimal_array(array, 38, 3).unwrap();
+       let array = create_decimal128_array(array, 38, 3).unwrap();
        let result = cast_with_options(
            &array,
            &output_type,
@@ -2962,7 +3011,7 @@ mod tests {
        assert!(can_cast_types(&input_type, &output_type));
 
        let array = vec![Some(i128::MAX)];
-       let array = create_decimal_array(array, 38, 3).unwrap();
+       let array = create_decimal128_array(array, 38, 3).unwrap();
        let result = cast_with_options(
            &array,
            &output_type,
@@ -3019,7 +3068,7 @@ mod tests {
        let output_type = DataType::Decimal256(20, 4);
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
-       let array = create_decimal_array(array, 20, 3).unwrap();
+       let array = create_decimal128_array(array, 20, 3).unwrap();
        generate_cast_test_case!(
            &array,
            Decimal256Array,
@@ -3227,13 +3276,13 @@ mod tests {
    #[test]
    fn test_cast_decimal_to_numeric() {
        let value_array: Vec<Option<i128>> = vec![Some(125), Some(225), Some(325), None, Some(525)];
-       let array = create_decimal_array(value_array, 38, 2).unwrap();
+       let array = create_decimal128_array(value_array, 38, 2).unwrap();
 
        generate_decimal_to_numeric_cast_test_case!(&array);
 
        // overflow test: out of range of max u8
        let value_array: Vec<Option<i128>> = vec![Some(51300)];
-       let array = create_decimal_array(value_array, 38, 2).unwrap();
+       let array = create_decimal128_array(value_array, 38, 2).unwrap();
        let casted_array = cast_with_options(
            &array,
            &DataType::UInt8,
@@ -3260,7 +3309,7 @@ mod tests {
 
        // overflow test: out of range of max i8
        let value_array: Vec<Option<i128>> = vec![Some(24400)];
-       let array = create_decimal_array(value_array, 38, 2).unwrap();
+       let array = create_decimal128_array(value_array, 38, 2).unwrap();
        let casted_array = cast_with_options(
            &array,
            &DataType::Int8,
@@ -3297,7 +3346,7 @@ mod tests {
            Some(112345678),
            Some(112345679),
        ];
-       let array = create_decimal_array(value_array, 38, 2).unwrap();
+       let array = create_decimal128_array(value_array, 38, 2).unwrap();
        generate_cast_test_case!(
            &array,
            Float32Array,
@@ -3324,7 +3373,7 @@ mod tests {
            Some(112345678901234568),
            Some(112345678901234560),
        ];
-       let array = create_decimal_array(value_array, 38, 2).unwrap();
+       let array = create_decimal128_array(value_array, 38, 2).unwrap();
        generate_cast_test_case!(
            &array,
            Float64Array,
@@ -8638,7 +8687,7 @@ mod tests {
        let output_type = DataType::Decimal128(20, -1);
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(1123450), Some(2123455), Some(3123456), None];
-       let input_decimal_array = create_decimal_array(array, 20, 0).unwrap();
+       let input_decimal_array = create_decimal128_array(array, 20, 0).unwrap();
        let array = Arc::new(input_decimal_array) as ArrayRef;
        generate_cast_test_case!(
            &array,
@@ -8696,7 +8745,7 @@ mod tests {
        let output_type = DataType::Decimal128(10, -2);
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(123)];
-       let input_decimal_array = create_decimal_array(array, 10, -1).unwrap();
+       let input_decimal_array = create_decimal128_array(array, 10, -1).unwrap();
        let array = Arc::new(input_decimal_array) as ArrayRef;
        generate_cast_test_case!(&array, Decimal128Array, &output_type, vec![Some(12_i128),]);
 
        let decimal_arr = as_primitive_array::<Decimal128Type>(&array);
        assert_eq!("1200", decimal_arr.value_as_string(0));
 
        let array = vec![Some(125)];
-       let input_decimal_array = create_decimal_array(array, 10, -1).unwrap();
+       let input_decimal_array = create_decimal128_array(array, 10, -1).unwrap();
        let array = Arc::new(input_decimal_array) as ArrayRef;
        generate_cast_test_case!(&array, Decimal128Array, &output_type, vec![Some(13_i128),]);
@@ -8722,7 +8771,7 @@ mod tests {
        let output_type = DataType::Decimal256(10, 5);
        assert!(can_cast_types(&input_type, &output_type));
        let array = vec![Some(123456), Some(-123456)];
-       let input_decimal_array = create_decimal_array(array, 10, 3).unwrap();
+       let input_decimal_array = create_decimal128_array(array, 10, 3).unwrap();
        let array = Arc::new(input_decimal_array) as ArrayRef;
 
        let hundred = i256::from_i128(100);
@@ -9586,15 +9635,15 @@ mod tests {
 
        test_decimal_to_string::<Decimal128Type>(
            DataType::Utf8View,
-           create_decimal_array(array128.clone(), 7, 3).unwrap(),
+           create_decimal128_array(array128.clone(), 7, 3).unwrap(),
        );
        test_decimal_to_string::<Decimal128Type>(
            DataType::Utf8,
-           create_decimal_array(array128.clone(), 7, 3).unwrap(),
+           create_decimal128_array(array128.clone(), 7, 3).unwrap(),
        );
        test_decimal_to_string::<Decimal128Type>(
            DataType::LargeUtf8,
-           create_decimal_array(array128, 7, 3).unwrap(),
+           create_decimal128_array(array128, 7, 3).unwrap(),
        );
 
        test_decimal_to_string::<Decimal256Type>(
@@ -10242,7 +10291,7 @@ mod tests {
    #[test]
    fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() {
        let array = vec![Some(123456789)];
-       let array = create_decimal_array(array, 24, 2).unwrap();
+       let array = create_decimal128_array(array, 24, 2).unwrap();
        println!("{:?}", array);
        let input_type = DataType::Decimal128(24, 2);
        let output_type = DataType::Decimal128(6, 2);
@@ -10260,7 +10309,7 @@ mod tests {
    #[test]
    fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale() {
        let array = vec![Some(123456789)];
-       let array = create_decimal_array(array, 24, 2).unwrap();
+       let array = create_decimal128_array(array, 24, 2).unwrap();
        println!("{:?}", array);
        let input_type = DataType::Decimal128(24, 4);
        let output_type = DataType::Decimal128(6, 2);
@@ -10278,7 +10327,7 @@ mod tests {
    #[test]
    fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale() {
        let array = vec![Some(123456789)];
-       let array = create_decimal_array(array, 24, 2).unwrap();
+       let array = create_decimal128_array(array, 24, 2).unwrap();
        println!("{:?}", array);
        let input_type = DataType::Decimal128(24, 2);
        let output_type = DataType::Decimal128(6, 3);
@@ -10296,7 +10345,7 @@ mod tests {
    #[test]
    fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type() {
        let array = vec![Some(123456789)];
-       let array = create_decimal_array(array, 24, 2).unwrap();
+       let array = create_decimal128_array(array, 24, 2).unwrap();
        println!("{:?}", array);
        let input_type = DataType::Decimal128(24, 2);
        let output_type = DataType::Decimal256(6, 2);

diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs
index 7c5b9ea52ed4..6f016d213675 100644
--- a/arrow-data/src/data.rs
+++ b/arrow-data/src/data.rs
@@ -143,10 +143,6 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff
         DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => {
             [empty_buffer, MutableBuffer::new(0)]
         }
-        DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [
-            MutableBuffer::new(capacity * mem::size_of::<i128>()),
-            empty_buffer,
-        ],
         DataType::Union(_, mode) => {
             let type_ids = MutableBuffer::new(capacity * mem::size_of::<i8>());
             match mode {

diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs
index ee6d83a0a1f0..1cc43ec4e8d0 100644
--- a/arrow-json/src/writer/mod.rs
+++ b/arrow-json/src/writer/mod.rs
@@ -1878,6 +1878,54 @@ mod tests {
        )
    }
 
+    #[test]
+    fn test_decimal32_encoder() {
+        let array = Decimal32Array::from_iter_values([1234, 5678, 9012])
+            .with_precision_and_scale(8, 2)
+            .unwrap();
+        let field = Arc::new(Field::new("decimal", array.data_type().clone(), true));
+        let schema = Schema::new(vec![field]);
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"decimal":12.34}
+{"decimal":56.78}
+{"decimal":90.12}
+"#,
+        );
+    }
+
+    #[test]
+    fn test_decimal64_encoder() {
+        let array = Decimal64Array::from_iter_values([1234, 5678, 9012])
+            .with_precision_and_scale(10, 2)
+            .unwrap();
+        let field = Arc::new(Field::new("decimal", array.data_type().clone(), true));
+        let schema = Schema::new(vec![field]);
+        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)]).unwrap();
+
+        let mut buf = Vec::new();
+        {
+            let mut writer = LineDelimitedWriter::new(&mut buf);
+            writer.write_batches(&[&batch]).unwrap();
+        }
+
+        assert_json_eq(
+            &buf,
+            r#"{"decimal":12.34}
+{"decimal":56.78}
+{"decimal":90.12}
+"#,
+        );
+    }
+
    #[test]
    fn test_decimal128_encoder() {
        let array = Decimal128Array::from_iter_values([1234, 5678, 9012])

diff --git a/arrow-ord/src/comparison.rs b/arrow-ord/src/comparison.rs
index bb82f54d4918..83f765229f57 100644
--- a/arrow-ord/src/comparison.rs
+++ b/arrow-ord/src/comparison.rs
@@ -3059,6 +3059,117 @@ mod tests {
        );
    }
 
+    fn create_decimal_array<T: DecimalType>(data: Vec<Option<T::Native>>) -> PrimitiveArray<T> {
+        data.into_iter().collect::<PrimitiveArray<T>>()
+    }
+
+    fn test_cmp_dict_decimal<T: DecimalType>(values1: Vec<Option<T::Native>>, values2: Vec<Option<T::Native>>) {
+        let values = create_decimal_array::<T>(values1);
+        let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+        let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+        let values = create_decimal_array::<T>(values2);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_decimal32() {
+        test_cmp_dict_decimal::<Decimal32Type>(
+            vec![Some(0), Some(1), Some(2), Some(3), Some(4), Some(5)],
+            vec![Some(7), Some(-3), Some(4), Some(3), Some(5)],
+        );
+    }
+
+    #[test]
+    fn test_cmp_dict_non_dict_decimal32() {
+        let array1: Decimal32Array = Decimal32Array::from_iter_values([1, 2, 5, 4, 3, 0]);
+
+        let values = Decimal32Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_decimal64() {
+        let values = Decimal64Array::from_iter_values([0, 1, 2, 3, 4, 5]);
+        let keys = Int8Array::from_iter_values([1_i8, 2, 5, 4, 3, 0]);
+        let array1 = DictionaryArray::new(keys, Arc::new(values));
+
+        let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
+    #[test]
+    fn test_cmp_dict_non_dict_decimal64() {
+        let array1: Decimal64Array = Decimal64Array::from_iter_values([1, 2, 5, 4, 3, 0]);
+
+        let values = Decimal64Array::from_iter_values([7, -3, 4, 3, 5]);
+        let keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
+        let array2 = DictionaryArray::new(keys, Arc::new(values));
+
+        let expected = BooleanArray::from(vec![false, false, false, true, true, false]);
+        assert_eq!(crate::cmp::eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, false, false, true]);
+        assert_eq!(crate::cmp::lt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![true, true, false, true, true, true]);
+        assert_eq!(crate::cmp::lt_eq(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, false, false, false]);
+        assert_eq!(crate::cmp::gt(&array1, &array2).unwrap(), expected);
+
+        let expected = BooleanArray::from(vec![false, false, true, true, true, false]);
+        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
+    }
+
    #[test]
    fn test_cmp_dict_decimal128() {
        let values = Decimal128Array::from_iter_values([0, 1, 2, 3, 4, 5]);
@@ -3163,6 +3274,103 @@ mod tests {
        assert_eq!(crate::cmp::gt_eq(&array1, &array2).unwrap(), expected);
    }
 
+    #[test]
+    fn test_decimal32() {
+        let a = Decimal32Array::from_iter_values([1, 2, 4, 5]);
+        let b = Decimal32Array::from_iter_values([7, -3, 4, 3]);
+        let e = BooleanArray::from(vec![false, false, true, false]);
+        let r = crate::cmp::eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![true, false, false, false]);
+        let r = crate::cmp::lt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![true, false, true, false]);
+        let r = crate::cmp::lt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![false, true, false, true]);
+        let r = crate::cmp::gt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![false, true, true, true]);
+        let r = crate::cmp::gt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+    }
+
+    #[test]
+    fn test_decimal32_scalar() {
+        let a = Decimal32Array::from(vec![Some(1), Some(2), Some(3), None, Some(4), Some(5)]);
+        let b = Decimal32Array::new_scalar(3_i32);
+        // array eq scalar
+        let e = BooleanArray::from(
+            vec![Some(false), Some(false), Some(true), None, Some(false), Some(false)],
+        );
+        let r = crate::cmp::eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        // array neq scalar
+        let e = BooleanArray::from(
+            vec![Some(true), Some(true), Some(false), None, Some(true), Some(true)],
+        );
+        let r = crate::cmp::neq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        // array lt scalar
+        let e = BooleanArray::from(
+            vec![Some(true), Some(true), Some(false), None, Some(false), Some(false)],
+        );
+        let r = crate::cmp::lt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        // array lt_eq scalar
+        let e = BooleanArray::from(
+            vec![Some(true), Some(true), Some(true), None, Some(false), Some(false)],
+        );
+        let r = crate::cmp::lt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        // array gt scalar
+        let e = BooleanArray::from(
+            vec![Some(false), Some(false), Some(false), None, Some(true), Some(true)],
+        );
+        let r = crate::cmp::gt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        // array gt_eq scalar
+        let e = BooleanArray::from(
+            vec![Some(false), Some(false), Some(true), None, Some(true), Some(true)],
+        );
+        let r = crate::cmp::gt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+    }
+
+    #[test]
+    fn test_decimal64() {
+        let a = Decimal64Array::from_iter_values([1, 2, 4, 5]);
+        let b = Decimal64Array::from_iter_values([7, -3, 4, 3]);
+        let e = BooleanArray::from(vec![false, false, true, false]);
+        let r = crate::cmp::eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![true, false, false, false]);
+        let r = crate::cmp::lt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![true, false, true, false]);
+        let r = crate::cmp::lt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![false, true, false, true]);
+        let r = crate::cmp::gt(&a, &b).unwrap();
+        assert_eq!(e, r);
+
+        let e = BooleanArray::from(vec![false, true, true, true]);
+        let r = crate::cmp::gt_eq(&a, &b).unwrap();
+        assert_eq!(e, r);
+    }
+
    #[test]
    fn test_decimal128() {
        let a = Decimal128Array::from_iter_values([1, 2, 4, 5]);

diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index 55e397cd8aa4..9dc22e1f5a4c 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -549,7 +549,33 @@ mod tests {
    }
 
    #[test]
-    fn test_decimal() {
+    fn test_decimali32() {
+        let array = vec![Some(5_i32), Some(2_i32), Some(3_i32)]
+            .into_iter()
+            .collect::<Decimal32Array>()
+            .with_precision_and_scale(8, 6)
+            .unwrap();
+
+        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
+        assert_eq!(Ordering::Less, cmp(1, 0));
+        assert_eq!(Ordering::Greater, cmp(0, 2));
+    }
+
+    #[test]
+    fn test_decimali64() {
+        let array = vec![Some(5_i64), Some(2_i64), Some(3_i64)]
+            .into_iter()
+            .collect::<Decimal64Array>()
+            .with_precision_and_scale(16, 6)
+            .unwrap();
+
+        let cmp = make_comparator(&array, &array, SortOptions::default()).unwrap();
+        assert_eq!(Ordering::Less, cmp(1, 0));
+        assert_eq!(Ordering::Greater, cmp(0, 2));
+    }
+
+    #[test]
+    fn test_decimali128() {
        let array = vec![Some(5_i128), Some(2_i128), Some(3_i128)]
            .into_iter()
            .collect::<Decimal128Array>()

diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index 51a6659e631b..e18979524eb0 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -793,10 +793,18 @@ mod tests {
    use rand::rngs::StdRng;
    use rand::{Rng, RngCore, SeedableRng};
 
-    fn create_decimal128_array(data: Vec<Option<i128>>) -> Decimal128Array {
+    fn create_decimal_array<T: DecimalType>(
+        data: Vec<Option<usize>>,
+        precision: u8,
+        scale: i8
+    ) -> PrimitiveArray<T> {
        data.into_iter()
-            .collect::<Decimal128Array>()
-            .with_precision_and_scale(23, 6)
+            .map(|x| match x {
+                None => None,
+                Some(y) => T::Native::from_usize(y),
+            })
+            .collect::<PrimitiveArray<T>>()
+            .with_precision_and_scale(precision, scale)
            .unwrap()
    }
 
@@ -807,13 +815,15 @@ mod tests {
            .unwrap()
    }
 
-    fn test_sort_to_indices_decimal128_array(
-        data: Vec<Option<i128>>,
+    fn test_sort_to_indices_decimal_array<T: DecimalType>(
+        data: Vec<Option<usize>>,
        options: Option<SortOptions>,
        limit: Option<usize>,
        expected_data: Vec<u32>,
+        precision: u8,
+        scale: i8,
    ) {
-        let output = create_decimal128_array(data);
+        let output = create_decimal_array::<T>(data, precision, scale);
        let expected = UInt32Array::from(expected_data);
        let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options, limit).unwrap();
        assert_eq!(output, expected)
@@ -831,14 +841,16 @@ mod tests {
        assert_eq!(output, expected)
    }
 
-    fn test_sort_decimal128_array(
-        data: Vec<Option<i128>>,
+    fn test_sort_decimal_array<T: DecimalType>(
+        data: Vec<Option<usize>>,
        options: Option<SortOptions>,
        limit: Option<usize>,
-        expected_data: Vec<Option<i128>>,
+        expected_data: Vec<Option<usize>>,
+        p: u8,
+        s: i8,
    ) {
-        let output = create_decimal128_array(data);
-        let expected = Arc::new(create_decimal128_array(expected_data)) as ArrayRef;
+        let output = create_decimal_array::<T>(data, p, s);
+        let expected = Arc::new(create_decimal_array::<T>(expected_data, p, s)) as ArrayRef;
        let output = match limit {
            Some(_) => sort_limit(&(Arc::new(output) as ArrayRef), options, limit).unwrap(),
            _ => sort(&(Arc::new(output) as ArrayRef), options).unwrap(),
@@ -1541,17 +1553,18 @@ mod tests {
        );
    }
 
-    #[test]
-    fn test_sort_indices_decimal128() {
+    fn test_sort_indices_decimal<T: DecimalType>(precision: u8, scale: i8) {
        // decimal default
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            None,
            None,
            vec![0, 6, 4, 2, 3, 5, 1],
+            precision,
+            scale,
        );
        // decimal descending
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: false,
            }),
            None,
            vec![1, 5, 3, 2, 4, 0, 6],
+            precision,
+            scale,
        );
        // decimal null_first and descending
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: true,
            }),
            None,
            vec![0, 6, 1, 5, 3, 2, 4],
+            precision,
+            scale,
        );
        // decimal null_first
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: false,
                nulls_first: true,
            }),
            None,
            vec![0, 6, 4, 2, 3, 5, 1],
+            precision,
+            scale,
        );
        // limit
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            None,
            Some(3),
            vec![0, 6, 4],
+            precision,
+            scale,
        );
        // limit descending
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: false,
            }),
            Some(3),
            vec![1, 5, 3],
+            precision,
+            scale,
        );
        // limit descending null_first
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: true,
            }),
            Some(3),
            vec![0, 6, 1],
+            precision,
+            scale,
        );
        // limit null_first
-        test_sort_to_indices_decimal128_array(
+        test_sort_to_indices_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: false,
                nulls_first: true,
            }),
            Some(3),
            vec![0, 6, 4],
+            precision,
+            scale,
        );
    }
 
    #[test]
-    fn test_sort_indices_decimal256() {
-        let data = vec![
-            None,
-            Some(i256::from_i128(5)),
-            Some(i256::from_i128(2)),
-            Some(i256::from_i128(3)),
-            Some(i256::from_i128(1)),
-            Some(i256::from_i128(4)),
-            None,
-        ];
+    fn test_sort_indices_decimal32() {
+        test_sort_indices_decimal::<Decimal32Type>(8, 3);
+    }
 
-        // decimal default
-        test_sort_to_indices_decimal256_array(data.clone(), None, None, vec![0, 6, 4, 2, 3, 5, 1]);
-        // decimal descending
-        test_sort_to_indices_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            vec![1, 5, 3, 2, 4, 0, 6],
-        );
-        // decimal null_first and descending
-        test_sort_to_indices_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            vec![0, 6, 1, 5, 3, 2, 4],
-        );
-        // decimal null_first
-        test_sort_to_indices_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            vec![0, 6, 4, 2, 3, 5, 1],
-        );
-        // limit
-        test_sort_to_indices_decimal256_array(data.clone(), None, Some(3), vec![0, 6, 4]);
-        // limit descending
-        test_sort_to_indices_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            Some(3),
-            vec![1, 5, 3],
-        );
-        // limit descending null_first
-        test_sort_to_indices_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![0, 6, 1],
-        );
-        // limit null_first
-        test_sort_to_indices_decimal256_array(
-            data,
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            Some(3),
-            vec![0, 6, 4],
-        );
+    #[test]
+    fn test_sort_indices_decimal64() {
+        test_sort_indices_decimal::<Decimal64Type>(17, 5);
+    }
+
+    #[test]
+    fn test_sort_indices_decimal128() {
+        test_sort_indices_decimal::<Decimal128Type>(23, 6);
+    }
+
+    #[test]
+    fn test_sort_indices_decimal256() {
+        test_sort_indices_decimal::<Decimal256Type>(53, 6);
    }
 
    #[test]
@@ -1747,17 +1716,18 @@ mod tests {
        );
    }
 
-    #[test]
-    fn test_sort_decimal128() {
+    fn test_sort_decimal<T: DecimalType>(precision: u8, scale: i8) {
        // decimal default
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            None,
            None,
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
+            precision,
+            scale,
        );
        // decimal descending
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: false,
            }),
            None,
            vec![Some(5), Some(4), Some(3), Some(2), Some(1), None, None],
+            precision,
+            scale,
        );
        // decimal null_first and descending
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: true,
            }),
            None,
            vec![None, None, Some(5), Some(4), Some(3), Some(2), Some(1)],
+            precision,
+            scale,
        );
        // decimal null_first
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: false,
                nulls_first: true,
            }),
            None,
            vec![None, None, Some(1), Some(2), Some(3), Some(4), Some(5)],
+            precision,
+            scale,
        );
        // limit
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            None,
            Some(3),
            vec![None, None, Some(1)],
+            precision,
+            scale,
        );
        // limit descending
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: false,
            }),
            Some(3),
            vec![Some(5), Some(4), Some(3)],
+            precision,
+            scale,
        );
        // limit descending null_first
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: true,
                nulls_first: true,
            }),
            Some(3),
            vec![None, None, Some(5)],
+            precision,
+            scale,
        );
        // limit null_first
-        test_sort_decimal128_array(
+        test_sort_decimal_array::<T>(
            vec![None, Some(5), Some(2), Some(3), Some(1), Some(4), None],
            Some(SortOptions {
                descending: false,
                nulls_first: true,
            }),
            Some(3),
            vec![None, None, Some(1)],
+            precision,
+            scale,
        );
    }
 
+    #[test]
+    fn test_sort_decimal32() {
+        test_sort_decimal::<Decimal32Type>(8, 3);
+    }
+
+    #[test]
+    fn test_sort_decimal64() {
+        test_sort_decimal::<Decimal64Type>(17, 5);
+    }
+
+    #[test]
+    fn test_sort_decimal128() {
+        test_sort_decimal::<Decimal128Type>(23, 6);
+    }
+
    #[test]
    fn test_sort_decimal256() {
-        let data = vec![
-            None,
-            Some(i256::from_i128(5)),
-            Some(i256::from_i128(2)),
-            Some(i256::from_i128(3)),
-            Some(i256::from_i128(1)),
-            Some(i256::from_i128(4)),
-            None,
-        ];
-        // decimal default
-        test_sort_decimal256_array(
-            data.clone(),
-            None,
-            None,
-            [None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // decimal descending
-        test_sort_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            None,
-            [Some(5), Some(4), Some(3), Some(2), Some(1), None, None]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // decimal null_first and descending
-        test_sort_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            None,
-            [None, None, Some(5), Some(4), Some(3), Some(2), Some(1)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // decimal null_first
-        test_sort_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            None,
-            [None, None, Some(1), Some(2), Some(3), Some(4), Some(5)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // limit
-        test_sort_decimal256_array(
-            data.clone(),
-            None,
-            Some(3),
-            [None, None, Some(1)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // limit descending
-        test_sort_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: false,
-            }),
-            Some(3),
-            [Some(5), Some(4), Some(3)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // limit descending null_first
-        test_sort_decimal256_array(
-            data.clone(),
-            Some(SortOptions {
-                descending: true,
-                nulls_first: true,
-            }),
-            Some(3),
-            [None, None, Some(5)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
-        // limit null_first
-        test_sort_decimal256_array(
-            data,
-            Some(SortOptions {
-                descending: false,
-                nulls_first: true,
-            }),
-            Some(3),
-            [None, None, Some(1)]
-                .iter()
-                .map(|v| v.map(i256::from_i128))
-                .collect(),
-        );
+        test_sort_decimal::<Decimal256Type>(53, 6);
    }
 
    #[test]

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index d0fad12210db..5421029304cd 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1510,6 +1510,66 @@ mod tests {
        }
    }
 
+    #[test]
+    fn test_decimal32() {
+        let converter = RowConverter::new(vec![SortField::new(DataType::Decimal32(
+            DECIMAL32_MAX_PRECISION,
+            7,
+        ))])
+        .unwrap();
+        let col = Arc::new(
+            Decimal32Array::from_iter([
+                None,
+                Some(i32::MIN),
+                Some(-13),
+                Some(46_i32),
+                Some(5456_i32),
+                Some(i32::MAX),
+            ])
+            .with_precision_and_scale(9, 7)
+            .unwrap(),
+        ) as ArrayRef;
+
+        let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+        for i in 0..rows.num_rows() - 1 {
+            assert!(rows.row(i) < rows.row(i + 1));
+        }
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        assert_eq!(col.as_ref(), back[0].as_ref())
+    }
+
+    #[test]
+    fn test_decimal64() {
+        let converter = RowConverter::new(vec![SortField::new(DataType::Decimal64(
+            DECIMAL64_MAX_PRECISION,
+            7,
+        ))])
+        .unwrap();
+        let col = Arc::new(
+            Decimal64Array::from_iter([
+                None,
+                Some(i64::MIN),
+                Some(-13),
+                Some(46_i64),
+                Some(5456_i64),
+                Some(i64::MAX),
+            ])
+            .with_precision_and_scale(18, 7)
+            .unwrap(),
+        ) as ArrayRef;
+
+        let rows = converter.convert_columns(&[Arc::clone(&col)]).unwrap();
+        for i in 0..rows.num_rows() - 1 {
+            assert!(rows.row(i) < rows.row(i + 1));
+        }
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 1);
+        assert_eq!(col.as_ref(), back[0].as_ref())
+    }
+
    #[test]
    fn test_decimal128() {
        let converter = RowConverter::new(vec![SortField::new(DataType::Decimal128(

diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index bf9962b69f7b..addaafb540ca 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -24,7 +24,8 @@ use arrow_array::types::{
 };
 use arrow_array::{
     Array, ArrayRef, ArrowPrimitiveType, BinaryArray, BooleanArray, Date32Array, Date64Array,
-    Decimal128Array, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
+    Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
+    DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray,
     DurationSecondArray, FixedSizeBinaryArray, FixedSizeListArray, Float16Array, Float32Array,
     Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalDayTimeArray,
     IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeBinaryArray, LargeListArray,
@@ -262,7 +263,14 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
        Arc::new(DurationMillisecondArray::from(vec![1000, 2000])),
        Arc::new(DurationMicrosecondArray::from(vec![1000, 2000])),
        Arc::new(DurationNanosecondArray::from(vec![1000, 2000])),
-        Arc::new(create_decimal_array(vec![Some(1), Some(2), Some(3)], 38, 0).unwrap()),
+        Arc::new(create_decimal32_array(vec![Some(1), Some(2), Some(3)], 9, 0).unwrap()),
+        Arc::new(create_decimal64_array(vec![Some(1), Some(2), Some(3)], 18, 0).unwrap()),
+        Arc::new(create_decimal128_array(vec![Some(1), Some(2), Some(3)], 38, 0).unwrap()),
+        Arc::new(create_decimal256_array(vec![
+            Some(i256::from_i128(1)),
+            Some(i256::from_i128(2)),
+            Some(i256::from_i128(3))
+        ], 40, 0).unwrap()),
        make_dictionary_primitive::<Int8Type>(vec![1, 2]),
        make_dictionary_primitive::<Int16Type>(vec![1, 2]),
        make_dictionary_primitive::<Int32Type>(vec![1, 2]),
@@ -428,7 +436,29 @@ fn make_dictionary_utf8() -> ArrayRef {
    Arc::new(b.finish())
 }
 
-fn create_decimal_array(
+fn create_decimal32_array(
+    array: Vec<Option<i32>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal32Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal32Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
+fn create_decimal64_array(
+    array: Vec<Option<i64>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal64Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal64Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
+fn create_decimal128_array(
    array: Vec<Option<i128>>,
    precision: u8,
    scale: i8,
@@ -439,6 +469,17 @@ fn create_decimal128_array(
        .with_precision_and_scale(precision, scale)
 }
 
+fn create_decimal256_array(
+    array: Vec<Option<i256>>,
+    precision: u8,
+    scale: i8,
+) -> Result<Decimal256Array, ArrowError> {
+    array
+        .into_iter()
+        .collect::<Decimal256Array>()
+        .with_precision_and_scale(precision, scale)
+}
+
 // Get a selection of datatypes to try and cast to
 fn get_all_types() -> Vec<DataType> {
    use DataType::*;
@@ -519,6 +560,7 @@ fn get_all_types() -> Vec<DataType> {
        Dictionary(Box::new(key_type.clone()), Box::new(Binary)),
        Dictionary(Box::new(key_type.clone()), Box::new(LargeBinary)),
        Dictionary(Box::new(key_type.clone()), Box::new(Decimal32(9, 0))),
+        Dictionary(Box::new(key_type.clone()), Box::new(Decimal64(18, 0))),
        Dictionary(Box::new(key_type.clone()), Box::new(Decimal128(38, 0))),
        Dictionary(Box::new(key_type), Box::new(Decimal256(76, 0))),
    ]

diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
index 375db933b511..c7b3e69450a6 100644
--- a/parquet/src/arrow/array_reader/primitive_array.rs
+++ b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -25,7 +25,7 @@ use crate::errors::{ParquetError, Result};
 use crate::schema::types::ColumnDescPtr;
 use arrow_array::{
     builder::TimestampNanosecondBufferBuilder, ArrayRef, BooleanArray,
-    Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
+    Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array,
     Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray,
     UInt32Array, UInt64Array,
 };
@@ -220,10 +220,30 @@ where
                let a = arrow_cast::cast(&array, &ArrowType::Date32)?;
                arrow_cast::cast(&a, target_type)?
            }
-            ArrowType::Decimal128(p, s) => {
+            ArrowType::Decimal64(p, s) if *(array.data_type()) == ArrowType::Int32 => {
                // Apply conversion to all elements regardless of null slots as the conversion
-                // to `i128` is infallible. This improves performance by avoiding a branch in
+                // to `i64` is infallible. This improves performance by avoiding a branch in
                // the inner loop (see docs for `PrimitiveArray::unary`).
+                let array = match array.data_type() {
+                    ArrowType::Int32 => array
+                        .as_any()
+                        .downcast_ref::<Int32Array>()
+                        .unwrap()
+                        .unary(|i| i as i64)
+                        as Decimal64Array,
+                    _ => {
+                        return Err(arrow_err!(
+                            "Cannot convert {:?} to decimal",
+                            array.data_type()
+                        ));
+                    }
+                }
+                .with_precision_and_scale(*p, *s)?;
+
+                Arc::new(array) as ArrayRef
+            }
+            ArrowType::Decimal128(p, s) => {
+                // See above comment. Conversion to `i128` is likewise infallible.
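+                // Note: a parquet DECIMAL logical type may be backed by either an
+                // INT32 or an INT64 physical column, so both cases are widened to
+                // `i128` below.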
                    let array = match array.data_type() {
                        ArrowType::Int32 => array
                            .as_any()
                            .downcast_ref::
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index 6eba04c86f91..6feedfcf8e0f 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -932,8 +932,9 @@ mod tests {
     use arrow_array::builder::*;
     use arrow_array::cast::AsArray;
     use arrow_array::types::{
-        Date32Type, Date64Type, Decimal128Type, Decimal256Type, DecimalType, Float16Type,
-        Float32Type, Float64Type, Time32MillisecondType, Time64MicrosecondType,
+        Date32Type, Date64Type, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type,
+        DecimalType, Float16Type, Float32Type, Float64Type, Time32MillisecondType,
+        Time64MicrosecondType,
     };
     use arrow_array::*;
     use arrow_buffer::{i256, ArrowNativeType, Buffer, IntervalDayTime};
@@ -4024,6 +4025,78 @@ mod tests {
         assert_eq!(out, batch.slice(2, 1));
     }
 
+    fn test_decimal32_roundtrip() {
+        let d = |values: Vec<i32>, p: u8| {
+            let iter = values.into_iter();
+            PrimitiveArray::<Decimal32Type>::from_iter_values(iter)
+                .with_precision_and_scale(p, 2)
+                .unwrap()
+        };
+
+        let d1 = d(vec![1, 2, 3, 4, 5], 9);
+        let batch = RecordBatch::try_from_iter([
+            ("d1", Arc::new(d1) as ArrayRef),
+        ])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer)).unwrap();
+        let t1 = builder.parquet_schema().columns()[0].physical_type();
+        assert_eq!(t1, PhysicalType::INT32);
+
+        let mut reader = builder.build().unwrap();
+        assert_eq!(batch.schema(), reader.schema());
+
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(batch, out);
+    }
+
+    fn test_decimal64_roundtrip() {
+        // Precision <= 9 -> INT32
+        // Precision <= 18 -> INT64
+
+        let d = |values: Vec<i64>, p: u8| {
+            let iter = values.into_iter();
+            PrimitiveArray::<Decimal64Type>::from_iter_values(iter)
+                .with_precision_and_scale(p, 2)
+                .unwrap()
+        };
+
+        let d1 = d(vec![1, 2, 3, 4, 5], 9);
+        let d2 = d(vec![1, 2, 3, 4, 10_i64.pow(10) - 1], 10);
+        let d3 = d(vec![1, 2, 3, 4, 10_i64.pow(18) - 1], 18);
+
+        let batch = RecordBatch::try_from_iter([
+            ("d1", Arc::new(d1) as ArrayRef),
+            ("d2", Arc::new(d2) as ArrayRef),
+            ("d3", Arc::new(d3) as ArrayRef),
+        ])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), None).unwrap();
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer)).unwrap();
+        let t1 = builder.parquet_schema().columns()[0].physical_type();
+        assert_eq!(t1, PhysicalType::INT32);
+        let t2 = builder.parquet_schema().columns()[1].physical_type();
+        assert_eq!(t2, PhysicalType::INT64);
+        let t3 = builder.parquet_schema().columns()[2].physical_type();
+        assert_eq!(t3, PhysicalType::INT64);
+
+        let mut reader = builder.build().unwrap();
+        assert_eq!(batch.schema(), reader.schema());
+
+        let out = reader.next().unwrap().unwrap();
+        assert_eq!(batch, out);
+    }
+
     fn test_decimal_roundtrip<T: DecimalType>() {
         // Precision <= 9 -> INT32
         // Precision <= 18 -> INT64
@@ -4073,6 +4146,8 @@ mod tests {
 
     #[test]
     fn test_decimal() {
+        test_decimal32_roundtrip();
+        test_decimal64_roundtrip();
         test_decimal_roundtrip::<Decimal128Type>();
         test_decimal_roundtrip::<Decimal256Type>();
     }
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index c9051062204d..689f7a103276 100644
---
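The INT32/INT64 assertions in the roundtrip tests above follow Parquet's rule for choosing a decimal column's physical type from its precision. A minimal free-standing sketch of that selection (illustrative only; the function name is hypothetical, not the writer's actual API):

    // Smallest Parquet physical type that can hold a decimal of the given
    // precision; mirrors the assertions in test_decimal32/64_roundtrip.
    fn decimal_physical_type(precision: u8) -> &'static str {
        match precision {
            1..=9 => "INT32",            // up to 9 decimal digits fit in an i32
            10..=18 => "INT64",          // up to 18 decimal digits fit in an i64
            _ => "FIXED_LEN_BYTE_ARRAY", // wider decimals are stored as raw bytes
        }
    }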
a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -2014,6 +2014,8 @@ mod tests { false, // fails to roundtrip keys_sorted false, ), + Field::new("c42", DataType::Decimal32(5, 2), false), + Field::new("c43", DataType::Decimal64(18, 12), true), ], meta(&[("Key", "Value")]), ); diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 9f215b4dc07e..522582b408e2 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -69,7 +69,9 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType { // Determine interval time unit (#1666) (DataType::Interval(_), DataType::Interval(_)) => hint, - // Promote to Decimal256 + // Promote to Decimal256 or narrow to Decimal32 or Decimal64 + (DataType::Decimal128(_, _), DataType::Decimal32(_, _)) => hint, + (DataType::Decimal128(_, _), DataType::Decimal64(_, _)) => hint, (DataType::Decimal128(_, _), DataType::Decimal256(_, _)) => hint, // Potentially preserve dictionary encoding diff --git a/parquet/tests/arrow_reader/mod.rs b/parquet/tests/arrow_reader/mod.rs index 0e6783583cd5..e6bec8279658 100644 --- a/parquet/tests/arrow_reader/mod.rs +++ b/parquet/tests/arrow_reader/mod.rs @@ -18,12 +18,13 @@ use arrow_array::types::{Int32Type, Int8Type}; use arrow_array::{ Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, - Decimal128Array, Decimal256Array, DictionaryArray, FixedSizeBinaryArray, Float16Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, - LargeStringArray, RecordBatch, StringArray, StringViewArray, StructArray, - Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, - TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, + Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, DictionaryArray, + FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, + StringViewArray, StructArray, Time32MillisecondArray, Time32SecondArray, + Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, + UInt32Array, UInt64Array, UInt8Array, }; use arrow_buffer::i256; use arrow_schema::{DataType, Field, Schema, TimeUnit}; @@ -84,7 +85,9 @@ enum Scenario { Float16, Float32, Float64, - Decimal, + Decimal32, + Decimal64, + Decimal128, Decimal256, ByteArray, Dictionary, @@ -369,13 +372,49 @@ fn make_f16_batch(v: Vec) -> RecordBatch { RecordBatch::try_new(schema, vec![array.clone()]).unwrap() } -/// Return record batch with decimal vector +/// Return record batch with decimal32 vector /// /// Columns are named -/// "decimal_col" -> DecimalArray -fn make_decimal_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { +/// "decimal32_col" -> Decimal32Array +fn make_decimal32_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { let schema = Arc::new(Schema::new(vec![Field::new( - "decimal_col", + "decimal32_col", + DataType::Decimal32(precision, scale), + true, + )])); + let array = Arc::new( + Decimal32Array::from(v) + .with_precision_and_scale(precision, scale) + .unwrap(), + ) as ArrayRef; + RecordBatch::try_new(schema, vec![array.clone()]).unwrap() +} + +/// Return record batch with decimal64 vector +/// +/// 
Columns are named +/// "decimal64_col" -> Decimal64Array +fn make_decimal64_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new( + "decimal64_col", + DataType::Decimal64(precision, scale), + true, + )])); + let array = Arc::new( + Decimal64Array::from(v) + .with_precision_and_scale(precision, scale) + .unwrap(), + ) as ArrayRef; + RecordBatch::try_new(schema, vec![array.clone()]).unwrap() +} + +/// Return record batch with decimal128 vector +/// +/// Columns are named +/// "decimal128_col" -> Decimal128Array +fn make_decimal128_batch(v: Vec, precision: u8, scale: i8) -> RecordBatch { + let schema = Arc::new(Schema::new(vec![Field::new( + "decimal128_col", DataType::Decimal128(precision, scale), true, )])); @@ -730,12 +769,28 @@ fn create_data_batch(scenario: Scenario) -> Vec { make_f64_batch(vec![5.0, 6.0, 7.0, 8.0, 9.0]), ] } - Scenario::Decimal => { + Scenario::Decimal32 => { + // decimal record batch + vec![ + make_decimal32_batch(vec![100, 200, 300, 400, 600], 9, 2), + make_decimal32_batch(vec![-500, 100, 300, 400, 600], 9, 2), + make_decimal32_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), + ] + } + Scenario::Decimal64 => { + // decimal record batch + vec![ + make_decimal64_batch(vec![100, 200, 300, 400, 600], 9, 2), + make_decimal64_batch(vec![-500, 100, 300, 400, 600], 9, 2), + make_decimal64_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), + ] + } + Scenario::Decimal128 => { // decimal record batch vec![ - make_decimal_batch(vec![100, 200, 300, 400, 600], 9, 2), - make_decimal_batch(vec![-500, 100, 300, 400, 600], 9, 2), - make_decimal_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), + make_decimal128_batch(vec![100, 200, 300, 400, 600], 9, 2), + make_decimal128_batch(vec![-500, 100, 300, 400, 600], 9, 2), + make_decimal128_batch(vec![2000, 3000, 3000, 4000, 6000], 9, 2), ] } Scenario::Decimal256 => { diff --git a/parquet/tests/arrow_reader/statistics.rs b/parquet/tests/arrow_reader/statistics.rs index 0eb0fc2b277f..64e92d972c54 100644 --- a/parquet/tests/arrow_reader/statistics.rs +++ b/parquet/tests/arrow_reader/statistics.rs @@ -31,12 +31,13 @@ use arrow::datatypes::{ }; use arrow_array::{ make_array, new_null_array, Array, ArrayRef, BinaryArray, BinaryViewArray, BooleanArray, - Date32Array, Date64Array, Decimal128Array, Decimal256Array, FixedSizeBinaryArray, Float16Array, - Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeBinaryArray, - LargeStringArray, RecordBatch, StringArray, StringViewArray, Time32MillisecondArray, - Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray, - TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, - UInt32Array, UInt64Array, UInt8Array, + Date32Array, Date64Array, Decimal128Array, Decimal256Array, Decimal32Array, Decimal64Array, + FixedSizeBinaryArray, Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, + Int64Array, Int8Array, LargeBinaryArray, LargeStringArray, RecordBatch, StringArray, + StringViewArray, Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, + Time64NanosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, + UInt8Array, }; use arrow_schema::{DataType, Field, Schema, SchemaRef, TimeUnit}; use half::f16; @@ -526,6 +527,9 @@ async fn test_data_page_stats_with_all_null_page() { DataType::Utf8, DataType::LargeUtf8, 
             DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+            DataType::Decimal32(8, 2),   // as INT32
+            DataType::Decimal64(8, 2),   // as INT32
+            DataType::Decimal64(10, 2),  // as INT64
             DataType::Decimal128(8, 2),  // as INT32
             DataType::Decimal128(10, 2), // as INT64
             DataType::Decimal128(20, 2), // as FIXED_LEN_BYTE_ARRAY
@@ -1713,11 +1717,71 @@ async fn test_float16() {
 }
 
 #[tokio::test]
-async fn test_decimal() {
-    // This creates a parquet file of 1 column "decimal_col" with decimal data type and precision 9, scale 2
+async fn test_decimal32() {
+    // This creates a parquet file of 1 column "decimal32_col" with decimal data type and precision 9, scale 2
     // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
     let reader = TestReader {
-        scenario: Scenario::Decimal,
+        scenario: Scenario::Decimal32,
         row_per_group: 5,
     }
     .build()
     .await;
+
+    Test {
+        reader: &reader,
+        expected_min: Arc::new(
+            Decimal32Array::from(vec![100, -500, 2000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_max: Arc::new(
+            Decimal32Array::from(vec![600, 600, 6000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
+        expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        column_name: "decimal32_col",
+        check: Check::Both,
+    }
+    .run();
+}
+#[tokio::test]
+async fn test_decimal64() {
+    // This creates a parquet file of 1 column "decimal64_col" with decimal data type and precision 9, scale 2
+    // file has 3 record batches, each has 5 rows. They will be saved into 3 row groups
+    let reader = TestReader {
+        scenario: Scenario::Decimal64,
+        row_per_group: 5,
+    }
+    .build()
+    .await;
+
+    Test {
+        reader: &reader,
+        expected_min: Arc::new(
+            Decimal64Array::from(vec![100, -500, 2000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_max: Arc::new(
+            Decimal64Array::from(vec![600, 600, 6000])
+                .with_precision_and_scale(9, 2)
+                .unwrap(),
+        ),
+        expected_null_counts: UInt64Array::from(vec![0, 0, 0]),
+        expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])),
+        column_name: "decimal64_col",
+        check: Check::Both,
+    }
+    .run();
+}
+#[tokio::test]
+async fn test_decimal128() {
+    // This creates a parquet file of 1 column "decimal128_col" with decimal data type and precision 9, scale 2
+    // file has 3 record batches, each has 5 rows.
They will be saved into 3 row groups + let reader = TestReader { + scenario: Scenario::Decimal128, row_per_group: 5, } .build() @@ -1737,7 +1801,7 @@ async fn test_decimal() { ), expected_null_counts: UInt64Array::from(vec![0, 0, 0]), expected_row_counts: Some(UInt64Array::from(vec![5, 5, 5])), - column_name: "decimal_col", + column_name: "decimal128_col", check: Check::Both, } .run(); @@ -2316,6 +2380,8 @@ mod test { // DataType::Struct(Fields), // DataType::Union(UnionFields, UnionMode), // DataType::Dictionary(Box, Box), + // DataType::Decimal32(u8, i8), + // DataType::Decimal64(u8, i8), // DataType::Decimal128(u8, i8), // DataType::Decimal256(u8, i8), // DataType::Map(FieldRef, bool), From 48d5b441bb24020e6868ec6446585ce84de508ca Mon Sep 17 00:00:00 2001 From: Tom Forbes Date: Sun, 29 Dec 2024 14:22:46 +0000 Subject: [PATCH 20/68] Fix error message typos with Parquet compression (#6918) --- parquet/src/basic.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 97e8c22f1b2f..99f122fe4c3e 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -426,14 +426,19 @@ fn split_compression_string(str_setting: &str) -> Result<(&str, Option), Pa fn check_level_is_none(level: &Option) -> Result<(), ParquetError> { if level.is_some() { - return Err(ParquetError::General("level is not support".to_string())); + return Err(ParquetError::General( + "compression level is not supported".to_string(), + )); } Ok(()) } fn require_level(codec: &str, level: Option) -> Result { - level.ok_or(ParquetError::General(format!("{} require level", codec))) + level.ok_or(ParquetError::General(format!( + "{} requires a compression level", + codec + ))) } impl FromStr for Compression { From 3b96eaa1535cd0003c7ba3db68d1cece56f6f58c Mon Sep 17 00:00:00 2001 From: wiedld Date: Mon, 30 Dec 2024 05:57:59 -0500 Subject: [PATCH 21/68] chore: expose arrow-schema methods, for use when writing parquet outside of ArrowWriter (#6916) --- parquet/src/arrow/mod.rs | 4 ++-- parquet/src/arrow/schema/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index 6777e00fb05c..1305bbac83f0 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -123,8 +123,8 @@ use arrow_schema::{FieldRef, Schema}; pub use self::schema::arrow_to_parquet_schema; pub use self::schema::{ - parquet_to_arrow_field_levels, parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, - ArrowSchemaConverter, FieldLevels, + add_encoded_arrow_schema_to_metadata, encode_arrow_schema, parquet_to_arrow_field_levels, + parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns, ArrowSchemaConverter, FieldLevels, }; /// Schema metadata key used to store serialized Arrow IPC schema diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 689f7a103276..d1fa3eeb84d7 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -170,7 +170,7 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Result { } /// Encodes the Arrow schema into the IPC format, and base64 encodes it -fn encode_arrow_schema(schema: &Schema) -> String { +pub fn encode_arrow_schema(schema: &Schema) -> String { let options = writer::IpcWriteOptions::default(); #[allow(deprecated)] let mut dictionary_tracker = @@ -192,7 +192,7 @@ fn encode_arrow_schema(schema: &Schema) -> String { /// Mutates writer metadata by storing the encoded Arrow schema. 
 /// If there is an existing Arrow schema metadata, it is replaced.
-pub(crate) fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterProperties) {
+pub fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut WriterProperties) {
     let encoded = encode_arrow_schema(schema);
 
     let schema_kv = KeyValue {

From bf24a301ba2a25f442a30262753ab5b811af7794 Mon Sep 17 00:00:00 2001
From: Takahiro Ebato
Date: Mon, 30 Dec 2024 19:58:56 +0900
Subject: [PATCH 22/68] Improve error message for unsupported cast between struct and other types (#6919)

---
 arrow-cast/src/cast/mod.rs | 38 ++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 483680b1d39d..0946af53a60f 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -1081,12 +1081,12 @@ pub fn cast_with_options(
             let array = StructArray::try_new(to_fields.clone(), fields, array.nulls().cloned())?;
             Ok(Arc::new(array) as ArrayRef)
         }
-        (Struct(_), _) => Err(ArrowError::CastError(
-            "Cannot cast from struct to other types except struct".to_string(),
-        )),
-        (_, Struct(_)) => Err(ArrowError::CastError(
-            "Cannot cast to struct from other types except struct".to_string(),
-        )),
+        (Struct(_), _) => Err(ArrowError::CastError(format!(
+            "Casting from {from_type:?} to {to_type:?} not supported"
+        ))),
+        (_, Struct(_)) => Err(ArrowError::CastError(format!(
+            "Casting from {from_type:?} to {to_type:?} not supported"
+        ))),
         (_, Boolean) => match from_type {
             UInt8 => cast_numeric_to_bool::<UInt8Type>(array),
             UInt16 => cast_numeric_to_bool::<UInt16Type>(array),
@@ -10288,6 +10288,32 @@ mod tests {
         );
     }
 
+    #[test]
+    fn test_cast_struct_to_non_struct() {
+        let boolean = Arc::new(BooleanArray::from(vec![true, false]));
+        let struct_array = StructArray::from(vec![(
+            Arc::new(Field::new("a", DataType::Boolean, false)),
+            boolean.clone() as ArrayRef,
+        )]);
+        let to_type = DataType::Utf8;
+        let result = cast(&struct_array, &to_type);
+        assert_eq!(
+            r#"Cast error: Casting from Struct([Field { name: "a", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]) to Utf8 not supported"#,
+            result.unwrap_err().to_string()
+        );
+    }
+
+    #[test]
+    fn test_cast_non_struct_to_struct() {
+        let array = StringArray::from(vec!["a", "b"]);
+        let to_type = DataType::Struct(vec![Field::new("a", DataType::Boolean, false)].into());
+        let result = cast(&array, &to_type);
+        assert_eq!(
+            r#"Cast error: Casting from Utf8 to Struct([Field { name: "a", data_type: Boolean, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]) not supported"#,
+            result.unwrap_err().to_string()
+        );
+    }
+
     #[test]
     fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() {
         let array = vec![Some(123456789)];

From d58348d1f934ba56e6a426867b95c720176ebce5 Mon Sep 17 00:00:00 2001
From: Tai Le Manh
Date: Mon, 30 Dec 2024 19:42:37 +0700
Subject: [PATCH 23/68] [arrow-string] Implement string view support for `regexp_match` (#6849)

* [arrow-string] Implement string view support for regexp match

Signed-off-by: Tai Le Manh

* update unit tests

* fix clippy warnings

* Add test cases

Signed-off-by: Tai Le Manh

---------

Signed-off-by: Tai Le Manh
---
 arrow-string/src/regexp.rs | 640 ++++++++++++++++++++++++++-----------
 1 file changed, 452 insertions(+), 188 deletions(-)

diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs
index d14662be7280..f3893cd5bd13 100644
--- a/arrow-string/src/regexp.rs
+++
b/arrow-string/src/regexp.rs @@ -20,7 +20,9 @@ use crate::like::StringArrayType; -use arrow_array::builder::{BooleanBufferBuilder, GenericStringBuilder, ListBuilder}; +use arrow_array::builder::{ + BooleanBufferBuilder, GenericStringBuilder, ListBuilder, StringViewBuilder, +}; use arrow_array::cast::AsArray; use arrow_array::*; use arrow_buffer::NullBuffer; @@ -243,78 +245,96 @@ where Ok(BooleanArray::from(data)) } -fn regexp_array_match( - array: &GenericStringArray, - regex_array: &GenericStringArray, - flags_array: Option<&GenericStringArray>, -) -> Result { - let mut patterns: HashMap = HashMap::new(); - let builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); - let mut list_builder = ListBuilder::new(builder); +macro_rules! process_regexp_array_match { + ($array:expr, $regex_array:expr, $flags_array:expr, $list_builder:expr) => { + let mut patterns: HashMap = HashMap::new(); - let complete_pattern = match flags_array { - Some(flags) => Box::new( - regex_array - .iter() - .zip(flags.iter()) - .map(|(pattern, flags)| { + let complete_pattern = match $flags_array { + Some(flags) => Box::new($regex_array.iter().zip(flags.iter()).map( + |(pattern, flags)| { pattern.map(|pattern| match flags { Some(value) => format!("(?{value}){pattern}"), None => pattern.to_string(), }) - }), - ) as Box>>, - None => Box::new( - regex_array - .iter() - .map(|pattern| pattern.map(|pattern| pattern.to_string())), - ), - }; + }, + )) as Box>>, + None => Box::new( + $regex_array + .iter() + .map(|pattern| pattern.map(|pattern| pattern.to_string())), + ), + }; - array - .iter() - .zip(complete_pattern) - .map(|(value, pattern)| { - match (value, pattern) { - // Required for Postgres compatibility: - // SELECT regexp_match('foobarbequebaz', ''); = {""} - (Some(_), Some(pattern)) if pattern == *"" => { - list_builder.values().append_value(""); - list_builder.append(true); - } - (Some(value), Some(pattern)) => { - let existing_pattern = patterns.get(&pattern); - let re = match existing_pattern { - Some(re) => re, - None => { - let re = Regex::new(pattern.as_str()).map_err(|e| { - ArrowError::ComputeError(format!( - "Regular expression did not compile: {e:?}" - )) - })?; - patterns.entry(pattern).or_insert(re) - } - }; - match re.captures(value) { - Some(caps) => { - let mut iter = caps.iter(); - if caps.len() > 1 { - iter.next(); - } - for m in iter.flatten() { - list_builder.values().append_value(m.as_str()); + $array + .iter() + .zip(complete_pattern) + .map(|(value, pattern)| { + match (value, pattern) { + // Required for Postgres compatibility: + // SELECT regexp_match('foobarbequebaz', ''); = {""} + (Some(_), Some(pattern)) if pattern == *"" => { + $list_builder.values().append_value(""); + $list_builder.append(true); + } + (Some(value), Some(pattern)) => { + let existing_pattern = patterns.get(&pattern); + let re = match existing_pattern { + Some(re) => re, + None => { + let re = Regex::new(pattern.as_str()).map_err(|e| { + ArrowError::ComputeError(format!( + "Regular expression did not compile: {e:?}" + )) + })?; + patterns.entry(pattern).or_insert(re) } + }; + match re.captures(value) { + Some(caps) => { + let mut iter = caps.iter(); + if caps.len() > 1 { + iter.next(); + } + for m in iter.flatten() { + $list_builder.values().append_value(m.as_str()); + } - list_builder.append(true); + $list_builder.append(true); + } + None => $list_builder.append(false), } - None => list_builder.append(false), } + _ => $list_builder.append(false), } - _ => list_builder.append(false), - } - Ok(()) - }) 
- .collect::, ArrowError>>()?; + Ok(()) + }) + .collect::, ArrowError>>()?; + }; +} + +fn regexp_array_match( + array: &GenericStringArray, + regex_array: &GenericStringArray, + flags_array: Option<&GenericStringArray>, +) -> Result { + let builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); + let mut list_builder = ListBuilder::new(builder); + + process_regexp_array_match!(array, regex_array, flags_array, list_builder); + + Ok(Arc::new(list_builder.finish())) +} + +fn regexp_array_match_utf8view( + array: &StringViewArray, + regex_array: &StringViewArray, + flags_array: Option<&StringViewArray>, +) -> Result { + let builder = StringViewBuilder::with_capacity(0); + let mut list_builder = ListBuilder::new(builder); + + process_regexp_array_match!(array, regex_array, flags_array, list_builder); + Ok(Arc::new(list_builder.finish())) } @@ -333,6 +353,54 @@ fn get_scalar_pattern_flag<'a, OffsetSize: OffsetSizeTrait>( } } +fn get_scalar_pattern_flag_utf8view<'a>( + regex_array: &'a dyn Array, + flag_array: Option<&'a dyn Array>, +) -> (Option<&'a str>, Option<&'a str>) { + let regex = regex_array.as_string_view(); + let regex = regex.is_valid(0).then(|| regex.value(0)); + + if let Some(flag_array) = flag_array { + let flag = flag_array.as_string_view(); + (regex, flag.is_valid(0).then(|| flag.value(0))) + } else { + (regex, None) + } +} + +macro_rules! process_regexp_match { + ($array:expr, $regex:expr, $list_builder:expr) => { + $array + .iter() + .map(|value| { + match value { + // Required for Postgres compatibility: + // SELECT regexp_match('foobarbequebaz', ''); = {""} + Some(_) if $regex.as_str().is_empty() => { + $list_builder.values().append_value(""); + $list_builder.append(true); + } + Some(value) => match $regex.captures(value) { + Some(caps) => { + let mut iter = caps.iter(); + if caps.len() > 1 { + iter.next(); + } + for m in iter.flatten() { + $list_builder.values().append_value(m.as_str()); + } + $list_builder.append(true); + } + None => $list_builder.append(false), + }, + None => $list_builder.append(false), + } + Ok(()) + }) + .collect::, ArrowError>>()? 
+ }; +} + fn regexp_scalar_match( array: &GenericStringArray, regex: &Regex, @@ -340,35 +408,19 @@ fn regexp_scalar_match( let builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); let mut list_builder = ListBuilder::new(builder); - array - .iter() - .map(|value| { - match value { - // Required for Postgres compatibility: - // SELECT regexp_match('foobarbequebaz', ''); = {""} - Some(_) if regex.as_str() == "" => { - list_builder.values().append_value(""); - list_builder.append(true); - } - Some(value) => match regex.captures(value) { - Some(caps) => { - let mut iter = caps.iter(); - if caps.len() > 1 { - iter.next(); - } - for m in iter.flatten() { - list_builder.values().append_value(m.as_str()); - } + process_regexp_match!(array, regex, list_builder); - list_builder.append(true); - } - None => list_builder.append(false), - }, - _ => list_builder.append(false), - } - Ok(()) - }) - .collect::, ArrowError>>()?; + Ok(Arc::new(list_builder.finish())) +} + +fn regexp_scalar_match_utf8view( + array: &StringViewArray, + regex: &Regex, +) -> Result { + let builder = StringViewBuilder::with_capacity(0); + let mut list_builder = ListBuilder::new(builder); + + process_regexp_match!(array, regex, list_builder); Ok(Arc::new(list_builder.finish())) } @@ -406,7 +458,7 @@ pub fn regexp_match( if array.data_type() != rhs.data_type() { return Err(ArrowError::ComputeError( - "regexp_match() requires both array and pattern to be either Utf8 or LargeUtf8" + "regexp_match() requires both array and pattern to be either Utf8, Utf8View or LargeUtf8" .to_string(), )); } @@ -428,7 +480,7 @@ pub fn regexp_match( if flags_array.is_some() && rhs.data_type() != flags.unwrap().data_type() { return Err(ArrowError::ComputeError( - "regexp_match() requires both pattern and flags to be either string or largestring" + "regexp_match() requires both pattern and flags to be either Utf8, Utf8View or LargeUtf8" .to_string(), )); } @@ -436,11 +488,13 @@ pub fn regexp_match( if is_rhs_scalar { // Regex and flag is scalars let (regex, flag) = match rhs.data_type() { + DataType::Utf8View => get_scalar_pattern_flag_utf8view(rhs, flags), DataType::Utf8 => get_scalar_pattern_flag::(rhs, flags), DataType::LargeUtf8 => get_scalar_pattern_flag::(rhs, flags), _ => { return Err(ArrowError::ComputeError( - "regexp_match() requires pattern to be either Utf8 or LargeUtf8".to_string(), + "regexp_match() requires pattern to be either Utf8, Utf8View or LargeUtf8" + .to_string(), )); } }; @@ -468,14 +522,21 @@ pub fn regexp_match( })?; match array.data_type() { + DataType::Utf8View => regexp_scalar_match_utf8view(array.as_string_view(), &re), DataType::Utf8 => regexp_scalar_match(array.as_string::(), &re), DataType::LargeUtf8 => regexp_scalar_match(array.as_string::(), &re), _ => Err(ArrowError::ComputeError( - "regexp_match() requires array to be either Utf8 or LargeUtf8".to_string(), + "regexp_match() requires array to be either Utf8, Utf8View or LargeUtf8" + .to_string(), )), } } else { match array.data_type() { + DataType::Utf8View => { + let regex_array = rhs.as_string_view(); + let flags_array = flags.map(|flags| flags.as_string_view()); + regexp_array_match_utf8view(array.as_string_view(), regex_array, flags_array) + } DataType::Utf8 => { let regex_array = rhs.as_string(); let flags_array = flags.map(|flags| flags.as_string()); @@ -487,7 +548,8 @@ pub fn regexp_match( regexp_array_match(array.as_string::(), regex_array, flags_array) } _ => Err(ArrowError::ComputeError( - "regexp_match() requires array to be either Utf8 
or LargeUtf8".to_string(), + "regexp_match() requires array to be either Utf8, Utf8View or LargeUtf8" + .to_string(), )), } } @@ -497,114 +559,316 @@ pub fn regexp_match( mod tests { use super::*; - #[test] - fn match_single_group() { - let values = vec![ + macro_rules! test_match_single_group { + ($test_name:ident, $values:expr, $patterns:expr, $arr_type:ty, $builder_type:ty, $expected:expr) => { + #[test] + fn $test_name() { + let array: $arr_type = <$arr_type>::from($values); + let pattern: $arr_type = <$arr_type>::from($patterns); + + let actual = regexp_match(&array, &pattern, None).unwrap(); + + let elem_builder: $builder_type = <$builder_type>::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + + for val in $expected { + match val { + Some(v) => { + expected_builder.values().append_value(v); + expected_builder.append(true); + } + None => expected_builder.append(false), + } + } + + let expected = expected_builder.finish(); + let result = actual.as_any().downcast_ref::().unwrap(); + assert_eq!(&expected, result); + } + }; + } + + test_match_single_group!( + match_single_group_string, + vec![ Some("abc-005-def"), Some("X-7-5"), Some("X545"), None, Some("foobarbequebaz"), Some("foobarbequebaz"), - ]; - let array = StringArray::from(values); - let mut pattern_values = vec![r".*-(\d*)-.*"; 4]; - pattern_values.push(r"(bar)(bequ1e)"); - pattern_values.push(""); - let pattern = GenericStringArray::::from(pattern_values); - let actual = regexp_match(&array, &pattern, None).unwrap(); - let elem_builder: GenericStringBuilder = GenericStringBuilder::new(); - let mut expected_builder = ListBuilder::new(elem_builder); - expected_builder.values().append_value("005"); - expected_builder.append(true); - expected_builder.values().append_value("7"); - expected_builder.append(true); - expected_builder.append(false); - expected_builder.append(false); - expected_builder.append(false); - expected_builder.values().append_value(""); - expected_builder.append(true); - let expected = expected_builder.finish(); - let result = actual.as_any().downcast_ref::().unwrap(); - assert_eq!(&expected, result); - } + ], + vec![ + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r"(bar)(bequ1e)", + "" + ], + StringArray, + GenericStringBuilder, + [Some("005"), Some("7"), None, None, None, Some("")] + ); + test_match_single_group!( + match_single_group_string_view, + vec![ + Some("abc-005-def"), + Some("X-7-5"), + Some("X545"), + None, + Some("foobarbequebaz"), + Some("foobarbequebaz"), + ], + vec![ + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r".*-(\d*)-.*", + r"(bar)(bequ1e)", + "" + ], + StringViewArray, + StringViewBuilder, + [Some("005"), Some("7"), None, None, None, Some("")] + ); + + macro_rules! 
test_match_single_group_with_flags { + ($test_name:ident, $values:expr, $patterns:expr, $flags:expr, $array_type:ty, $builder_type:ty, $expected:expr) => { + #[test] + fn $test_name() { + let array: $array_type = <$array_type>::from($values); + let pattern: $array_type = <$array_type>::from($patterns); + let flags: $array_type = <$array_type>::from($flags); + + let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap(); - #[test] - fn match_single_group_with_flags() { - let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None]; - let array = StringArray::from(values); - let pattern = StringArray::from(vec![r"x.*-(\d*)-.*"; 4]); - let flags = StringArray::from(vec!["i"; 4]); - let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap(); - let elem_builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); - let mut expected_builder = ListBuilder::new(elem_builder); - expected_builder.append(false); - expected_builder.values().append_value("7"); - expected_builder.append(true); - expected_builder.append(false); - expected_builder.append(false); - let expected = expected_builder.finish(); - let result = actual.as_any().downcast_ref::().unwrap(); - assert_eq!(&expected, result); + let elem_builder: $builder_type = <$builder_type>::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + + for val in $expected { + match val { + Some(v) => { + expected_builder.values().append_value(v); + expected_builder.append(true); + } + None => { + expected_builder.append(false); + } + } + } + + let expected = expected_builder.finish(); + let result = actual.as_any().downcast_ref::().unwrap(); + assert_eq!(&expected, result); + } + }; } - #[test] - fn match_scalar_pattern() { - let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None]; - let array = StringArray::from(values); - let pattern = Scalar::new(StringArray::from(vec![r"x.*-(\d*)-.*"; 1])); - let flags = Scalar::new(StringArray::from(vec!["i"; 1])); - let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap(); - let elem_builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); - let mut expected_builder = ListBuilder::new(elem_builder); - expected_builder.append(false); - expected_builder.values().append_value("7"); - expected_builder.append(true); - expected_builder.append(false); - expected_builder.append(false); - let expected = expected_builder.finish(); - let result = actual.as_any().downcast_ref::().unwrap(); - assert_eq!(&expected, result); - - // No flag - let values = vec![Some("abc-005-def"), Some("x-7-5"), Some("X545"), None]; - let array = StringArray::from(values); - let actual = regexp_match(&array, &pattern, None).unwrap(); - let result = actual.as_any().downcast_ref::().unwrap(); - assert_eq!(&expected, result); + test_match_single_group_with_flags!( + match_single_group_with_flags_string, + vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None], + vec![r"x.*-(\d*)-.*"; 4], + vec!["i"; 4], + StringArray, + GenericStringBuilder, + [None, Some("7"), None, None] + ); + test_match_single_group_with_flags!( + match_single_group_with_flags_stringview, + vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None], + vec![r"x.*-(\d*)-.*"; 4], + vec!["i"; 4], + StringViewArray, + StringViewBuilder, + [None, Some("7"), None, None] + ); + + macro_rules! 
test_match_scalar_pattern { + ($test_name:ident, $values:expr, $pattern:expr, $flag:expr, $array_type:ty, $builder_type:ty, $expected:expr) => { + #[test] + fn $test_name() { + let array: $array_type = <$array_type>::from($values); + + let pattern_scalar = Scalar::new(<$array_type>::from(vec![$pattern; 1])); + let flag_scalar = Scalar::new(<$array_type>::from(vec![$flag; 1])); + + let actual = regexp_match(&array, &pattern_scalar, Some(&flag_scalar)).unwrap(); + + let elem_builder: $builder_type = <$builder_type>::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + + for val in $expected { + match val { + Some(v) => { + expected_builder.values().append_value(v); + expected_builder.append(true); + } + None => expected_builder.append(false), + } + } + + let expected = expected_builder.finish(); + let result = actual.as_any().downcast_ref::().unwrap(); + assert_eq!(&expected, result); + } + }; } - #[test] - fn match_scalar_no_pattern() { - let values = vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None]; - let array = StringArray::from(values); - let pattern = Scalar::new(new_null_array(&DataType::Utf8, 1)); - let actual = regexp_match(&array, &pattern, None).unwrap(); - let elem_builder: GenericStringBuilder = GenericStringBuilder::with_capacity(0, 0); - let mut expected_builder = ListBuilder::new(elem_builder); - expected_builder.append(false); - expected_builder.append(false); - expected_builder.append(false); - expected_builder.append(false); - let expected = expected_builder.finish(); - let result = actual.as_any().downcast_ref::().unwrap(); - assert_eq!(&expected, result); + test_match_scalar_pattern!( + match_scalar_pattern_string_with_flags, + vec![ + Some("abc-005-def"), + Some("x-7-5"), + Some("X-0-Y"), + Some("X545"), + None + ], + r"x.*-(\d*)-.*", + Some("i"), + StringArray, + GenericStringBuilder, + [None, Some("7"), Some("0"), None, None] + ); + test_match_scalar_pattern!( + match_scalar_pattern_stringview_with_flags, + vec![ + Some("abc-005-def"), + Some("x-7-5"), + Some("X-0-Y"), + Some("X545"), + None + ], + r"x.*-(\d*)-.*", + Some("i"), + StringViewArray, + StringViewBuilder, + [None, Some("7"), Some("0"), None, None] + ); + + test_match_scalar_pattern!( + match_scalar_pattern_string_no_flags, + vec![ + Some("abc-005-def"), + Some("x-7-5"), + Some("X-0-Y"), + Some("X545"), + None + ], + r"x.*-(\d*)-.*", + None::<&str>, + StringArray, + GenericStringBuilder, + [None, Some("7"), None, None, None] + ); + test_match_scalar_pattern!( + match_scalar_pattern_stringview_no_flags, + vec![ + Some("abc-005-def"), + Some("x-7-5"), + Some("X-0-Y"), + Some("X545"), + None + ], + r"x.*-(\d*)-.*", + None::<&str>, + StringViewArray, + StringViewBuilder, + [None, Some("7"), None, None, None] + ); + + macro_rules! 
test_match_scalar_no_pattern { + ($test_name:ident, $values:expr, $array_type:ty, $pattern_type:expr, $builder_type:ty, $expected:expr) => { + #[test] + fn $test_name() { + let array: $array_type = <$array_type>::from($values); + let pattern = Scalar::new(new_null_array(&$pattern_type, 1)); + + let actual = regexp_match(&array, &pattern, None).unwrap(); + + let elem_builder: $builder_type = <$builder_type>::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + + for val in $expected { + match val { + Some(v) => { + expected_builder.values().append_value(v); + expected_builder.append(true); + } + None => expected_builder.append(false), + } + } + + let expected = expected_builder.finish(); + let result = actual.as_any().downcast_ref::().unwrap(); + assert_eq!(&expected, result); + } + }; } - #[test] - fn test_single_group_not_skip_match() { - let array = StringArray::from(vec![Some("foo"), Some("bar")]); - let pattern = GenericStringArray::::from(vec![r"foo"]); - let actual = regexp_match(&array, &pattern, None).unwrap(); - let result = actual.as_any().downcast_ref::().unwrap(); - let elem_builder: GenericStringBuilder = GenericStringBuilder::new(); - let mut expected_builder = ListBuilder::new(elem_builder); - expected_builder.values().append_value("foo"); - expected_builder.append(true); - let expected = expected_builder.finish(); - assert_eq!(&expected, result); + test_match_scalar_no_pattern!( + match_scalar_no_pattern_string, + vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None], + StringArray, + DataType::Utf8, + GenericStringBuilder, + [None::<&str>, None, None, None] + ); + test_match_scalar_no_pattern!( + match_scalar_no_pattern_stringview, + vec![Some("abc-005-def"), Some("X-7-5"), Some("X545"), None], + StringViewArray, + DataType::Utf8View, + StringViewBuilder, + [None::<&str>, None, None, None] + ); + + macro_rules! test_match_single_group_not_skip { + ($test_name:ident, $values:expr, $pattern:expr, $array_type:ty, $builder_type:ty, $expected:expr) => { + #[test] + fn $test_name() { + let array: $array_type = <$array_type>::from($values); + let pattern: $array_type = <$array_type>::from(vec![$pattern]); + + let actual = regexp_match(&array, &pattern, None).unwrap(); + + let elem_builder: $builder_type = <$builder_type>::new(); + let mut expected_builder = ListBuilder::new(elem_builder); + + for val in $expected { + match val { + Some(v) => { + expected_builder.values().append_value(v); + expected_builder.append(true); + } + None => expected_builder.append(false), + } + } + + let expected = expected_builder.finish(); + let result = actual.as_any().downcast_ref::().unwrap(); + assert_eq!(&expected, result); + } + }; } + test_match_single_group_not_skip!( + match_single_group_not_skip_string, + vec![Some("foo"), Some("bar")], + r"foo", + StringArray, + GenericStringBuilder, + [Some("foo")] + ); + test_match_single_group_not_skip!( + match_single_group_not_skip_stringview, + vec![Some("foo"), Some("bar")], + r"foo", + StringViewArray, + StringViewBuilder, + [Some("foo")] + ); + macro_rules! 
test_flag_utf8 { ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => { #[test] From 8880bde3de808e1d159ae77964525f064124fd9d Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 1 Jan 2025 06:25:04 -0800 Subject: [PATCH 24/68] Add doctest example for `Buffer::from_bytes` (#6920) * Add doctest example for * Remove typo * Update arrow-buffer/src/buffer/immutable.rs --------- Co-authored-by: Andrew Lamb --- arrow-buffer/src/buffer/immutable.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index d0c8ffa39783..cf1d6f366751 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -60,6 +60,14 @@ unsafe impl Sync for Buffer where Bytes: Sync {} impl Buffer { /// Auxiliary method to create a new Buffer + /// + /// This can be used with a [`bytes::Bytes`] via `into()`: + /// + /// ``` + /// # use arrow_buffer::Buffer; + /// let bytes = bytes::Bytes::from_static(b"foo"); + /// let buffer = Buffer::from_bytes(bytes.into()); + /// ``` #[inline] pub fn from_bytes(bytes: Bytes) -> Self { let length = bytes.len(); From 7289a998f33d1243a9447968970a1650a217796a Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 2 Jan 2025 10:09:34 +0100 Subject: [PATCH 25/68] object_store: Add enabled-by-default "fs" feature (#6636) --- .github/workflows/object_store.yml | 4 ++++ object_store/Cargo.toml | 4 +++- object_store/src/chunked.rs | 4 ++++ object_store/src/lib.rs | 17 +++++++++++------ object_store/src/limit.rs | 1 + object_store/src/parse.rs | 4 ++-- object_store/src/throttle.rs | 2 ++ object_store/src/util.rs | 2 +- 8 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.github/workflows/object_store.yml b/.github/workflows/object_store.yml index 93f809aaabd4..899318f01324 100644 --- a/.github/workflows/object_store.yml +++ b/.github/workflows/object_store.yml @@ -54,6 +54,10 @@ jobs: # targets. 
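          # One clippy invocation per feature combination below keeps the
          # cfg-gated code (such as the new "fs" feature) honest in both states.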
- name: Run clippy with default features run: cargo clippy -- -D warnings + - name: Run clippy without default features + run: cargo clippy --no-default-features -- -D warnings + - name: Run clippy with fs features + run: cargo clippy --no-default-features --features fs -- -D warnings - name: Run clippy with aws feature run: cargo clippy --features aws -- -D warnings - name: Run clippy with gcp feature diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index bf254b3a0bbd..a127be3602ef 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -41,7 +41,7 @@ percent-encoding = "2.1" snafu = { version = "0.8", default-features = false, features = ["std", "rust_1_61"] } tracing = { version = "0.1" } url = "2.2" -walkdir = "2" +walkdir = { version = "2", optional = true } # Cloud storage support base64 = { version = "0.22", default-features = false, features = ["std"], optional = true } @@ -61,8 +61,10 @@ httparse = { version = "1.8.0", default-features = false, features = ["std"], op nix = { version = "0.29.0", features = ["fs"] } [features] +default = ["fs"] cloud = ["serde", "serde_json", "quick-xml", "hyper", "reqwest", "reqwest/json", "reqwest/stream", "chrono/serde", "base64", "rand", "ring"] azure = ["cloud", "httparse"] +fs = ["walkdir"] gcp = ["cloud", "rustls-pemfile"] aws = ["cloud", "md-5"] http = ["cloud"] diff --git a/object_store/src/chunked.rs b/object_store/src/chunked.rs index 98cc20498013..3f83c1336dc4 100644 --- a/object_store/src/chunked.rs +++ b/object_store/src/chunked.rs @@ -86,6 +86,7 @@ impl ObjectStore for ChunkedStore { async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { let r = self.inner.get_opts(location, options).await?; let stream = match r.payload { + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] GetResultPayload::File(file, path) => { crate::local::chunked_stream(file, path, r.range.clone(), self.chunk_size) } @@ -178,7 +179,9 @@ impl ObjectStore for ChunkedStore { mod tests { use futures::StreamExt; + #[cfg(feature = "fs")] use crate::integration::*; + #[cfg(feature = "fs")] use crate::local::LocalFileSystem; use crate::memory::InMemory; use crate::path::Path; @@ -209,6 +212,7 @@ mod tests { } } + #[cfg(feature = "fs")] #[tokio::test] async fn test_chunked() { let temporary = tempfile::tempdir().unwrap(); diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 4d8d8f02a0bc..6f5733226922 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -66,10 +66,13 @@ //! By default, this crate provides the following implementations: //! //! * Memory: [`InMemory`](memory::InMemory) -//! * Local filesystem: [`LocalFileSystem`](local::LocalFileSystem) //! //! Feature flags are used to enable support for other implementations: //! +#![cfg_attr( + feature = "fs", + doc = "* Local filesystem: [`LocalFileSystem`](local::LocalFileSystem)" +)] #![cfg_attr( feature = "gcp", doc = "* [`gcp`]: [Google Cloud Storage](https://cloud.google.com/storage/) support. 
See [`GoogleCloudStorageBuilder`](gcp::GoogleCloudStorageBuilder)" @@ -513,7 +516,7 @@ pub mod gcp; #[cfg(feature = "http")] pub mod http; pub mod limit; -#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] pub mod local; pub mod memory; pub mod path; @@ -557,7 +560,7 @@ pub use upload::*; pub use util::{coalesce_ranges, collect_bytes, GetRange, OBJECT_STORE_COALESCE_DEFAULT}; use crate::path::Path; -#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] use crate::util::maybe_spawn_blocking; use async_trait::async_trait; use bytes::Bytes; @@ -565,7 +568,7 @@ use chrono::{DateTime, Utc}; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use snafu::Snafu; use std::fmt::{Debug, Formatter}; -#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] use std::io::{Read, Seek, SeekFrom}; use std::ops::Range; use std::sync::Arc; @@ -1028,6 +1031,7 @@ pub struct GetResult { /// be able to optimise the case of a file already present on local disk pub enum GetResultPayload { /// The file, path + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] File(std::fs::File, std::path::PathBuf), /// An opaque stream of bytes Stream(BoxStream<'static, Result>), @@ -1036,6 +1040,7 @@ pub enum GetResultPayload { impl Debug for GetResultPayload { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] Self::File(_, _) => write!(f, "GetResultPayload(File)"), Self::Stream(_) => write!(f, "GetResultPayload(Stream)"), } @@ -1047,7 +1052,7 @@ impl GetResult { pub async fn bytes(self) -> Result { let len = self.range.end - self.range.start; match self.payload { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] GetResultPayload::File(mut file, path) => { maybe_spawn_blocking(move || { file.seek(SeekFrom::Start(self.range.start as _)) @@ -1087,7 +1092,7 @@ impl GetResult { /// no additional complexity or overheads pub fn into_stream(self) -> BoxStream<'static, Result> { match self.payload { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] GetResultPayload::File(file, path) => { const CHUNK_SIZE: usize = 8 * 1024; local::chunked_stream(file, path, self.range, CHUNK_SIZE) diff --git a/object_store/src/limit.rs b/object_store/src/limit.rs index 64b96ad1a96c..6a3c3b574e62 100644 --- a/object_store/src/limit.rs +++ b/object_store/src/limit.rs @@ -199,6 +199,7 @@ impl ObjectStore for LimitStore { fn permit_get_result(r: GetResult, permit: OwnedSemaphorePermit) -> GetResult { let payload = match r.payload { + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] v @ GetResultPayload::File(_, _) => v, GetResultPayload::Stream(s) => { GetResultPayload::Stream(PermitWrapper::new(s, permit).boxed()) diff --git a/object_store/src/parse.rs b/object_store/src/parse.rs index debc9e529312..a3919305281d 100644 --- a/object_store/src/parse.rs +++ b/object_store/src/parse.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
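The `#[allow(clippy::infallible_destructuring_match)]` added in the throttle.rs hunk below is needed because, once the `File` variant is gated behind the `fs` feature, a build without that feature leaves the match with a single arm. A self-contained sketch of the situation (enum simplified, not the crate's real `GetResultPayload`):

    // With the `fs` feature disabled this enum has one variant, so clippy
    // would otherwise suggest rewriting the match as a plain `let` binding.
    enum Payload {
        Stream(Vec<u8>),
        #[cfg(feature = "fs")]
        File(std::path::PathBuf),
    }

    fn stream_bytes(payload: Payload) -> Vec<u8> {
        #[allow(clippy::infallible_destructuring_match)]
        let s = match payload {
            Payload::Stream(s) => s,
            #[cfg(feature = "fs")]
            Payload::File(_) => unimplemented!("local files are not throttled"),
        };
        s
    }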
-#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] use crate::local::LocalFileSystem; use crate::memory::InMemory; use crate::path::Path; @@ -179,7 +179,7 @@ where let path = Path::parse(path)?; let store = match scheme { - #[cfg(not(target_arch = "wasm32"))] + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] ObjectStoreScheme::Local => Box::new(LocalFileSystem::new()) as _, ObjectStoreScheme::Memory => Box::new(InMemory::new()) as _, #[cfg(feature = "aws")] diff --git a/object_store/src/throttle.rs b/object_store/src/throttle.rs index d07276c3dcad..b9dff5c6d1d2 100644 --- a/object_store/src/throttle.rs +++ b/object_store/src/throttle.rs @@ -307,8 +307,10 @@ fn usize_to_u32_saturate(x: usize) -> u32 { } fn throttle_get(result: GetResult, wait_get_per_byte: Duration) -> GetResult { + #[allow(clippy::infallible_destructuring_match)] let s = match result.payload { GetResultPayload::Stream(s) => s, + #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] GetResultPayload::File(_, _) => unimplemented!(), }; diff --git a/object_store/src/util.rs b/object_store/src/util.rs index ecf90f95d7c7..99102a99e61e 100644 --- a/object_store/src/util.rs +++ b/object_store/src/util.rs @@ -75,7 +75,7 @@ where } } -#[cfg(not(target_arch = "wasm32"))] +#[cfg(all(feature = "fs", not(target_arch = "wasm32")))] /// Takes a function and spawns it to a tokio blocking pool if available pub(crate) async fn maybe_spawn_blocking(f: F) -> Result where From 95dae6a21a3ead3e9eca5eea35cfc2bcceb3d8dd Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Thu, 2 Jan 2025 23:25:44 +0100 Subject: [PATCH 26/68] object_store: Migrate from `snafu` to `thiserror` (#6266) * object_store: Add `thiserror` dependency * object_store/memory: Migrate from `snafu` to `thiserror` * object_store/parse: Migrate from `snafu` to `thiserror` * object_store/util: Migrate from `snafu` to `thiserror` * object_store/local: Migrate from `snafu` to `thiserror` * object_store/delimited: Migrate from `snafu` to `thiserror` * object_store/path/parts: Migrate from `snafu` to `thiserror` * object_store/path: Migrate from `snafu` to `thiserror` * object_store/http: Migrate from `snafu` to `thiserror` * object_store/client: Migrate from `snafu` to `thiserror` * object_store/aws: Migrate from `snafu` to `thiserror` * object_store/azure: Migrate from `snafu` to `thiserror` * object_store/gcp: Migrate from `snafu` to `thiserror` * object_store/lib: Migrate from `snafu` to `thiserror` * Remove `snafu` dependency --- object_store/Cargo.toml | 2 +- object_store/src/aws/builder.rs | 52 +++++--- object_store/src/aws/client.rs | 87 ++++++------ object_store/src/aws/credential.rs | 17 ++- object_store/src/aws/resolve.rs | 30 ++--- object_store/src/azure/builder.rs | 65 +++++---- object_store/src/azure/client.rs | 93 +++++++------ object_store/src/azure/credential.rs | 41 +++--- object_store/src/client/get.rs | 97 +++++++------- object_store/src/client/header.rs | 54 +++++--- object_store/src/client/retry.rs | 13 +- object_store/src/delimited.rs | 15 ++- object_store/src/gcp/builder.rs | 48 ++++--- object_store/src/gcp/client.rs | 91 +++++++------ object_store/src/gcp/credential.rs | 57 ++++---- object_store/src/http/client.rs | 52 +++++--- object_store/src/http/mod.rs | 13 +- object_store/src/lib.rs | 36 +++-- object_store/src/local.rs | 191 ++++++++++++--------------- object_store/src/memory.rs | 31 ++--- object_store/src/parse.rs | 12 +- object_store/src/path/mod.rs | 35 +++-- object_store/src/path/parts.rs | 7 +- 
object_store/src/util.rs | 9 +- 24 files changed, 620 insertions(+), 528 deletions(-) diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index a127be3602ef..6f5e9db1bc70 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -38,7 +38,7 @@ humantime = "2.1" itertools = "0.13.0" parking_lot = { version = "0.12" } percent-encoding = "2.1" -snafu = { version = "0.8", default-features = false, features = ["std", "rust_1_61"] } +thiserror = "2.0.2" tracing = { version = "0.1" } url = "2.2" walkdir = { version = "2", optional = true } diff --git a/object_store/src/aws/builder.rs b/object_store/src/aws/builder.rs index 840245a7b5d4..d29fa782e8ff 100644 --- a/object_store/src/aws/builder.rs +++ b/object_store/src/aws/builder.rs @@ -32,7 +32,6 @@ use itertools::Itertools; use md5::{Digest, Md5}; use reqwest::header::{HeaderMap, HeaderValue}; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -43,46 +42,46 @@ use url::Url; static DEFAULT_METADATA_ENDPOINT: &str = "http://169.254.169.254"; /// A specialized `Error` for object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Missing bucket name"))] + #[error("Missing bucket name")] MissingBucketName, - #[snafu(display("Missing AccessKeyId"))] + #[error("Missing AccessKeyId")] MissingAccessKeyId, - #[snafu(display("Missing SecretAccessKey"))] + #[error("Missing SecretAccessKey")] MissingSecretAccessKey, - #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))] + #[error("Unable parse source url. Url: {}, Error: {}", url, source)] UnableToParseUrl { source: url::ParseError, url: String, }, - #[snafu(display( + #[error( "Unknown url scheme cannot be parsed into storage location: {}", scheme - ))] + )] UnknownUrlScheme { scheme: String }, - #[snafu(display("URL did not match any known pattern for scheme: {}", url))] + #[error("URL did not match any known pattern for scheme: {}", url)] UrlNotRecognised { url: String }, - #[snafu(display("Configuration key: '{}' is not known.", key))] + #[error("Configuration key: '{}' is not known.", key)] UnknownConfigurationKey { key: String }, - #[snafu(display("Invalid Zone suffix for bucket '{bucket}'"))] + #[error("Invalid Zone suffix for bucket '{bucket}'")] ZoneSuffix { bucket: String }, - #[snafu(display("Invalid encryption type: {}. Valid values are \"AES256\", \"sse:kms\", \"sse:kms:dsse\" and \"sse-c\".", passed))] + #[error("Invalid encryption type: {}. Valid values are \"AES256\", \"sse:kms\", \"sse:kms:dsse\" and \"sse-c\".", passed)] InvalidEncryptionType { passed: String }, - #[snafu(display( + #[error( "Invalid encryption header values. 
Header: {}, source: {}", header, source - ))] + )] InvalidEncryptionHeader { header: &'static str, source: Box, @@ -603,8 +602,15 @@ impl AmazonS3Builder { /// This is a separate member function to allow fallible computation to /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`] fn parse_url(&mut self, url: &str) -> Result<()> { - let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?; - let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?; + let parsed = Url::parse(url).map_err(|source| { + let url = url.into(); + Error::UnableToParseUrl { url, source } + })?; + + let host = parsed + .host_str() + .ok_or_else(|| Error::UrlNotRecognised { url: url.into() })?; + match parsed.scheme() { "s3" | "s3a" => self.bucket_name = Some(host.to_string()), "https" => match host.splitn(4, '.').collect_tuple() { @@ -630,9 +636,12 @@ impl AmazonS3Builder { self.bucket_name = Some(bucket.into()); } } - _ => return Err(UrlNotRecognisedSnafu { url }.build().into()), + _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), }, - scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()), + scheme => { + let scheme = scheme.into(); + return Err(Error::UnknownUrlScheme { scheme }.into()); + } }; Ok(()) } @@ -875,7 +884,7 @@ impl AmazonS3Builder { self.parse_url(&url)?; } - let bucket = self.bucket_name.context(MissingBucketNameSnafu)?; + let bucket = self.bucket_name.ok_or(Error::MissingBucketName)?; let region = self.region.unwrap_or_else(|| "us-east-1".to_string()); let checksum = self.checksum_algorithm.map(|x| x.get()).transpose()?; let copy_if_not_exists = self.copy_if_not_exists.map(|x| x.get()).transpose()?; @@ -957,7 +966,10 @@ impl AmazonS3Builder { let (session_provider, zonal_endpoint) = match self.s3_express.get()? 
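        // S3 Express One Zone bucket names carry their availability zone as a
        // suffix; parse_bucket_az below recovers it to build the zonal endpoint.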
{ true => { - let zone = parse_bucket_az(&bucket).context(ZoneSuffixSnafu { bucket: &bucket })?; + let zone = parse_bucket_az(&bucket).ok_or_else(|| { + let bucket = bucket.clone(); + Error::ZoneSuffix { bucket } + })?; // https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-express-Regions-and-Zones.html let endpoint = format!("https://{bucket}.s3express-{zone}.{region}.amazonaws.com"); diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index 81015e82b39c..25fdd3311c95 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -56,7 +56,6 @@ use reqwest::{Client as ReqwestClient, Method, RequestBuilder, Response}; use ring::digest; use ring::digest::Context; use serde::{Deserialize, Serialize}; -use snafu::{ResultExt, Snafu}; use std::sync::Arc; const VERSION_HEADER: &str = "x-amz-version-id"; @@ -65,56 +64,56 @@ const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-amz-meta-"; const ALGORITHM: &str = "x-amz-checksum-algorithm"; /// A specialized `Error` for object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub(crate) enum Error { - #[snafu(display("Error performing DeleteObjects request: {}", source))] + #[error("Error performing DeleteObjects request: {}", source)] DeleteObjectsRequest { source: crate::client::retry::Error }, - #[snafu(display( + #[error( "DeleteObjects request failed for key {}: {} (code: {})", path, message, code - ))] + )] DeleteFailed { path: String, code: String, message: String, }, - #[snafu(display("Error getting DeleteObjects response body: {}", source))] + #[error("Error getting DeleteObjects response body: {}", source)] DeleteObjectsResponse { source: reqwest::Error }, - #[snafu(display("Got invalid DeleteObjects response: {}", source))] + #[error("Got invalid DeleteObjects response: {}", source)] InvalidDeleteObjectsResponse { source: Box, }, - #[snafu(display("Error performing list request: {}", source))] + #[error("Error performing list request: {}", source)] ListRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting list response body: {}", source))] + #[error("Error getting list response body: {}", source)] ListResponseBody { source: reqwest::Error }, - #[snafu(display("Error getting create multipart response body: {}", source))] + #[error("Error getting create multipart response body: {}", source)] CreateMultipartResponseBody { source: reqwest::Error }, - #[snafu(display("Error performing complete multipart request: {}: {}", path, source))] + #[error("Error performing complete multipart request: {}: {}", path, source)] CompleteMultipartRequest { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error getting complete multipart response body: {}", source))] + #[error("Error getting complete multipart response body: {}", source)] CompleteMultipartResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid list response: {}", source))] + #[error("Got invalid list response: {}", source)] InvalidListResponse { source: quick_xml::de::DeError }, - #[snafu(display("Got invalid multipart response: {}", source))] + #[error("Got invalid multipart response: {}", source)] InvalidMultipartResponse { source: quick_xml::de::DeError }, - #[snafu(display("Unable to extract metadata from headers: {}", source))] + #[error("Unable to extract metadata from headers: {}", source)] Metadata { source: crate::client::header::Error, }, @@ -263,10 +262,15 @@ impl SessionCredential<'_> { } } -#[derive(Debug, Snafu)] 
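// [Editor's sketch — not part of the patch] Every hunk in this series applies
// the same mechanical snafu -> thiserror recipe, illustrated below on a
// hypothetical `DemoError`/`parse_demo` pair (neither exists in object_store):
// the `#[snafu(display(..))]` attribute becomes `#[error(..)]` with the same
// format string, and the derived context selector (`ParseUrlSnafu`) plus
// `.context(..)` at the call site become an explicit variant constructed in
// `.map_err(..)`, with the underlying error captured in a field named
// `source` (which thiserror recognizes as `Error::source` automatically).
use thiserror::Error;

#[derive(Debug, Error)]
enum DemoError {
    // snafu form:
    //   #[snafu(display("Unable to parse source url. Url: {}, Error: {}", url, source))]
    #[error("Unable to parse source url. Url: {}, Error: {}", url, source)]
    ParseUrl {
        source: url::ParseError,
        url: String,
    },
}

fn parse_demo(url: &str) -> Result<url::Url, DemoError> {
    // snafu form:
    //   Url::parse(url).context(ParseUrlSnafu { url })
    url::Url::parse(url).map_err(|source| DemoError::ParseUrl {
        url: url.into(),
        source,
    })
}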
+#[derive(Debug, thiserror::Error)] pub enum RequestError { - #[snafu(context(false))] - Generic { source: crate::Error }, + #[error(transparent)] + Generic { + #[from] + source: crate::Error, + }, + + #[error("Retry")] Retry { source: crate::client::retry::Error, path: String, @@ -426,12 +430,16 @@ impl<'a> Request<'a> { .payload(self.payload) .send() .await - .context(RetrySnafu { path }) + .map_err(|source| { + let path = path.into(); + RequestError::Retry { source, path } + }) } pub(crate) async fn do_put(self) -> Result { let response = self.send().await?; - Ok(get_put_result(response.headers(), VERSION_HEADER).context(MetadataSnafu)?) + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) } } @@ -535,10 +543,10 @@ impl S3Client { .with_aws_sigv4(credential.authorizer(), Some(digest.as_ref())) .send_retry(&self.config.retry_config) .await - .context(DeleteObjectsRequestSnafu {})? + .map_err(|source| Error::DeleteObjectsRequest { source })? .bytes() .await - .context(DeleteObjectsResponseSnafu {})?; + .map_err(|source| Error::DeleteObjectsResponse { source })?; let response: BatchDeleteResponse = quick_xml::de::from_reader(response.reader()).map_err(|err| { @@ -635,10 +643,10 @@ impl S3Client { .await? .bytes() .await - .context(CreateMultipartResponseBodySnafu)?; + .map_err(|source| Error::CreateMultipartResponseBody { source })?; - let response: InitiateMultipartUploadResult = - quick_xml::de::from_reader(response.reader()).context(InvalidMultipartResponseSnafu)?; + let response: InitiateMultipartUploadResult = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; Ok(response.upload_id) } @@ -683,14 +691,14 @@ impl S3Client { .map(|v| v.to_string()); let e_tag = match is_copy { - false => get_etag(response.headers()).context(MetadataSnafu)?, + false => get_etag(response.headers()).map_err(|source| Error::Metadata { source })?, true => { let response = response .bytes() .await - .context(CreateMultipartResponseBodySnafu)?; + .map_err(|source| Error::CreateMultipartResponseBody { source })?; let response: CopyPartResult = quick_xml::de::from_reader(response.reader()) - .context(InvalidMultipartResponseSnafu)?; + .map_err(|source| Error::InvalidMultipartResponse { source })?; response.e_tag } }; @@ -764,19 +772,21 @@ impl S3Client { .retry_error_body(true) .send() .await - .context(CompleteMultipartRequestSnafu { - path: location.as_ref(), + .map_err(|source| Error::CompleteMultipartRequest { + source, + path: location.as_ref().to_string(), })?; - let version = get_version(response.headers(), VERSION_HEADER).context(MetadataSnafu)?; + let version = get_version(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?; let data = response .bytes() .await - .context(CompleteMultipartResponseBodySnafu)?; + .map_err(|source| Error::CompleteMultipartResponseBody { source })?; - let response: CompleteMultipartUploadResult = - quick_xml::de::from_reader(data.reader()).context(InvalidMultipartResponseSnafu)?; + let response: CompleteMultipartUploadResult = quick_xml::de::from_reader(data.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; Ok(PutResult { e_tag: Some(response.e_tag), @@ -884,13 +894,14 @@ impl ListClient for S3Client { .with_aws_sigv4(credential.authorizer(), None) .send_retry(&self.config.retry_config) .await - .context(ListRequestSnafu)? + .map_err(|source| Error::ListRequest { source })? 
.bytes() .await - .context(ListResponseBodySnafu)?; + .map_err(|source| Error::ListResponseBody { source })?; + + let mut response: ListResponse = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; - let mut response: ListResponse = - quick_xml::de::from_reader(response.reader()).context(InvalidListResponseSnafu)?; let token = response.next_continuation_token.take(); Ok((response.try_into()?, token)) diff --git a/object_store/src/aws/credential.rs b/object_store/src/aws/credential.rs index ee2f8e2ec953..9c74e1c6526a 100644 --- a/object_store/src/aws/credential.rs +++ b/object_store/src/aws/credential.rs @@ -29,23 +29,22 @@ use percent_encoding::utf8_percent_encode; use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION}; use reqwest::{Client, Method, Request, RequestBuilder, StatusCode}; use serde::Deserialize; -use snafu::{ResultExt, Snafu}; use std::collections::BTreeMap; use std::sync::Arc; use std::time::{Duration, Instant}; use tracing::warn; use url::Url; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] #[allow(clippy::enum_variant_names)] enum Error { - #[snafu(display("Error performing CreateSession request: {source}"))] + #[error("Error performing CreateSession request: {source}")] CreateSessionRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting CreateSession response: {source}"))] + #[error("Error getting CreateSession response: {source}")] CreateSessionResponse { source: reqwest::Error }, - #[snafu(display("Invalid CreateSessionOutput response: {source}"))] + #[error("Invalid CreateSessionOutput response: {source}")] CreateSessionOutput { source: quick_xml::DeError }, } @@ -726,13 +725,13 @@ impl TokenProvider for SessionProvider { .with_aws_sigv4(Some(authorizer), None) .send_retry(retry) .await - .context(CreateSessionRequestSnafu)? + .map_err(|source| Error::CreateSessionRequest { source })? 
.bytes() .await - .context(CreateSessionResponseSnafu)?; + .map_err(|source| Error::CreateSessionResponse { source })?; - let resp: CreateSessionOutput = - quick_xml::de::from_reader(bytes.reader()).context(CreateSessionOutputSnafu)?; + let resp: CreateSessionOutput = quick_xml::de::from_reader(bytes.reader()) + .map_err(|source| Error::CreateSessionOutput { source })?; let creds = resp.credentials; Ok(TemporaryToken { diff --git a/object_store/src/aws/resolve.rs b/object_store/src/aws/resolve.rs index 25bc74f32f29..db899ea989e3 100644 --- a/object_store/src/aws/resolve.rs +++ b/object_store/src/aws/resolve.rs @@ -17,21 +17,20 @@ use crate::aws::STORE; use crate::{ClientOptions, Result}; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; /// A specialized `Error` for object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Bucket '{}' not found", bucket))] + #[error("Bucket '{}' not found", bucket)] BucketNotFound { bucket: String }, - #[snafu(display("Failed to resolve region for bucket '{}'", bucket))] + #[error("Failed to resolve region for bucket '{}'", bucket)] ResolveRegion { bucket: String, source: reqwest::Error, }, - #[snafu(display("Failed to parse the region for bucket '{}'", bucket))] + #[error("Failed to parse the region for bucket '{}'", bucket)] RegionParse { bucket: String }, } @@ -54,22 +53,23 @@ pub async fn resolve_bucket_region(bucket: &str, client_options: &ClientOptions) let client = client_options.client()?; - let response = client - .head(&endpoint) - .send() - .await - .context(ResolveRegionSnafu { bucket })?; + let response = client.head(&endpoint).send().await.map_err(|source| { + let bucket = bucket.into(); + Error::ResolveRegion { bucket, source } + })?; - ensure!( - response.status() != StatusCode::NOT_FOUND, - BucketNotFoundSnafu { bucket } - ); + if response.status() == StatusCode::NOT_FOUND { + let bucket = bucket.into(); + return Err(Error::BucketNotFound { bucket }.into()); + } let region = response .headers() .get("x-amz-bucket-region") .and_then(|x| x.to_str().ok()) - .context(RegionParseSnafu { bucket })?; + .ok_or_else(|| Error::RegionParse { + bucket: bucket.into(), + })?; Ok(region.to_string()) } diff --git a/object_store/src/azure/builder.rs b/object_store/src/azure/builder.rs index 08c9a232393d..f0572ebe6358 100644 --- a/object_store/src/azure/builder.rs +++ b/object_store/src/azure/builder.rs @@ -26,7 +26,6 @@ use crate::config::ConfigValue; use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider}; use percent_encoding::percent_decode_str; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; use std::str::FromStr; use std::sync::Arc; use url::Url; @@ -45,48 +44,48 @@ const EMULATOR_ACCOUNT_KEY: &str = const MSI_ENDPOINT_ENV_KEY: &str = "IDENTITY_ENDPOINT"; /// A specialized `Error` for Azure builder-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))] + #[error("Unable parse source url. 
Url: {}, Error: {}", url, source)] UnableToParseUrl { source: url::ParseError, url: String, }, - #[snafu(display( + #[error( "Unable parse emulator url {}={}, Error: {}", env_name, env_value, source - ))] + )] UnableToParseEmulatorUrl { env_name: String, env_value: String, source: url::ParseError, }, - #[snafu(display("Account must be specified"))] + #[error("Account must be specified")] MissingAccount {}, - #[snafu(display("Container name must be specified"))] + #[error("Container name must be specified")] MissingContainerName {}, - #[snafu(display( + #[error( "Unknown url scheme cannot be parsed into storage location: {}", scheme - ))] + )] UnknownUrlScheme { scheme: String }, - #[snafu(display("URL did not match any known pattern for scheme: {}", url))] + #[error("URL did not match any known pattern for scheme: {}", url)] UrlNotRecognised { url: String }, - #[snafu(display("Failed parsing an SAS key"))] + #[error("Failed parsing an SAS key")] DecodeSasKey { source: std::str::Utf8Error }, - #[snafu(display("Missing component in SAS query pair"))] + #[error("Missing component in SAS query pair")] MissingSasComponent {}, - #[snafu(display("Configuration key: '{}' is not known.", key))] + #[error("Configuration key: '{}' is not known.", key)] UnknownConfigurationKey { key: String }, } @@ -642,11 +641,17 @@ impl MicrosoftAzureBuilder { /// This is a separate member function to allow fallible computation to /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`] fn parse_url(&mut self, url: &str) -> Result<()> { - let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?; - let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?; + let parsed = Url::parse(url).map_err(|source| { + let url = url.into(); + Error::UnableToParseUrl { url, source } + })?; + + let host = parsed + .host_str() + .ok_or_else(|| Error::UrlNotRecognised { url: url.into() })?; let validate = |s: &str| match s.contains('.') { - true => Err(UrlNotRecognisedSnafu { url }.build()), + true => Err(Error::UrlNotRecognised { url: url.into() }), false => Ok(s.to_string()), }; @@ -665,7 +670,7 @@ impl MicrosoftAzureBuilder { self.account_name = Some(validate(a)?); self.use_fabric_endpoint = true.into(); } else { - return Err(UrlNotRecognisedSnafu { url }.build().into()); + return Err(Error::UrlNotRecognised { url: url.into() }.into()); } } "https" => match host.split_once('.') { @@ -689,9 +694,12 @@ impl MicrosoftAzureBuilder { } self.use_fabric_endpoint = true.into(); } - _ => return Err(UrlNotRecognisedSnafu { url }.build().into()), + _ => return Err(Error::UrlNotRecognised { url: url.into() }.into()), }, - scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()), + scheme => { + let scheme = scheme.into(); + return Err(Error::UnknownUrlScheme { scheme }.into()); + } } Ok(()) } @@ -924,8 +932,10 @@ impl MicrosoftAzureBuilder { }, }; - let url = - Url::parse(&account_url).context(UnableToParseUrlSnafu { url: account_url })?; + let url = Url::parse(&account_url).map_err(|source| { + let url = account_url.clone(); + Error::UnableToParseUrl { url, source } + })?; let credential = if let Some(credential) = self.credentials { credential @@ -1030,10 +1040,13 @@ impl MicrosoftAzureBuilder { /// if present, otherwise falls back to default_url fn url_from_env(env_name: &str, default_url: &str) -> Result { let url = match std::env::var(env_name) { - Ok(env_value) => Url::parse(&env_value).context(UnableToParseEmulatorUrlSnafu { - env_name, - env_value, - })?, + Ok(env_value) => { + 
Url::parse(&env_value).map_err(|source| Error::UnableToParseEmulatorUrl { + env_name: env_name.into(), + env_value, + source, + })? + } Err(_) => Url::parse(default_url).expect("Failed to parse default URL"), }; Ok(url) @@ -1042,7 +1055,7 @@ fn url_from_env(env_name: &str, default_url: &str) -> Result { fn split_sas(sas: &str) -> Result, Error> { let sas = percent_decode_str(sas) .decode_utf8() - .context(DecodeSasKeySnafu {})?; + .map_err(|source| Error::DecodeSasKey { source })?; let kv_str_pairs = sas .trim_start_matches('?') .split('&') diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index 69ff39526bef..ea3a5faf3ad8 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -42,7 +42,6 @@ use reqwest::{ Client as ReqwestClient, Method, RequestBuilder, Response, }; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -60,84 +59,84 @@ static MS_CONTENT_LANGUAGE: HeaderName = HeaderName::from_static("x-ms-blob-cont static TAGS_HEADER: HeaderName = HeaderName::from_static("x-ms-tags"); /// A specialized `Error` for object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub(crate) enum Error { - #[snafu(display("Error performing get request {}: {}", path, source))] + #[error("Error performing get request {}: {}", path, source)] GetRequest { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error performing put request {}: {}", path, source))] + #[error("Error performing put request {}: {}", path, source)] PutRequest { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error performing delete request {}: {}", path, source))] + #[error("Error performing delete request {}: {}", path, source)] DeleteRequest { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error performing bulk delete request: {}", source))] + #[error("Error performing bulk delete request: {}", source)] BulkDeleteRequest { source: crate::client::retry::Error }, - #[snafu(display("Error receiving bulk delete request body: {}", source))] + #[error("Error receiving bulk delete request body: {}", source)] BulkDeleteRequestBody { source: reqwest::Error }, - #[snafu(display( + #[error( "Bulk delete request failed due to invalid input: {} (code: {})", reason, code - ))] + )] BulkDeleteRequestInvalidInput { code: String, reason: String }, - #[snafu(display("Got invalid bulk delete response: {}", reason))] + #[error("Got invalid bulk delete response: {}", reason)] InvalidBulkDeleteResponse { reason: String }, - #[snafu(display( + #[error( "Bulk delete request failed for key {}: {} (code: {})", path, reason, code - ))] + )] DeleteFailed { path: String, code: String, reason: String, }, - #[snafu(display("Error performing list request: {}", source))] + #[error("Error performing list request: {}", source)] ListRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting list response body: {}", source))] + #[error("Error getting list response body: {}", source)] ListResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid list response: {}", source))] + #[error("Got invalid list response: {}", source)] InvalidListResponse { source: quick_xml::de::DeError }, - #[snafu(display("Unable to extract metadata from headers: {}", source))] + #[error("Unable to extract metadata from headers: {}", source)] Metadata { source: 
crate::client::header::Error, }, - #[snafu(display("ETag required for conditional update"))] + #[error("ETag required for conditional update")] MissingETag, - #[snafu(display("Error requesting user delegation key: {}", source))] + #[error("Error requesting user delegation key: {}", source)] DelegationKeyRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting user delegation key response body: {}", source))] + #[error("Error getting user delegation key response body: {}", source)] DelegationKeyResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid user delegation key response: {}", source))] + #[error("Got invalid user delegation key response: {}", source)] DelegationKeyResponse { source: quick_xml::de::DeError }, - #[snafu(display("Generating SAS keys with SAS tokens auth is not supported"))] + #[error("Generating SAS keys with SAS tokens auth is not supported")] SASforSASNotSupported, - #[snafu(display("Generating SAS keys while skipping signatures is not supported"))] + #[error("Generating SAS keys while skipping signatures is not supported")] SASwithSkipSignature, } @@ -268,8 +267,9 @@ impl<'a> PutRequest<'a> { .payload(Some(self.payload)) .send() .await - .context(PutRequestSnafu { - path: self.path.as_ref(), + .map_err(|source| { + let path = self.path.as_ref().into(); + Error::PutRequest { path, source } })?; Ok(response) @@ -544,13 +544,14 @@ impl AzureClient { PutMode::Overwrite => builder.idempotent(true), PutMode::Create => builder.header(&IF_NONE_MATCH, "*"), PutMode::Update(v) => { - let etag = v.e_tag.as_ref().context(MissingETagSnafu)?; + let etag = v.e_tag.as_ref().ok_or(Error::MissingETag)?; builder.header(&IF_MATCH, etag) } }; let response = builder.header(&BLOB_TYPE, "BlockBlob").send().await?; - Ok(get_put_result(response.headers(), VERSION_HEADER).context(MetadataSnafu)?) + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) } /// PUT a block @@ -595,7 +596,8 @@ impl AzureClient { .send() .await?; - Ok(get_put_result(response.headers(), VERSION_HEADER).context(MetadataSnafu)?) + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) } /// Make an Azure Delete request @@ -620,8 +622,9 @@ impl AzureClient { .sensitive(sensitive) .send() .await - .context(DeleteRequestSnafu { - path: path.as_ref(), + .map_err(|source| { + let path = path.as_ref().into(); + Error::DeleteRequest { source, path } })?; Ok(()) @@ -693,14 +696,14 @@ impl AzureClient { .with_azure_authorization(&credential, &self.config.account) .send_retry(&self.config.retry_config) .await - .context(BulkDeleteRequestSnafu {})?; + .map_err(|source| Error::BulkDeleteRequest { source })?; let boundary = parse_multipart_response_boundary(&batch_response)?; let batch_body = batch_response .bytes() .await - .context(BulkDeleteRequestBodySnafu {})?; + .map_err(|source| Error::BulkDeleteRequestBody { source })?; let results = parse_blob_batch_delete_body(batch_body, boundary, &paths).await?; @@ -780,13 +783,13 @@ impl AzureClient { .idempotent(true) .send() .await - .context(DelegationKeyRequestSnafu)? + .map_err(|source| Error::DelegationKeyRequest { source })? 
.bytes() .await - .context(DelegationKeyResponseBodySnafu)?; + .map_err(|source| Error::DelegationKeyResponseBody { source })?; - let response: UserDelegationKey = - quick_xml::de::from_reader(response.reader()).context(DelegationKeyResponseSnafu)?; + let response: UserDelegationKey = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::DelegationKeyResponse { source })?; Ok(response) } @@ -842,9 +845,11 @@ impl AzureClient { .sensitive(sensitive) .send() .await - .context(GetRequestSnafu { - path: path.as_ref(), + .map_err(|source| { + let path = path.as_ref().into(); + Error::GetRequest { source, path } })?; + Ok(response) } } @@ -900,8 +905,9 @@ impl GetClient for AzureClient { .sensitive(sensitive) .send() .await - .context(GetRequestSnafu { - path: path.as_ref(), + .map_err(|source| { + let path = path.as_ref().into(); + Error::GetRequest { source, path } })?; match response.headers().get("x-ms-resource-type") { @@ -962,13 +968,14 @@ impl ListClient for AzureClient { .sensitive(sensitive) .send() .await - .context(ListRequestSnafu)? + .map_err(|source| Error::ListRequest { source })? .bytes() .await - .context(ListResponseBodySnafu)?; + .map_err(|source| Error::ListResponseBody { source })?; + + let mut response: ListResultInternal = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; - let mut response: ListResultInternal = - quick_xml::de::from_reader(response.reader()).context(InvalidListResponseSnafu)?; let token = response.next_marker.take(); Ok((to_list_result(response, prefix)?, token)) diff --git a/object_store/src/azure/credential.rs b/object_store/src/azure/credential.rs index 2832eed72256..c9e6ac640b4a 100644 --- a/object_store/src/azure/credential.rs +++ b/object_store/src/azure/credential.rs @@ -32,7 +32,6 @@ use reqwest::header::{ }; use reqwest::{Client, Method, Request, RequestBuilder}; use serde::Deserialize; -use snafu::{ResultExt, Snafu}; use std::borrow::Cow; use std::collections::HashMap; use std::fmt::Debug; @@ -71,27 +70,27 @@ const AZURE_STORAGE_SCOPE: &str = "https://storage.azure.com/.default"; /// const AZURE_STORAGE_RESOURCE: &str = "https://storage.azure.com"; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub enum Error { - #[snafu(display("Error performing token request: {}", source))] + #[error("Error performing token request: {}", source)] TokenRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting token response body: {}", source))] + #[error("Error getting token response body: {}", source)] TokenResponseBody { source: reqwest::Error }, - #[snafu(display("Error reading federated token file "))] + #[error("Error reading federated token file ")] FederatedTokenFile, - #[snafu(display("Invalid Access Key: {}", source))] + #[error("Invalid Access Key: {}", source)] InvalidAccessKey { source: base64::DecodeError }, - #[snafu(display("'az account get-access-token' command failed: {message}"))] + #[error("'az account get-access-token' command failed: {message}")] AzureCli { message: String }, - #[snafu(display("Failed to parse azure cli response: {source}"))] + #[error("Failed to parse azure cli response: {source}")] AzureCliResponse { source: serde_json::Error }, - #[snafu(display("Generating SAS keys with SAS tokens auth is not supported"))] + #[error("Generating SAS keys with SAS tokens auth is not supported")] SASforSASNotSupported, } @@ -113,7 +112,10 @@ pub struct AzureAccessKey(Vec); impl AzureAccessKey { /// Create a new 
[`AzureAccessKey`], checking it for validity pub fn try_new(key: &str) -> Result { - let key = BASE64_STANDARD.decode(key).context(InvalidAccessKeySnafu)?; + let key = BASE64_STANDARD + .decode(key) + .map_err(|source| Error::InvalidAccessKey { source })?; + Ok(Self(key)) } } @@ -636,10 +638,10 @@ impl TokenProvider for ClientSecretOAuthProvider { .idempotent(true) .send() .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .json() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(TemporaryToken { token: Arc::new(AzureCredential::BearerToken(response.access_token)), @@ -744,10 +746,10 @@ impl TokenProvider for ImdsManagedIdentityProvider { let response: ImdsTokenResponse = builder .send_retry(retry) .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .json() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(TemporaryToken { token: Arc::new(AzureCredential::BearerToken(response.access_token)), @@ -820,10 +822,10 @@ impl TokenProvider for WorkloadIdentityOAuthProvider { .idempotent(true) .send() .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .json() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(TemporaryToken { token: Arc::new(AzureCredential::BearerToken(response.access_token)), @@ -900,7 +902,8 @@ impl AzureCliCredential { })?; let token_response = serde_json::from_str::(output) - .context(AzureCliResponseSnafu)?; + .map_err(|source| Error::AzureCliResponse { source })?; + if !token_response.token_type.eq_ignore_ascii_case("bearer") { return Err(Error::AzureCli { message: format!( @@ -1033,10 +1036,10 @@ impl TokenProvider for FabricTokenOAuthProvider { .idempotent(true) .send() .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? 
.text() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; let exp_in = Self::validate_and_get_expiry(&access_token) .map_or(3600, |expiry| expiry - Self::get_current_timestamp()); Ok(TemporaryToken { diff --git a/object_store/src/client/get.rs b/object_store/src/client/get.rs index 5dd62cbece5a..57aca8956452 100644 --- a/object_store/src/client/get.rs +++ b/object_store/src/client/get.rs @@ -29,7 +29,6 @@ use hyper::header::{ use hyper::StatusCode; use reqwest::header::ToStrError; use reqwest::Response; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; /// A client that can perform a get request #[async_trait] @@ -95,49 +94,51 @@ impl ContentRange { } /// A specialized `Error` for get-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum GetResultError { - #[snafu(context(false))] + #[error(transparent)] Header { + #[from] source: crate::client::header::Error, }, - #[snafu(transparent)] + #[error(transparent)] InvalidRangeRequest { + #[from] source: crate::util::InvalidGetRange, }, - #[snafu(display("Received non-partial response when range requested"))] + #[error("Received non-partial response when range requested")] NotPartial, - #[snafu(display("Content-Range header not present in partial response"))] + #[error("Content-Range header not present in partial response")] NoContentRange, - #[snafu(display("Failed to parse value for CONTENT_RANGE header: \"{value}\""))] + #[error("Failed to parse value for CONTENT_RANGE header: \"{value}\"")] ParseContentRange { value: String }, - #[snafu(display("Content-Range header contained non UTF-8 characters"))] + #[error("Content-Range header contained non UTF-8 characters")] InvalidContentRange { source: ToStrError }, - #[snafu(display("Cache-Control header contained non UTF-8 characters"))] + #[error("Cache-Control header contained non UTF-8 characters")] InvalidCacheControl { source: ToStrError }, - #[snafu(display("Content-Disposition header contained non UTF-8 characters"))] + #[error("Content-Disposition header contained non UTF-8 characters")] InvalidContentDisposition { source: ToStrError }, - #[snafu(display("Content-Encoding header contained non UTF-8 characters"))] + #[error("Content-Encoding header contained non UTF-8 characters")] InvalidContentEncoding { source: ToStrError }, - #[snafu(display("Content-Language header contained non UTF-8 characters"))] + #[error("Content-Language header contained non UTF-8 characters")] InvalidContentLanguage { source: ToStrError }, - #[snafu(display("Content-Type header contained non UTF-8 characters"))] + #[error("Content-Type header contained non UTF-8 characters")] InvalidContentType { source: ToStrError }, - #[snafu(display("Metadata value for \"{key:?}\" contained non UTF-8 characters"))] + #[error("Metadata value for \"{key:?}\" contained non UTF-8 characters")] InvalidMetadata { key: String }, - #[snafu(display("Requested {expected:?}, got {actual:?}"))] + #[error("Requested {expected:?}, got {actual:?}")] UnexpectedRange { expected: Range, actual: Range, @@ -153,17 +154,24 @@ fn get_result( // ensure that we receive the range we asked for let range = if let Some(expected) = range { - ensure!( - response.status() == StatusCode::PARTIAL_CONTENT, - NotPartialSnafu - ); + if response.status() != StatusCode::PARTIAL_CONTENT { + return Err(GetResultError::NotPartial); + } + let val = response .headers() .get(CONTENT_RANGE) - .context(NoContentRangeSnafu)?; + .ok_or(GetResultError::NoContentRange)?; + + let value = val + 
.to_str() + .map_err(|source| GetResultError::InvalidContentRange { source })?; + + let value = ContentRange::from_str(value).ok_or_else(|| { + let value = value.into(); + GetResultError::ParseContentRange { value } + })?; - let value = val.to_str().context(InvalidContentRangeSnafu)?; - let value = ContentRange::from_str(value).context(ParseContentRangeSnafu { value })?; let actual = value.range; // Update size to reflect full size of object (#5272) @@ -171,10 +179,9 @@ fn get_result( let expected = expected.as_range(meta.size)?; - ensure!( - actual == expected, - UnexpectedRangeSnafu { expected, actual } - ); + if actual != expected { + return Err(GetResultError::UnexpectedRange { expected, actual }); + } actual } else { @@ -182,11 +189,11 @@ fn get_result( }; macro_rules! parse_attributes { - ($headers:expr, $(($header:expr, $attr:expr, $err:expr)),*) => {{ + ($headers:expr, $(($header:expr, $attr:expr, $map_err:expr)),*) => {{ let mut attributes = Attributes::new(); $( if let Some(x) = $headers.get($header) { - let x = x.to_str().context($err)?; + let x = x.to_str().map_err($map_err)?; attributes.insert($attr, x.to_string().into()); } )* @@ -196,31 +203,23 @@ fn get_result( let mut attributes = parse_attributes!( response.headers(), - ( - CACHE_CONTROL, - Attribute::CacheControl, - InvalidCacheControlSnafu - ), + (CACHE_CONTROL, Attribute::CacheControl, |source| { + GetResultError::InvalidCacheControl { source } + }), ( CONTENT_DISPOSITION, Attribute::ContentDisposition, - InvalidContentDispositionSnafu - ), - ( - CONTENT_ENCODING, - Attribute::ContentEncoding, - InvalidContentEncodingSnafu + |source| GetResultError::InvalidContentDisposition { source } ), - ( - CONTENT_LANGUAGE, - Attribute::ContentLanguage, - InvalidContentLanguageSnafu - ), - ( - CONTENT_TYPE, - Attribute::ContentType, - InvalidContentTypeSnafu - ) + (CONTENT_ENCODING, Attribute::ContentEncoding, |source| { + GetResultError::InvalidContentEncoding { source } + }), + (CONTENT_LANGUAGE, Attribute::ContentLanguage, |source| { + GetResultError::InvalidContentLanguage { source } + }), + (CONTENT_TYPE, Attribute::ContentType, |source| { + GetResultError::InvalidContentType { source } + }) ); // Add attributes that match the user-defined metadata prefix (e.g. 
x-amz-meta-) diff --git a/object_store/src/client/header.rs b/object_store/src/client/header.rs index 07c04c11945a..db06da6345d5 100644 --- a/object_store/src/client/header.rs +++ b/object_store/src/client/header.rs @@ -22,7 +22,6 @@ use crate::ObjectMeta; use chrono::{DateTime, TimeZone, Utc}; use hyper::header::{CONTENT_LENGTH, ETAG, LAST_MODIFIED}; use hyper::HeaderMap; -use snafu::{OptionExt, ResultExt, Snafu}; #[derive(Debug, Copy, Clone)] /// Configuration for header extraction @@ -44,27 +43,27 @@ pub(crate) struct HeaderConfig { pub user_defined_metadata_prefix: Option<&'static str>, } -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub(crate) enum Error { - #[snafu(display("ETag Header missing from response"))] + #[error("ETag Header missing from response")] MissingEtag, - #[snafu(display("Received header containing non-ASCII data"))] + #[error("Received header containing non-ASCII data")] BadHeader { source: reqwest::header::ToStrError }, - #[snafu(display("Last-Modified Header missing from response"))] + #[error("Last-Modified Header missing from response")] MissingLastModified, - #[snafu(display("Content-Length Header missing from response"))] + #[error("Content-Length Header missing from response")] MissingContentLength, - #[snafu(display("Invalid last modified '{}': {}", last_modified, source))] + #[error("Invalid last modified '{}': {}", last_modified, source)] InvalidLastModified { last_modified: String, source: chrono::ParseError, }, - #[snafu(display("Invalid content length '{}': {}", content_length, source))] + #[error("Invalid content length '{}': {}", content_length, source)] InvalidContentLength { content_length: String, source: std::num::ParseIntError, @@ -86,7 +85,11 @@ pub(crate) fn get_put_result( #[cfg(any(feature = "aws", feature = "gcp", feature = "azure"))] pub(crate) fn get_version(headers: &HeaderMap, version: &str) -> Result, Error> { Ok(match headers.get(version) { - Some(x) => Some(x.to_str().context(BadHeaderSnafu)?.to_string()), + Some(x) => Some( + x.to_str() + .map_err(|source| Error::BadHeader { source })? + .to_string(), + ), None => None, }) } @@ -94,7 +97,10 @@ pub(crate) fn get_version(headers: &HeaderMap, version: &str) -> Result Result { let e_tag = headers.get(ETAG).ok_or(Error::MissingEtag)?; - Ok(e_tag.to_str().context(BadHeaderSnafu)?.to_string()) + Ok(e_tag + .to_str() + .map_err(|source| Error::BadHeader { source })? + .to_string()) } /// Extracts [`ObjectMeta`] from the provided [`HeaderMap`] @@ -105,9 +111,15 @@ pub(crate) fn header_meta( ) -> Result { let last_modified = match headers.get(LAST_MODIFIED) { Some(last_modified) => { - let last_modified = last_modified.to_str().context(BadHeaderSnafu)?; + let last_modified = last_modified + .to_str() + .map_err(|source| Error::BadHeader { source })?; + DateTime::parse_from_rfc2822(last_modified) - .context(InvalidLastModifiedSnafu { last_modified })? + .map_err(|source| Error::InvalidLastModified { + last_modified: last_modified.into(), + source, + })? 
.with_timezone(&Utc) } None if cfg.last_modified_required => return Err(Error::MissingLastModified), @@ -122,15 +134,25 @@ pub(crate) fn header_meta( let content_length = headers .get(CONTENT_LENGTH) - .context(MissingContentLengthSnafu)?; + .ok_or(Error::MissingContentLength)?; + + let content_length = content_length + .to_str() + .map_err(|source| Error::BadHeader { source })?; - let content_length = content_length.to_str().context(BadHeaderSnafu)?; let size = content_length .parse() - .context(InvalidContentLengthSnafu { content_length })?; + .map_err(|source| Error::InvalidContentLength { + content_length: content_length.into(), + source, + })?; let version = match cfg.version_header.and_then(|h| headers.get(h)) { - Some(v) => Some(v.to_str().context(BadHeaderSnafu)?.to_string()), + Some(v) => Some( + v.to_str() + .map_err(|source| Error::BadHeader { source })? + .to_string(), + ), None => None, }; diff --git a/object_store/src/client/retry.rs b/object_store/src/client/retry.rs index a8a8e58de4d0..8938b0861cca 100644 --- a/object_store/src/client/retry.rs +++ b/object_store/src/client/retry.rs @@ -22,30 +22,29 @@ use crate::PutPayload; use futures::future::BoxFuture; use reqwest::header::LOCATION; use reqwest::{Client, Request, Response, StatusCode}; -use snafu::Error as SnafuError; -use snafu::Snafu; +use std::error::Error as StdError; use std::time::{Duration, Instant}; use tracing::info; /// Retry request error -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub enum Error { - #[snafu(display("Received redirect without LOCATION, this normally indicates an incorrectly configured region"))] + #[error("Received redirect without LOCATION, this normally indicates an incorrectly configured region")] BareRedirect, - #[snafu(display("Server error, body contains Error, with status {status}: {}", body.as_deref().unwrap_or("No Body")))] + #[error("Server error, body contains Error, with status {status}: {}", body.as_deref().unwrap_or("No Body"))] Server { status: StatusCode, body: Option, }, - #[snafu(display("Client error with status {status}: {}", body.as_deref().unwrap_or("No Body")))] + #[error("Client error with status {status}: {}", body.as_deref().unwrap_or("No Body"))] Client { status: StatusCode, body: Option, }, - #[snafu(display("Error after {retries} retries in {elapsed:?}, max_retries:{max_retries}, retry_timeout:{retry_timeout:?}, source:{source}"))] + #[error("Error after {retries} retries in {elapsed:?}, max_retries:{max_retries}, retry_timeout:{retry_timeout:?}, source:{source}")] Reqwest { retries: usize, max_retries: usize, diff --git a/object_store/src/delimited.rs b/object_store/src/delimited.rs index 96f88bf41ff7..5b11a0bf7eb1 100644 --- a/object_store/src/delimited.rs +++ b/object_store/src/delimited.rs @@ -21,16 +21,15 @@ use std::collections::VecDeque; use bytes::Bytes; use futures::{Stream, StreamExt}; -use snafu::{ensure, Snafu}; use super::Result; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("encountered unterminated string"))] + #[error("encountered unterminated string")] UnterminatedString, - #[snafu(display("encountered trailing escape character"))] + #[error("encountered trailing escape character")] TrailingEscape, } @@ -125,8 +124,12 @@ impl LineDelimiter { /// Returns `true` if there is no remaining data to be read fn finish(&mut self) -> Result { if !self.remainder.is_empty() { - ensure!(!self.is_quote, UnterminatedStringSnafu); - ensure!(!self.is_escape, TrailingEscapeSnafu); + if self.is_quote { + 
Err(Error::UnterminatedString)?; + } + if self.is_escape { + Err(Error::TrailingEscape)?; + } self.complete .push_back(Bytes::from(std::mem::take(&mut self.remainder))) diff --git a/object_store/src/gcp/builder.rs b/object_store/src/gcp/builder.rs index fac923c4b9a0..cc5c1e1a0745 100644 --- a/object_store/src/gcp/builder.rs +++ b/object_store/src/gcp/builder.rs @@ -27,7 +27,6 @@ use crate::gcp::{ }; use crate::{ClientConfigKey, ClientOptions, Result, RetryConfig, StaticCredentialProvider}; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -37,33 +36,33 @@ use super::credential::{AuthorizedUserSigningCredentials, InstanceSigningCredent const TOKEN_MIN_TTL: Duration = Duration::from_secs(4 * 60); -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Missing bucket name"))] + #[error("Missing bucket name")] MissingBucketName {}, - #[snafu(display("One of service account path or service account key may be provided."))] + #[error("One of service account path or service account key may be provided.")] ServiceAccountPathAndKeyProvided, - #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))] + #[error("Unable parse source url. Url: {}, Error: {}", url, source)] UnableToParseUrl { source: url::ParseError, url: String, }, - #[snafu(display( + #[error( "Unknown url scheme cannot be parsed into storage location: {}", scheme - ))] + )] UnknownUrlScheme { scheme: String }, - #[snafu(display("URL did not match any known pattern for scheme: {}", url))] + #[error("URL did not match any known pattern for scheme: {}", url)] UrlNotRecognised { url: String }, - #[snafu(display("Configuration key: '{}' is not known.", key))] + #[error("Configuration key: '{}' is not known.", key)] UnknownConfigurationKey { key: String }, - #[snafu(display("GCP credential error: {}", source))] + #[error("GCP credential error: {}", source)] Credential { source: credential::Error }, } @@ -319,12 +318,21 @@ impl GoogleCloudStorageBuilder { /// This is a separate member function to allow fallible computation to /// be deferred until [`Self::build`] which in turn allows deriving [`Clone`] fn parse_url(&mut self, url: &str) -> Result<()> { - let parsed = Url::parse(url).context(UnableToParseUrlSnafu { url })?; - let host = parsed.host_str().context(UrlNotRecognisedSnafu { url })?; + let parsed = Url::parse(url).map_err(|source| Error::UnableToParseUrl { + source, + url: url.to_string(), + })?; + + let host = parsed.host_str().ok_or_else(|| Error::UrlNotRecognised { + url: url.to_string(), + })?; match parsed.scheme() { "gs" => self.bucket_name = Some(host.to_string()), - scheme => return Err(UnknownUrlSchemeSnafu { scheme }.build().into()), + scheme => { + let scheme = scheme.to_string(); + return Err(Error::UnknownUrlScheme { scheme }.into()); + } } Ok(()) } @@ -428,12 +436,14 @@ impl GoogleCloudStorageBuilder { // First try to initialize from the service account information. let service_account_credentials = match (self.service_account_path, self.service_account_key) { - (Some(path), None) => { - Some(ServiceAccountCredentials::from_file(path).context(CredentialSnafu)?) - } - (None, Some(key)) => { - Some(ServiceAccountCredentials::from_key(&key).context(CredentialSnafu)?) 
- } + (Some(path), None) => Some( + ServiceAccountCredentials::from_file(path) + .map_err(|source| Error::Credential { source })?, + ), + (None, Some(key)) => Some( + ServiceAccountCredentials::from_key(&key) + .map_err(|source| Error::Credential { source })?, + ), (None, None) => None, (Some(_), Some(_)) => return Err(Error::ServiceAccountPathAndKeyProvided.into()), }; diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs index ccc9c341f2fe..1928d13b4739 100644 --- a/object_store/src/gcp/client.rs +++ b/object_store/src/gcp/client.rs @@ -44,7 +44,6 @@ use percent_encoding::{percent_encode, utf8_percent_encode, NON_ALPHANUMERIC}; use reqwest::header::HeaderName; use reqwest::{Client, Method, RequestBuilder, Response, StatusCode}; use serde::{Deserialize, Serialize}; -use snafu::{OptionExt, ResultExt, Snafu}; use std::sync::Arc; const VERSION_HEADER: &str = "x-goog-generation"; @@ -53,62 +52,62 @@ const USER_DEFINED_METADATA_HEADER_PREFIX: &str = "x-goog-meta-"; static VERSION_MATCH: HeaderName = HeaderName::from_static("x-goog-if-generation-match"); -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Error performing list request: {}", source))] + #[error("Error performing list request: {}", source)] ListRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting list response body: {}", source))] + #[error("Error getting list response body: {}", source)] ListResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid list response: {}", source))] + #[error("Got invalid list response: {}", source)] InvalidListResponse { source: quick_xml::de::DeError }, - #[snafu(display("Error performing get request {}: {}", path, source))] + #[error("Error performing get request {}: {}", path, source)] GetRequest { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error performing request {}: {}", path, source))] + #[error("Error performing request {}: {}", path, source)] Request { source: crate::client::retry::Error, path: String, }, - #[snafu(display("Error getting put response body: {}", source))] + #[error("Error getting put response body: {}", source)] PutResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid put request: {}", source))] + #[error("Got invalid put request: {}", source)] InvalidPutRequest { source: quick_xml::se::SeError }, - #[snafu(display("Got invalid put response: {}", source))] + #[error("Got invalid put response: {}", source)] InvalidPutResponse { source: quick_xml::de::DeError }, - #[snafu(display("Unable to extract metadata from headers: {}", source))] + #[error("Unable to extract metadata from headers: {}", source)] Metadata { source: crate::client::header::Error, }, - #[snafu(display("Version required for conditional update"))] + #[error("Version required for conditional update")] MissingVersion, - #[snafu(display("Error performing complete multipart request: {}", source))] + #[error("Error performing complete multipart request: {}", source)] CompleteMultipartRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting complete multipart response body: {}", source))] + #[error("Error getting complete multipart response body: {}", source)] CompleteMultipartResponseBody { source: reqwest::Error }, - #[snafu(display("Got invalid multipart response: {}", source))] + #[error("Got invalid multipart response: {}", source)] InvalidMultipartResponse { source: quick_xml::de::DeError }, - #[snafu(display("Error signing blob: {}", 
source))] + #[error("Error signing blob: {}", source)] SignBlobRequest { source: crate::client::retry::Error }, - #[snafu(display("Got invalid signing blob response: {}", source))] + #[error("Got invalid signing blob response: {}", source)] InvalidSignBlobResponse { source: reqwest::Error }, - #[snafu(display("Got invalid signing blob signature: {}", source))] + #[error("Got invalid signing blob signature: {}", source)] InvalidSignBlobSignature { source: base64::DecodeError }, } @@ -236,15 +235,17 @@ impl<'a> Request<'a> { .payload(self.payload) .send() .await - .context(RequestSnafu { - path: self.path.as_ref(), + .map_err(|source| { + let path = self.path.as_ref().into(); + Error::Request { source, path } })?; Ok(resp) } async fn do_put(self) -> Result { let response = self.send().await?; - Ok(get_put_result(response.headers(), VERSION_HEADER).context(MetadataSnafu)?) + Ok(get_put_result(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?) } } @@ -336,17 +337,17 @@ impl GoogleCloudStorageClient { .idempotent(true) .send() .await - .context(SignBlobRequestSnafu)?; + .map_err(|source| Error::SignBlobRequest { source })?; //If successful, the signature is returned in the signedBlob field in the response. let response = response .json::() .await - .context(InvalidSignBlobResponseSnafu)?; + .map_err(|source| Error::InvalidSignBlobResponse { source })?; let signed_blob = BASE64_STANDARD .decode(response.signed_blob) - .context(InvalidSignBlobSignatureSnafu)?; + .map_err(|source| Error::InvalidSignBlobSignature { source })?; Ok(hex_encode(&signed_blob)) } @@ -389,7 +390,7 @@ impl GoogleCloudStorageClient { PutMode::Overwrite => builder.idempotent(true), PutMode::Create => builder.header(&VERSION_MATCH, "0"), PutMode::Update(v) => { - let etag = v.version.as_ref().context(MissingVersionSnafu)?; + let etag = v.version.as_ref().ok_or(Error::MissingVersion)?; builder.header(&VERSION_MATCH, etag) } }; @@ -443,9 +444,14 @@ impl GoogleCloudStorageClient { .send() .await?; - let data = response.bytes().await.context(PutResponseBodySnafu)?; + let data = response + .bytes() + .await + .map_err(|source| Error::PutResponseBody { source })?; + let result: InitiateMultipartUploadResult = - quick_xml::de::from_reader(data.as_ref().reader()).context(InvalidPutResponseSnafu)?; + quick_xml::de::from_reader(data.as_ref().reader()) + .map_err(|source| Error::InvalidPutResponse { source })?; Ok(result.upload_id) } @@ -467,8 +473,9 @@ impl GoogleCloudStorageClient { .query(&[("uploadId", multipart_id)]) .send_retry(&self.config.retry_config) .await - .context(RequestSnafu { - path: path.as_ref(), + .map_err(|source| { + let path = path.as_ref().into(); + Error::Request { source, path } })?; Ok(()) @@ -498,7 +505,7 @@ impl GoogleCloudStorageClient { let credential = self.get_credential().await?; let data = quick_xml::se::to_string(&upload_info) - .context(InvalidPutRequestSnafu)? + .map_err(|source| Error::InvalidPutRequest { source })? 
// We cannot disable the escaping that transforms "/" to ""e;" :( // https://github.com/tafia/quick-xml/issues/362 // https://github.com/tafia/quick-xml/issues/350 @@ -514,17 +521,18 @@ impl GoogleCloudStorageClient { .idempotent(true) .send() .await - .context(CompleteMultipartRequestSnafu)?; + .map_err(|source| Error::CompleteMultipartRequest { source })?; - let version = get_version(response.headers(), VERSION_HEADER).context(MetadataSnafu)?; + let version = get_version(response.headers(), VERSION_HEADER) + .map_err(|source| Error::Metadata { source })?; let data = response .bytes() .await - .context(CompleteMultipartResponseBodySnafu)?; + .map_err(|source| Error::CompleteMultipartResponseBody { source })?; - let response: CompleteMultipartUploadResult = - quick_xml::de::from_reader(data.reader()).context(InvalidMultipartResponseSnafu)?; + let response: CompleteMultipartUploadResult = quick_xml::de::from_reader(data.reader()) + .map_err(|source| Error::InvalidMultipartResponse { source })?; Ok(PutResult { e_tag: Some(response.e_tag), @@ -615,8 +623,9 @@ impl GetClient for GoogleCloudStorageClient { .with_get_options(options) .send_retry(&self.config.retry_config) .await - .context(GetRequestSnafu { - path: path.as_ref(), + .map_err(|source| { + let path = path.as_ref().into(); + Error::GetRequest { source, path } })?; Ok(response) @@ -665,13 +674,13 @@ impl ListClient for GoogleCloudStorageClient { .bearer_auth(&credential.bearer) .send_retry(&self.config.retry_config) .await - .context(ListRequestSnafu)? + .map_err(|source| Error::ListRequest { source })? .bytes() .await - .context(ListResponseBodySnafu)?; + .map_err(|source| Error::ListResponseBody { source })?; - let mut response: ListResponse = - quick_xml::de::from_reader(response.reader()).context(InvalidListResponseSnafu)?; + let mut response: ListResponse = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidListResponse { source })?; let token = response.next_continuation_token.take(); Ok((response.try_into()?, token)) diff --git a/object_store/src/gcp/credential.rs b/object_store/src/gcp/credential.rs index 155a80b343b2..4b21ad1d3eab 100644 --- a/object_store/src/gcp/credential.rs +++ b/object_store/src/gcp/credential.rs @@ -33,7 +33,6 @@ use percent_encoding::utf8_percent_encode; use reqwest::{Client, Method}; use ring::signature::RsaKeyPair; use serde::Deserialize; -use snafu::{ResultExt, Snafu}; use std::collections::BTreeMap; use std::env; use std::fs::File; @@ -54,36 +53,39 @@ const DEFAULT_GCS_SIGN_BLOB_HOST: &str = "storage.googleapis.com"; const DEFAULT_METADATA_HOST: &str = "metadata.google.internal"; const DEFAULT_METADATA_IP: &str = "169.254.169.254"; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub enum Error { - #[snafu(display("Unable to open service account file from {}: {}", path.display(), source))] + #[error("Unable to open service account file from {}: {}", path.display(), source)] OpenCredentials { source: std::io::Error, path: PathBuf, }, - #[snafu(display("Unable to decode service account file: {}", source))] + #[error("Unable to decode service account file: {}", source)] DecodeCredentials { source: serde_json::Error }, - #[snafu(display("No RSA key found in pem file"))] + #[error("No RSA key found in pem file")] MissingKey, - #[snafu(display("Invalid RSA key: {}", source), context(false))] - InvalidKey { source: ring::error::KeyRejected }, + #[error("Invalid RSA key: {}", source)] + InvalidKey { + #[from] + source: ring::error::KeyRejected, + }, - 
#[snafu(display("Error signing: {}", source))] + #[error("Error signing: {}", source)] Sign { source: ring::error::Unspecified }, - #[snafu(display("Error encoding jwt payload: {}", source))] + #[error("Error encoding jwt payload: {}", source)] Encode { source: serde_json::Error }, - #[snafu(display("Unsupported key encoding: {}", encoding))] + #[error("Unsupported key encoding: {}", encoding)] UnsupportedKey { encoding: String }, - #[snafu(display("Error performing token request: {}", source))] + #[error("Error performing token request: {}", source)] TokenRequest { source: crate::client::retry::Error }, - #[snafu(display("Error getting token response body: {}", source))] + #[error("Error getting token response body: {}", source)] TokenResponseBody { source: reqwest::Error }, } @@ -153,7 +155,7 @@ impl ServiceAccountKey { string_to_sign.as_bytes(), &mut signature, ) - .context(SignSnafu)?; + .map_err(|source| Error::Sign { source })?; Ok(hex_encode(&signature)) } @@ -289,7 +291,7 @@ impl TokenProvider for SelfSignedJwt { message.as_bytes(), &mut sig_bytes, ) - .context(SignSnafu)?; + .map_err(|source| Error::Sign { source })?; let signature = BASE64_URL_SAFE_NO_PAD.encode(sig_bytes); let bearer = [message, signature].join("."); @@ -305,11 +307,12 @@ fn read_credentials_file(service_account_path: impl AsRef) - where T: serde::de::DeserializeOwned, { - let file = File::open(&service_account_path).context(OpenCredentialsSnafu { - path: service_account_path.as_ref().to_owned(), + let file = File::open(&service_account_path).map_err(|source| { + let path = service_account_path.as_ref().to_owned(); + Error::OpenCredentials { source, path } })?; let reader = BufReader::new(file); - serde_json::from_reader(reader).context(DecodeCredentialsSnafu) + serde_json::from_reader(reader).map_err(|source| Error::DecodeCredentials { source }) } /// A deserialized `service-account-********.json`-file. @@ -341,7 +344,7 @@ impl ServiceAccountCredentials { /// Create a new [`ServiceAccountCredentials`] from a string. pub(crate) fn from_key(key: &str) -> Result { - serde_json::from_str(key).context(DecodeCredentialsSnafu) + serde_json::from_str(key).map_err(|source| Error::DecodeCredentials { source }) } /// Create a [`SelfSignedJwt`] from this credentials struct. @@ -380,7 +383,7 @@ fn seconds_since_epoch() -> u64 { } fn b64_encode_obj(obj: &T) -> Result { - let string = serde_json::to_string(obj).context(EncodeSnafu)?; + let string = serde_json::to_string(obj).map_err(|source| Error::Encode { source })?; Ok(BASE64_URL_SAFE_NO_PAD.encode(string)) } @@ -404,10 +407,10 @@ async fn make_metadata_request( .query(&[("audience", "https://www.googleapis.com/oauth2/v4/token")]) .send_retry(retry) .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .json() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(response) } @@ -467,10 +470,10 @@ async fn make_metadata_request_for_email( .header("Metadata-Flavor", "Google") .send_retry(retry) .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .text() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(response) } @@ -608,10 +611,10 @@ impl AuthorizedUserSigningCredentials { .query(&[("access_token", &self.credential.refresh_token)]) .send_retry(retry) .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? 
.json::() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(response.email) } @@ -659,10 +662,10 @@ impl TokenProvider for AuthorizedUserCredentials { .idempotent(true) .send() .await - .context(TokenRequestSnafu)? + .map_err(|source| Error::TokenRequest { source })? .json::() .await - .context(TokenResponseBodySnafu)?; + .map_err(|source| Error::TokenResponseBody { source })?; Ok(TemporaryToken { token: Arc::new(GcpCredential { diff --git a/object_store/src/http/client.rs b/object_store/src/http/client.rs index eeb7e5694228..41e6464c1999 100644 --- a/object_store/src/http/client.rs +++ b/object_store/src/http/client.rs @@ -32,42 +32,41 @@ use hyper::header::{ use percent_encoding::percent_decode_str; use reqwest::{Method, Response, StatusCode}; use serde::Deserialize; -use snafu::{OptionExt, ResultExt, Snafu}; use url::Url; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Request error: {}", source))] + #[error("Request error: {}", source)] Request { source: retry::Error }, - #[snafu(display("Request error: {}", source))] + #[error("Request error: {}", source)] Reqwest { source: reqwest::Error }, - #[snafu(display("Range request not supported by {}", href))] + #[error("Range request not supported by {}", href)] RangeNotSupported { href: String }, - #[snafu(display("Error decoding PROPFIND response: {}", source))] + #[error("Error decoding PROPFIND response: {}", source)] InvalidPropFind { source: quick_xml::de::DeError }, - #[snafu(display("Missing content size for {}", href))] + #[error("Missing content size for {}", href)] MissingSize { href: String }, - #[snafu(display("Error getting properties of \"{}\" got \"{}\"", href, status))] + #[error("Error getting properties of \"{}\" got \"{}\"", href, status)] PropStatus { href: String, status: String }, - #[snafu(display("Failed to parse href \"{}\": {}", href, source))] + #[error("Failed to parse href \"{}\": {}", href, source)] InvalidHref { href: String, source: url::ParseError, }, - #[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))] + #[error("Path \"{}\" contained non-unicode characters: {}", path, source)] NonUnicode { path: String, source: std::str::Utf8Error, }, - #[snafu(display("Encountered invalid path \"{}\": {}", path, source))] + #[error("Encountered invalid path \"{}\": {}", path, source)] InvalidPath { path: String, source: crate::path::Error, @@ -129,7 +128,7 @@ impl Client { .request(method, url) .send_retry(&self.retry_config) .await - .context(RequestSnafu)?; + .map_err(|source| Error::Request { source })?; Ok(()) } @@ -236,7 +235,10 @@ impl Client { .await; let response = match result { - Ok(result) => result.bytes().await.context(ReqwestSnafu)?, + Ok(result) => result + .bytes() + .await + .map_err(|source| Error::Reqwest { source })?, Err(e) if matches!(e.status(), Some(StatusCode::NOT_FOUND)) => { return match depth { "0" => { @@ -255,7 +257,9 @@ impl Client { Err(source) => return Err(Error::Request { source }.into()), }; - let status = quick_xml::de::from_reader(response.reader()).context(InvalidPropFindSnafu)?; + let status = quick_xml::de::from_reader(response.reader()) + .map_err(|source| Error::InvalidPropFind { source })?; + Ok(status) } @@ -397,14 +401,23 @@ impl MultiStatusResponse { let url = Url::options() .base_url(Some(base_url)) .parse(&self.href) - .context(InvalidHrefSnafu { href: &self.href })?; + .map_err(|source| Error::InvalidHref { + href: self.href.clone(), + 
source, + })?; // Reverse any percent encoding let path = percent_decode_str(url.path()) .decode_utf8() - .context(NonUnicodeSnafu { path: url.path() })?; + .map_err(|source| Error::NonUnicode { + path: url.path().into(), + source, + })?; - Ok(Path::parse(path.as_ref()).context(InvalidPathSnafu { path })?) + Ok(Path::parse(path.as_ref()).map_err(|source| { + let path = path.into(); + Error::InvalidPath { path, source } + })?) } fn size(&self) -> Result { @@ -412,7 +425,10 @@ impl MultiStatusResponse { .prop_stat .prop .content_length - .context(MissingSizeSnafu { href: &self.href })?; + .ok_or_else(|| Error::MissingSize { + href: self.href.clone(), + })?; + Ok(size) } diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs index 4b1c927e74f5..417f72856722 100644 --- a/object_store/src/http/mod.rs +++ b/object_store/src/http/mod.rs @@ -35,7 +35,6 @@ use async_trait::async_trait; use futures::stream::BoxStream; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; -use snafu::{OptionExt, ResultExt, Snafu}; use url::Url; use crate::client::get::GetClientExt; @@ -49,18 +48,18 @@ use crate::{ mod client; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("Must specify a URL"))] + #[error("Must specify a URL")] MissingUrl, - #[snafu(display("Unable parse source url. Url: {}, Error: {}", url, source))] + #[error("Unable parse source url. Url: {}, Error: {}", url, source)] UnableToParseUrl { source: url::ParseError, url: String, }, - #[snafu(display("Unable to extract metadata from headers: {}", source))] + #[error("Unable to extract metadata from headers: {}", source)] Metadata { source: crate::client::header::Error, }, @@ -235,8 +234,8 @@ impl HttpBuilder { /// Build an [`HttpStore`] with the configured options pub fn build(self) -> Result { - let url = self.url.context(MissingUrlSnafu)?; - let parsed = Url::parse(&url).context(UnableToParseUrlSnafu { url })?; + let url = self.url.ok_or(Error::MissingUrl)?; + let parsed = Url::parse(&url).map_err(|source| Error::UnableToParseUrl { url, source })?; Ok(HttpStore { client: Client::new(parsed, self.client_options, self.retry_config)?, diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 6f5733226922..987ffacc6e49 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -566,7 +566,6 @@ use async_trait::async_trait; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::{stream::BoxStream, StreamExt, TryStreamExt}; -use snafu::Snafu; use std::fmt::{Debug, Formatter}; #[cfg(all(feature = "fs", not(target_arch = "wasm32")))] use std::io::{Read, Seek, SeekFrom}; @@ -1229,11 +1228,11 @@ pub struct PutResult { pub type Result = std::result::Result; /// A specialized `Error` for object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] #[non_exhaustive] pub enum Error { /// A fallback error type when no variant matches - #[snafu(display("Generic {} error: {}", store, source))] + #[error("Generic {} error: {}", store, source)] Generic { /// The store this error originated from store: &'static str, @@ -1242,7 +1241,7 @@ pub enum Error { }, /// Error when the object is not found at given location - #[snafu(display("Object at location {} not found: {}", path, source))] + #[error("Object at location {} not found: {}", path, source)] NotFound { /// The path to file path: String, @@ -1251,31 +1250,30 @@ pub enum Error { }, /// Error for invalid path - #[snafu( - display("Encountered object with invalid path: {}", source), - 
context(false) - )] + #[error("Encountered object with invalid path: {}", source)] InvalidPath { /// The wrapped error + #[from] source: path::Error, }, /// Error when `tokio::spawn` failed - #[snafu(display("Error joining spawned task: {}", source), context(false))] + #[error("Error joining spawned task: {}", source)] JoinError { /// The wrapped error + #[from] source: tokio::task::JoinError, }, /// Error when the attempted operation is not supported - #[snafu(display("Operation not supported: {}", source))] + #[error("Operation not supported: {}", source)] NotSupported { /// The wrapped error source: Box, }, /// Error when the object already exists - #[snafu(display("Object at location {} already exists: {}", path, source))] + #[error("Object at location {} already exists: {}", path, source)] AlreadyExists { /// The path to the path: String, @@ -1284,7 +1282,7 @@ pub enum Error { }, /// Error when the required conditions failed for the operation - #[snafu(display("Request precondition failure for path {}: {}", path, source))] + #[error("Request precondition failure for path {}: {}", path, source)] Precondition { /// The path to the file path: String, @@ -1293,7 +1291,7 @@ pub enum Error { }, /// Error when the object at the location isn't modified - #[snafu(display("Object at location {} not modified: {}", path, source))] + #[error("Object at location {} not modified: {}", path, source)] NotModified { /// The path to the file path: String, @@ -1302,16 +1300,16 @@ pub enum Error { }, /// Error when an operation is not implemented - #[snafu(display("Operation not yet implemented."))] + #[error("Operation not yet implemented.")] NotImplemented, /// Error when the used credentials don't have enough permission /// to perform the requested operation - #[snafu(display( + #[error( "The operation lacked the necessary privileges to complete for path {}: {}", path, source - ))] + )] PermissionDenied { /// The path to the file path: String, @@ -1320,11 +1318,11 @@ pub enum Error { }, /// Error when the used credentials lack valid authentication - #[snafu(display( + #[error( "The operation lacked valid authentication credentials for path {}: {}", path, source - ))] + )] Unauthenticated { /// The path to the file path: String, @@ -1333,7 +1331,7 @@ pub enum Error { }, /// Error when a configuration key is invalid for the store used - #[snafu(display("Configuration key: '{}' is not valid for store '{}'.", key, store))] + #[error("Configuration key: '{}' is not valid for store '{}'.", key, store)] UnknownConfigurationKey { /// The object store used store: &'static str, diff --git a/object_store/src/local.rs b/object_store/src/local.rs index 78fce9c26224..b193481ae7b8 100644 --- a/object_store/src/local.rs +++ b/object_store/src/local.rs @@ -30,7 +30,6 @@ use chrono::{DateTime, Utc}; use futures::{stream::BoxStream, StreamExt}; use futures::{FutureExt, TryStreamExt}; use parking_lot::Mutex; -use snafu::{ensure, OptionExt, ResultExt, Snafu}; use url::Url; use walkdir::{DirEntry, WalkDir}; @@ -43,117 +42,80 @@ use crate::{ }; /// A specialized `Error` for filesystem object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub(crate) enum Error { - #[snafu(display("File size for {} did not fit in a usize: {}", path, source))] + #[error("File size for {} did not fit in a usize: {}", path, source)] FileSizeOverflowedUsize { source: std::num::TryFromIntError, path: String, }, - #[snafu(display("Unable to walk dir: {}", source))] - UnableToWalkDir { - source: walkdir::Error, - }, 
+ #[error("Unable to walk dir: {}", source)] + UnableToWalkDir { source: walkdir::Error }, - #[snafu(display("Unable to access metadata for {}: {}", path, source))] + #[error("Unable to access metadata for {}: {}", path, source)] Metadata { source: Box, path: String, }, - #[snafu(display("Unable to copy data to file: {}", source))] - UnableToCopyDataToFile { - source: io::Error, - }, + #[error("Unable to copy data to file: {}", source)] + UnableToCopyDataToFile { source: io::Error }, - #[snafu(display("Unable to rename file: {}", source))] - UnableToRenameFile { - source: io::Error, - }, + #[error("Unable to rename file: {}", source)] + UnableToRenameFile { source: io::Error }, - #[snafu(display("Unable to create dir {}: {}", path.display(), source))] - UnableToCreateDir { - source: io::Error, - path: PathBuf, - }, + #[error("Unable to create dir {}: {}", path.display(), source)] + UnableToCreateDir { source: io::Error, path: PathBuf }, - #[snafu(display("Unable to create file {}: {}", path.display(), source))] - UnableToCreateFile { - source: io::Error, - path: PathBuf, - }, + #[error("Unable to create file {}: {}", path.display(), source)] + UnableToCreateFile { source: io::Error, path: PathBuf }, - #[snafu(display("Unable to delete file {}: {}", path.display(), source))] - UnableToDeleteFile { - source: io::Error, - path: PathBuf, - }, + #[error("Unable to delete file {}: {}", path.display(), source)] + UnableToDeleteFile { source: io::Error, path: PathBuf }, - #[snafu(display("Unable to open file {}: {}", path.display(), source))] - UnableToOpenFile { - source: io::Error, - path: PathBuf, - }, + #[error("Unable to open file {}: {}", path.display(), source)] + UnableToOpenFile { source: io::Error, path: PathBuf }, - #[snafu(display("Unable to read data from file {}: {}", path.display(), source))] - UnableToReadBytes { - source: io::Error, - path: PathBuf, - }, + #[error("Unable to read data from file {}: {}", path.display(), source)] + UnableToReadBytes { source: io::Error, path: PathBuf }, - #[snafu(display("Out of range of file {}, expected: {}, actual: {}", path.display(), expected, actual))] + #[error("Out of range of file {}, expected: {}, actual: {}", path.display(), expected, actual)] OutOfRange { path: PathBuf, expected: usize, actual: usize, }, - #[snafu(display("Requested range was invalid"))] - InvalidRange { - source: InvalidGetRange, - }, + #[error("Requested range was invalid")] + InvalidRange { source: InvalidGetRange }, - #[snafu(display("Unable to copy file from {} to {}: {}", from.display(), to.display(), source))] + #[error("Unable to copy file from {} to {}: {}", from.display(), to.display(), source)] UnableToCopyFile { from: PathBuf, to: PathBuf, source: io::Error, }, - NotFound { - path: PathBuf, - source: io::Error, - }, + #[error("NotFound")] + NotFound { path: PathBuf, source: io::Error }, - #[snafu(display("Error seeking file {}: {}", path.display(), source))] - Seek { - source: io::Error, - path: PathBuf, - }, + #[error("Error seeking file {}: {}", path.display(), source)] + Seek { source: io::Error, path: PathBuf }, - #[snafu(display("Unable to convert URL \"{}\" to filesystem path", url))] - InvalidUrl { - url: Url, - }, + #[error("Unable to convert URL \"{}\" to filesystem path", url)] + InvalidUrl { url: Url }, - AlreadyExists { - path: String, - source: io::Error, - }, + #[error("AlreadyExists")] + AlreadyExists { path: String, source: io::Error }, - #[snafu(display("Unable to canonicalize filesystem root: {}", path.display()))] - UnableToCanonicalize { - 
path: PathBuf, - source: io::Error, - }, + #[error("Unable to canonicalize filesystem root: {}", path.display())] + UnableToCanonicalize { path: PathBuf, source: io::Error }, - #[snafu(display("Filenames containing trailing '/#\\d+/' are not supported: {}", path))] - InvalidPath { - path: String, - }, + #[error("Filenames containing trailing '/#\\d+/' are not supported: {}", path)] + InvalidPath { path: String }, - #[snafu(display("Upload aborted"))] + #[error("Upload aborted")] Aborted, } @@ -276,8 +238,9 @@ impl LocalFileSystem { /// Returns an error if the path does not exist /// pub fn new_with_prefix(prefix: impl AsRef) -> Result { - let path = std::fs::canonicalize(&prefix).context(UnableToCanonicalizeSnafu { - path: prefix.as_ref(), + let path = std::fs::canonicalize(&prefix).map_err(|source| { + let path = prefix.as_ref().into(); + Error::UnableToCanonicalize { source, path } })?; Ok(Self { @@ -290,12 +253,12 @@ impl LocalFileSystem { /// Return an absolute filesystem path of the given file location pub fn path_to_filesystem(&self, location: &Path) -> Result { - ensure!( - is_valid_file_path(location), - InvalidPathSnafu { - path: location.as_ref() - } - ); + if !is_valid_file_path(location) { + let path = location.as_ref().into(); + let error = Error::InvalidPath { path }; + return Err(error.into()); + } + let path = self.config.prefix_to_filesystem(location)?; #[cfg(target_os = "windows")] @@ -451,7 +414,9 @@ impl ObjectStore for LocalFileSystem { options.check_preconditions(&meta)?; let range = match options.range { - Some(r) => r.as_range(meta.size).context(InvalidRangeSnafu)?, + Some(r) => r + .as_range(meta.size) + .map_err(|source| Error::InvalidRange { source })?, None => 0..meta.size, }; @@ -721,12 +686,15 @@ impl ObjectStore for LocalFileSystem { /// Creates the parent directories of `path` or returns an error based on `source` if no parent fn create_parent_dirs(path: &std::path::Path, source: io::Error) -> Result<()> { - let parent = path.parent().ok_or_else(|| Error::UnableToCreateFile { - path: path.to_path_buf(), - source, + let parent = path.parent().ok_or_else(|| { + let path = path.to_path_buf(); + Error::UnableToCreateFile { path, source } })?; - std::fs::create_dir_all(parent).context(UnableToCreateDirSnafu { path: parent })?; + std::fs::create_dir_all(parent).map_err(|source| { + let path = parent.into(); + Error::UnableToCreateDir { source, path } + })?; Ok(()) } @@ -796,12 +764,14 @@ impl MultipartUpload for LocalUpload { let s = Arc::clone(&self.state); maybe_spawn_blocking(move || { let mut file = s.file.lock(); - file.seek(SeekFrom::Start(offset)) - .context(SeekSnafu { path: &s.dest })?; + file.seek(SeekFrom::Start(offset)).map_err(|source| { + let path = s.dest.clone(); + Error::Seek { source, path } + })?; data.iter() .try_for_each(|x| file.write_all(x)) - .context(UnableToCopyDataToFileSnafu)?; + .map_err(|source| Error::UnableToCopyDataToFile { source })?; Ok(()) }) @@ -809,12 +779,13 @@ impl MultipartUpload for LocalUpload { } async fn complete(&mut self) -> Result { - let src = self.src.take().context(AbortedSnafu)?; + let src = self.src.take().ok_or(Error::Aborted)?; let s = Arc::clone(&self.state); maybe_spawn_blocking(move || { // Ensure no inflight writes let file = s.file.lock(); - std::fs::rename(&src, &s.dest).context(UnableToRenameFileSnafu)?; + std::fs::rename(&src, &s.dest) + .map_err(|source| Error::UnableToRenameFile { source })?; let metadata = file.metadata().map_err(|e| Error::Metadata { source: e.into(), path: 
src.to_string_lossy().to_string(), @@ -829,9 +800,10 @@ impl MultipartUpload for LocalUpload { } async fn abort(&mut self) -> Result<()> { - let src = self.src.take().context(AbortedSnafu)?; + let src = self.src.take().ok_or(Error::Aborted)?; maybe_spawn_blocking(move || { - std::fs::remove_file(&src).context(UnableToDeleteFileSnafu { path: &src })?; + std::fs::remove_file(&src) + .map_err(|source| Error::UnableToDeleteFile { source, path: src })?; Ok(()) }) .await @@ -898,22 +870,30 @@ pub(crate) fn chunked_stream( pub(crate) fn read_range(file: &mut File, path: &PathBuf, range: Range) -> Result { let to_read = range.end - range.start; file.seek(SeekFrom::Start(range.start as u64)) - .context(SeekSnafu { path })?; + .map_err(|source| { + let path = path.into(); + Error::Seek { source, path } + })?; let mut buf = Vec::with_capacity(to_read); let read = file .take(to_read as u64) .read_to_end(&mut buf) - .context(UnableToReadBytesSnafu { path })?; + .map_err(|source| { + let path = path.into(); + Error::UnableToReadBytes { source, path } + })?; - ensure!( - read == to_read, - OutOfRangeSnafu { - path, + if read != to_read { + let error = Error::OutOfRange { + path: path.into(), expected: to_read, - actual: read - } - ); + actual: read, + }; + + return Err(error.into()); + } + Ok(buf.into()) } @@ -982,8 +962,9 @@ fn get_etag(metadata: &Metadata) -> String { fn convert_metadata(metadata: Metadata, location: Path) -> Result { let last_modified = last_modified(&metadata); - let size = usize::try_from(metadata.len()).context(FileSizeOverflowedUsizeSnafu { - path: location.as_ref(), + let size = usize::try_from(metadata.len()).map_err(|source| { + let path = location.as_ref().into(); + Error::FileSizeOverflowedUsize { source, path } })?; Ok(ObjectMeta { diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index a467e3b88a26..3f3cff3390db 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -25,7 +25,6 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::{stream::BoxStream, StreamExt}; use parking_lot::RwLock; -use snafu::{OptionExt, ResultExt, Snafu}; use crate::multipart::{MultipartStore, PartId}; use crate::util::InvalidGetRange; @@ -37,24 +36,24 @@ use crate::{ use crate::{GetOptions, PutPayload}; /// A specialized `Error` for in-memory object store-related errors -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] enum Error { - #[snafu(display("No data in memory found. Location: {path}"))] + #[error("No data in memory found. 
Location: {path}")] NoDataInMemory { path: String }, - #[snafu(display("Invalid range: {source}"))] + #[error("Invalid range: {source}")] Range { source: InvalidGetRange }, - #[snafu(display("Object already exists at that location: {path}"))] + #[error("Object already exists at that location: {path}")] AlreadyExists { path: String }, - #[snafu(display("ETag required for conditional update"))] + #[error("ETag required for conditional update")] MissingETag, - #[snafu(display("MultipartUpload not found: {id}"))] + #[error("MultipartUpload not found: {id}")] UploadNotFound { id: String }, - #[snafu(display("Missing part at index: {part}"))] + #[error("Missing part at index: {part}")] MissingPart { part: usize }, } @@ -158,7 +157,7 @@ impl Storage { }), Some(e) => { let existing = e.e_tag.to_string(); - let expected = v.e_tag.context(MissingETagSnafu)?; + let expected = v.e_tag.ok_or(Error::MissingETag)?; if existing == expected { *e = entry; Ok(()) @@ -177,7 +176,7 @@ impl Storage { .parse() .ok() .and_then(|x| self.uploads.get_mut(&x)) - .context(UploadNotFoundSnafu { id })?; + .ok_or_else(|| Error::UploadNotFound { id: id.into() })?; Ok(parts) } @@ -186,7 +185,7 @@ impl Storage { .parse() .ok() .and_then(|x| self.uploads.remove(&x)) - .context(UploadNotFoundSnafu { id })?; + .ok_or_else(|| Error::UploadNotFound { id: id.into() })?; Ok(parts) } } @@ -250,7 +249,9 @@ impl ObjectStore for InMemory { let (range, data) = match options.range { Some(range) => { - let r = range.as_range(entry.data.len()).context(RangeSnafu)?; + let r = range + .as_range(entry.data.len()) + .map_err(|source| Error::Range { source })?; (r.clone(), entry.data.slice(r)) } None => (0..entry.data.len(), entry.data), @@ -272,7 +273,7 @@ impl ObjectStore for InMemory { .map(|range| { let r = GetRange::Bounded(range.clone()) .as_range(entry.data.len()) - .context(RangeSnafu)?; + .map_err(|source| Error::Range { source })?; Ok(entry.data.slice(r)) }) @@ -435,7 +436,7 @@ impl MultipartStore for InMemory { let mut cap = 0; for (part, x) in upload.parts.iter().enumerate() { - cap += x.as_ref().context(MissingPartSnafu { part })?.len(); + cap += x.as_ref().ok_or(Error::MissingPart { part })?.len(); } let mut buf = Vec::with_capacity(cap); for x in &upload.parts { @@ -474,7 +475,7 @@ impl InMemory { .map .get(location) .cloned() - .context(NoDataInMemorySnafu { + .ok_or_else(|| Error::NoDataInMemory { path: location.to_string(), })?; diff --git a/object_store/src/parse.rs b/object_store/src/parse.rs index a3919305281d..bc65a0b8d1c8 100644 --- a/object_store/src/parse.rs +++ b/object_store/src/parse.rs @@ -20,16 +20,18 @@ use crate::local::LocalFileSystem; use crate::memory::InMemory; use crate::path::Path; use crate::ObjectStore; -use snafu::Snafu; use url::Url; -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub enum Error { - #[snafu(display("Unable to recognise URL \"{}\"", url))] + #[error("Unable to recognise URL \"{}\"", url)] Unrecognised { url: Url }, - #[snafu(context(false))] - Path { source: crate::path::Error }, + #[error(transparent)] + Path { + #[from] + source: crate::path::Error, + }, } impl From for super::Error { diff --git a/object_store/src/path/mod.rs b/object_store/src/path/mod.rs index 4c9bb5f05186..f8affe8dfbb9 100644 --- a/object_store/src/path/mod.rs +++ b/object_store/src/path/mod.rs @@ -19,7 +19,6 @@ use itertools::Itertools; use percent_encoding::percent_decode; -use snafu::{ensure, ResultExt, Snafu}; use std::fmt::Formatter; #[cfg(not(target_arch = "wasm32"))] use url::Url; @@ -35,18 
+34,18 @@ mod parts; pub use parts::{InvalidPart, PathPart}; /// Error returned by [`Path::parse`] -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] #[non_exhaustive] pub enum Error { /// Error when there's an empty segment between two slashes `/` in the path - #[snafu(display("Path \"{}\" contained empty path segment", path))] + #[error("Path \"{}\" contained empty path segment", path)] EmptySegment { /// The source path path: String, }, /// Error when an invalid segment is encountered in the given path - #[snafu(display("Error parsing Path \"{}\": {}", path, source))] + #[error("Error parsing Path \"{}\": {}", path, source)] BadSegment { /// The source path path: String, @@ -55,7 +54,7 @@ pub enum Error { }, /// Error when path cannot be canonicalized - #[snafu(display("Failed to canonicalize path \"{}\": {}", path.display(), source))] + #[error("Failed to canonicalize path \"{}\": {}", path.display(), source)] Canonicalize { /// The source path path: std::path::PathBuf, @@ -64,14 +63,14 @@ pub enum Error { }, /// Error when the path is not a valid URL - #[snafu(display("Unable to convert path \"{}\" to URL", path.display()))] + #[error("Unable to convert path \"{}\" to URL", path.display())] InvalidPath { /// The source path path: std::path::PathBuf, }, /// Error when a path contains non-unicode characters - #[snafu(display("Path \"{}\" contained non-unicode characters: {}", path, source))] + #[error("Path \"{}\" contained non-unicode characters: {}", path, source)] NonUnicode { /// The source path path: String, @@ -80,7 +79,7 @@ pub enum Error { }, /// Error when the a path doesn't start with given prefix - #[snafu(display("Path {} does not start with prefix {}", path, prefix))] + #[error("Path {} does not start with prefix {}", path, prefix)] PrefixMismatch { /// The source path path: String, @@ -173,8 +172,14 @@ impl Path { let stripped = stripped.strip_suffix(DELIMITER).unwrap_or(stripped); for segment in stripped.split(DELIMITER) { - ensure!(!segment.is_empty(), EmptySegmentSnafu { path }); - PathPart::parse(segment).context(BadSegmentSnafu { path })?; + if segment.is_empty() { + return Err(Error::EmptySegment { path: path.into() }); + } + + PathPart::parse(segment).map_err(|source| { + let path = path.into(); + Error::BadSegment { source, path } + })?; } Ok(Self { @@ -190,8 +195,9 @@ impl Path { /// /// Note: this will canonicalize the provided path, resolving any symlinks pub fn from_filesystem_path(path: impl AsRef) -> Result { - let absolute = std::fs::canonicalize(&path).context(CanonicalizeSnafu { - path: path.as_ref(), + let absolute = std::fs::canonicalize(&path).map_err(|source| { + let path = path.as_ref().into(); + Error::Canonicalize { source, path } })?; Self::from_absolute_path(absolute) @@ -241,7 +247,10 @@ impl Path { let path = path.as_ref(); let decoded = percent_decode(path.as_bytes()) .decode_utf8() - .context(NonUnicodeSnafu { path })?; + .map_err(|source| { + let path = path.into(); + Error::NonUnicode { source, path } + })?; Self::parse(decoded) } diff --git a/object_store/src/path/parts.rs b/object_store/src/path/parts.rs index de2e1a75c955..9c6612bf9331 100644 --- a/object_store/src/path/parts.rs +++ b/object_store/src/path/parts.rs @@ -19,15 +19,14 @@ use percent_encoding::{percent_encode, AsciiSet, CONTROLS}; use std::borrow::Cow; use crate::path::DELIMITER_BYTE; -use snafu::Snafu; /// Error returned by [`PathPart::parse`] -#[derive(Debug, Snafu)] -#[snafu(display( +#[derive(Debug, thiserror::Error)] +#[error( "Encountered illegal character 
sequence \"{}\" whilst parsing path segment \"{}\"", illegal, segment -))] +)] #[allow(missing_copy_implementations)] pub struct InvalidPart { segment: String, diff --git a/object_store/src/util.rs b/object_store/src/util.rs index 99102a99e61e..6d638f3cb2b8 100644 --- a/object_store/src/util.rs +++ b/object_store/src/util.rs @@ -24,7 +24,6 @@ use std::{ use super::Result; use bytes::Bytes; use futures::{stream::StreamExt, Stream, TryStreamExt}; -use snafu::Snafu; #[cfg(any(feature = "azure", feature = "http"))] pub(crate) static RFC1123_FMT: &str = "%a, %d %h %Y %T GMT"; @@ -204,14 +203,12 @@ pub enum GetRange { Suffix(usize), } -#[derive(Debug, Snafu)] +#[derive(Debug, thiserror::Error)] pub(crate) enum InvalidGetRange { - #[snafu(display( - "Wanted range starting at {requested}, but object was only {length} bytes long" - ))] + #[error("Wanted range starting at {requested}, but object was only {length} bytes long")] StartTooLarge { requested: usize, length: usize }, - #[snafu(display("Range started at {start} and ended at {end}"))] + #[error("Range started at {start} and ended at {end}")] Inconsistent { start: usize, end: usize }, } From debd2e872c11f7a4f06488a88a1ad170fca41145 Mon Sep 17 00:00:00 2001 From: Kikkon <19528375+Kikkon@users.noreply.github.com> Date: Fri, 3 Jan 2025 19:44:34 +0800 Subject: [PATCH 27/68] feat: add GenericListViewBuilder (#6552) * feat: add GenericListViewBuilder * remove uszie * fix tests * remove static * lint * chore: add comment for should fail test * Update arrow-array/src/builder/generic_list_view_builder.rs Co-authored-by: Marco Neumann * Update arrow-array/src/builder/generic_list_view_builder.rs Co-authored-by: Marco Neumann * fix name & lint --------- Co-authored-by: Marco Neumann --- .../src/builder/generic_list_view_builder.rs | 707 ++++++++++++++++++ arrow-array/src/builder/mod.rs | 8 + arrow-array/src/builder/struct_builder.rs | 10 + arrow-array/src/cast.rs | 16 + 4 files changed, 741 insertions(+) create mode 100644 arrow-array/src/builder/generic_list_view_builder.rs diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs new file mode 100644 index 000000000000..5aaf9efefe24 --- /dev/null +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -0,0 +1,707 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::builder::ArrayBuilder; +use crate::{ArrayRef, GenericListViewArray, OffsetSizeTrait}; +use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer}; +use arrow_schema::{Field, FieldRef}; +use std::any::Any; +use std::sync::Arc; + +/// Builder for [`GenericListViewArray`] +#[derive(Debug)] +pub struct GenericListViewBuilder { + offsets_builder: BufferBuilder, + sizes_builder: BufferBuilder, + null_buffer_builder: NullBufferBuilder, + values_builder: T, + field: Option, + current_offset: OffsetSize, +} + +impl Default for GenericListViewBuilder { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl ArrayBuilder + for GenericListViewBuilder +{ + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.null_buffer_builder.len() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } +} + +impl GenericListViewBuilder { + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + pub fn new(values_builder: T) -> Self { + let capacity = values_builder.len(); + Self::with_capacity(values_builder, capacity) + } + + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + /// `capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(values_builder: T, capacity: usize) -> Self { + let offsets_builder = BufferBuilder::::new(capacity); + let sizes_builder = BufferBuilder::::new(capacity); + Self { + offsets_builder, + null_buffer_builder: NullBufferBuilder::new(capacity), + values_builder, + sizes_builder, + field: None, + current_offset: OffsetSize::zero(), + } + } + + /// + /// By default a nullable field is created with the name `item` + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `T` + pub fn with_field(self, field: impl Into) -> Self { + Self { + field: Some(field.into()), + ..self + } + } +} + +impl GenericListViewBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to append values into the child array builder, + /// but you must call [`append`](#method.append) to delimit each distinct list value. 
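+    ///
+    /// Each call to [`append`](#method.append) records the entry's starting offset,
+    /// and the number of child values added since the previous call becomes its size.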
+ pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + /// Returns the child array builder as an immutable reference + pub fn values_ref(&self) -> &T { + &self.values_builder + } + + /// Finish the current variable-length list array slot + /// + /// # Panics + /// + /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` + #[inline] + pub fn append(&mut self, is_valid: bool) { + self.offsets_builder.append(self.current_offset); + self.sizes_builder.append( + OffsetSize::from_usize( + self.values_builder.len() - self.current_offset.to_usize().unwrap(), + ) + .unwrap(), + ); + self.null_buffer_builder.append(is_valid); + self.current_offset = OffsetSize::from_usize(self.values_builder.len()).unwrap(); + } + + /// Append value into this [`GenericListViewBuilder`] + #[inline] + pub fn append_value(&mut self, i: I) + where + T: Extend>, + I: IntoIterator>, + { + self.extend(std::iter::once(Some(i))) + } + + /// Append a null to this [`GenericListViewBuilder`] + /// + /// See [`Self::append_value`] for an example use. + #[inline] + pub fn append_null(&mut self) { + self.offsets_builder.append(self.current_offset); + self.sizes_builder + .append(OffsetSize::from_usize(0).unwrap()); + self.null_buffer_builder.append_null(); + } + + /// Appends an optional value into this [`GenericListViewBuilder`] + /// + /// If `Some` calls [`Self::append_value`] otherwise calls [`Self::append_null`] + #[inline] + pub fn append_option(&mut self, i: Option) + where + T: Extend>, + I: IntoIterator>, + { + match i { + Some(i) => self.append_value(i), + None => self.append_null(), + } + } + + /// Builds the [`GenericListViewArray`] and reset this builder. + pub fn finish(&mut self) -> GenericListViewArray { + let values = self.values_builder.finish(); + let nulls = self.null_buffer_builder.finish(); + let offsets = self.offsets_builder.finish(); + self.current_offset = OffsetSize::zero(); + + // Safety: Safe by construction + let offsets = ScalarBuffer::from(offsets); + let sizes = self.sizes_builder.finish(); + let sizes = ScalarBuffer::from(sizes); + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Builds the [`GenericListViewArray`] without resetting the builder. 
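+    ///
+    /// Unlike [`Self::finish`] this copies the offset, size and null buffers, so the
+    /// builder remains usable and further values can be appended afterwards.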
+ pub fn finish_cloned(&self) -> GenericListViewArray { + let values = self.values_builder.finish_cloned(); + let nulls = self.null_buffer_builder.finish_cloned(); + + let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + // Safety: safe by construction + let offsets = ScalarBuffer::from(offsets); + + let sizes = Buffer::from_slice_ref(self.sizes_builder.as_slice()); + let sizes = ScalarBuffer::from(sizes); + + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Returns the current offsets buffer as a slice + pub fn offsets_slice(&self) -> &[OffsetSize] { + self.offsets_builder.as_slice() + } +} + +impl Extend> for GenericListViewBuilder +where + O: OffsetSizeTrait, + B: ArrayBuilder + Extend, + V: IntoIterator, +{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + match v { + Some(elements) => { + self.values_builder.extend(elements); + self.append(true); + } + None => self.append(false), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::builder::{make_builder, Int32Builder, ListViewBuilder}; + use crate::cast::AsArray; + use crate::types::Int32Type; + use crate::{Array, Int32Array}; + use arrow_schema::DataType; + + fn test_generic_list_view_array_builder_impl() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], [3, 4, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true); + builder.values().append_value(3); + builder.values().append_value(4); + builder.values().append_value(5); + builder.append(true); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true); + let list_array = builder.finish(); + + let list_values = list_array.values().as_primitive::(); + assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(list_array.value_offsets(), [0, 3, 6].map(O::usize_as)); + assert_eq!(list_array.value_sizes(), [3, 3, 2].map(O::usize_as)); + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(2).unwrap(), list_array.value_sizes()[2]); + for i in 0..2 { + assert!(list_array.is_valid(i)); + assert!(!list_array.is_null(i)); + } + } + + #[test] + fn test_list_view_array_builder() { + test_generic_list_view_array_builder_impl::() + } + + #[test] + fn test_large_list_view_array_builder() { + test_generic_list_view_array_builder_impl::() + } + + fn test_generic_list_view_array_builder_nulls_impl() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], null, [3, null, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true); + builder.append(false); + builder.values().append_value(3); + builder.values().append_null(); + builder.values().append_value(5); + builder.append(true); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true); + + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + 
assert_eq!(1, list_array.null_count()); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_sizes()[2]); + } + + #[test] + fn test_list_view_array_builder_nulls() { + test_generic_list_view_array_builder_nulls_impl::() + } + + #[test] + fn test_large_list_view_array_builder_nulls() { + test_generic_list_view_array_builder_nulls_impl::() + } + + #[test] + fn test_list_view_array_builder_finish() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert!(builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_array_builder_finish_cloned() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true); + + let mut arr = builder.finish_cloned(); + assert_eq!(2, arr.len()); + assert!(!builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true); + arr = builder.finish(); + assert_eq!(3, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_list_view_array_builder() { + let primitive_builder = Int32Builder::with_capacity(10); + let values_builder = ListViewBuilder::new(primitive_builder); + let mut builder = ListViewBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true); + builder.values().values().append_value(3); + builder.values().values().append_value(4); + builder.values().append(true); + builder.append(true); + + builder.values().values().append_value(5); + builder.values().values().append_value(6); + builder.values().values().append_value(7); + builder.values().append(true); + builder.values().append(false); + builder.values().values().append_value(8); + builder.values().append(true); + builder.append(true); + + builder.append(false); + + builder.values().values().append_value(9); + builder.values().values().append_value(10); + builder.values().append(true); + builder.append(true); + + let l1 = builder.finish(); + + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5]); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1]); + + let l2 = l1.values().as_list_view::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8]); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 2]); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_extend() { + let mut builder = ListViewBuilder::new(Int32Builder::new()); + builder.extend([ + Some(vec![Some(1), Some(2), Some(7), None]), + Some(vec![]), + Some(vec![Some(4), Some(5)]), + None, + ]); + + let array = builder.finish(); + assert_eq!(array.value_offsets(), [0, 4, 4, 6]); + assert_eq!(array.value_sizes(), [4, 0, 2, 0]); + 
assert_eq!(array.null_count(), 1); + assert!(array.is_null(3)); + let elements = array.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]); + assert_eq!(elements.null_count(), 1); + assert!(elements.is_null(3)); + } + + #[test] + fn test_boxed_primitive_array_builder() { + let values_builder = make_builder(&DataType::Int32, 5); + let mut builder = ListViewBuilder::new(values_builder); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[1, 2, 3]); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[4, 5, 6]); + builder.append(true); + + let arr = builder.finish(); + assert_eq!(2, arr.len()); + + let elements = arr.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]); + } + + #[test] + fn test_boxed_list_view_list_view_array_builder() { + // This test is same as `test_list_list_array_builder` but uses boxed builders. + let values_builder = make_builder( + &DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + #[test] + fn test_boxed_large_list_view_large_list_view_array_builder() { + // This test is same as `test_list_list_array_builder` but uses boxed builders. + let values_builder = make_builder( + &DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + fn test_boxed_generic_list_view_generic_list_view_array_builder( + values_builder: Box, + ) where + O: OffsetSizeTrait + PartialEq, + { + let mut builder: GenericListViewBuilder> = + GenericListViewBuilder::>::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(1); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(2); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(3); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(4); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(5); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(6); + builder + .values() + .as_any_mut() + 
.downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an (Large)ListViewBuilder") + .append_value(7); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(false); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(8); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + builder.append(false); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(9); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(10); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true); + builder.append(true); + + let l1 = builder.finish(); + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5].map(O::usize_as)); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1].map(O::usize_as)); + + let l2 = l1.values().as_list_view::(); + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8].map(O::usize_as)); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 2].map(O::usize_as)); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_with_field() { + let field = Arc::new(Field::new("bar", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), Some(2), Some(3)]); + builder.append_null(); // This is fine as nullability refers to nullability of values + builder.append_value([Some(4)]); + let array = builder.finish(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::ListView(field.clone())); + + builder.append_value([Some(4), Some(5)]); + let array = builder.finish(); + assert_eq!(array.data_type(), &DataType::ListView(field)); + assert_eq!(array.len(), 1); + } + + #[test] + #[should_panic( + expected = r#"Non-nullable field of ListViewArray \"item\" cannot contain nulls"# + )] + // If a non-nullable type is declared but a null value is used, it will be intercepted by the null check. + fn test_checks_nullability() { + let field = Arc::new(Field::new("item", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), None]); + builder.finish(); + } + + #[test] + #[should_panic(expected = "ListViewArray expected data type Int64 got Int32")] + // If the declared type does not match the actual appended type, it will be intercepted by type checking in the finish function. 
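+    // (the panic below is raised by GenericListViewArray::new, which finish calls
+    // to validate the child data type against the declared field)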
+ fn test_checks_data_type() { + let field = Arc::new(Field::new("item", DataType::Int64, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1)]); + builder.finish(); + } +} diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 89a96280eb87..982e8788b90d 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -180,6 +180,8 @@ mod generic_byte_run_builder; pub use generic_byte_run_builder::*; mod generic_bytes_view_builder; pub use generic_bytes_view_builder::*; +mod generic_list_view_builder; +pub use generic_list_view_builder::*; mod union_builder; pub use union_builder::*; @@ -304,6 +306,12 @@ pub type ListBuilder = GenericListBuilder; /// Builder for [`LargeListArray`](crate::array::LargeListArray) pub type LargeListBuilder = GenericListBuilder; +/// Builder for [`ListViewArray`](crate::array::ListViewArray) +pub type ListViewBuilder = GenericListViewBuilder; + +/// Builder for [`LargeListViewArray`](crate::array::LargeListViewArray) +pub type LargeListViewBuilder = GenericListViewBuilder; + /// Builder for [`BinaryArray`](crate::array::BinaryArray) /// /// See examples on [`GenericBinaryBuilder`] diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 69c551fdb5b6..c7299d076ab0 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -276,6 +276,16 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { + let builder = make_builder(field.data_type(), capacity); + Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone())) + } + DataType::LargeListView(field) => { + let builder = make_builder(field.data_type(), capacity); + Box::new( + LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()), + ) + } DataType::Map(field, _) => match field.data_type() { DataType::Struct(fields) => { let map_field_names = MapFieldNames { diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index 9947c36d4619..0b76193c7565 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -838,6 +838,14 @@ pub trait AsArray: private::Sealed { self.as_list_opt().expect("list array") } + /// Downcast this to a [`GenericListViewArray`] returning `None` if not possible + fn as_list_view_opt(&self) -> Option<&GenericListViewArray>; + + /// Downcast this to a [`GenericListViewArray`] panicking if not possible + fn as_list_view(&self) -> &GenericListViewArray { + self.as_list_view_opt().expect("list view array") + } + /// Downcast this to a [`FixedSizeBinaryArray`] returning `None` if not possible fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray>; @@ -911,6 +919,10 @@ impl AsArray for dyn Array + '_ { self.as_any().downcast_ref() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_any().downcast_ref() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_any().downcast_ref() } @@ -966,6 +978,10 @@ impl AsArray for ArrayRef { self.as_ref().as_list_opt() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_ref().as_list_view_opt() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_ref().as_fixed_size_binary_opt() } From d31e780d652aba6966e975ab4c20ef9c90bbd242 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 Jan 2025 
13:29:36 +0100 Subject: [PATCH 28/68] Update itertools requirement from 0.13.0 to 0.14.0 in /object_store (#6925) Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version. - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.13.0...v0.14.0) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- object_store/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object_store/Cargo.toml b/object_store/Cargo.toml index 6f5e9db1bc70..992ae6662cdb 100644 --- a/object_store/Cargo.toml +++ b/object_store/Cargo.toml @@ -35,7 +35,7 @@ bytes = "1.0" chrono = { version = "0.4.34", default-features = false, features = ["clock"] } futures = "0.3" humantime = "2.1" -itertools = "0.13.0" +itertools = "0.14.0" parking_lot = { version = "0.12" } percent-encoding = "2.1" thiserror = "2.0.2" From e4989aad5acb8b62cfe98b130f78961f8bcc34bb Mon Sep 17 00:00:00 2001 From: wiedld Date: Sat, 4 Jan 2025 05:21:59 -0500 Subject: [PATCH 29/68] Document how to use Extend for generic methods on ArrayBuilders (#6932) * chore: add docs for how to use Extend for generic methods on ArrayBuilders * chore: move to mod docs and add more examples --- arrow-array/src/builder/mod.rs | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 982e8788b90d..29d75024ea72 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -78,6 +78,73 @@ //! )) //! ``` //! +//! # Using the [`Extend`] trait to append values from an iterable: +//! +//! ``` +//! # use arrow_array::{Array}; +//! # use arrow_array::builder::{ArrayBuilder, StringBuilder}; +//! +//! let mut builder = StringBuilder::new(); +//! builder.extend(vec![Some("🍐"), Some("🍎"), None]); +//! assert_eq!(builder.finish().len(), 3); +//! ``` +//! +//! # Using the [`Extend`] trait to write generic functions: +//! +//! ``` +//! # use arrow_array::{Array, ArrayRef, StringArray}; +//! # use arrow_array::builder::{ArrayBuilder, Int32Builder, ListBuilder, StringBuilder}; +//! +//! // For generic methods that fill a list of values for an [`ArrayBuilder`], use the [`Extend`] trait. +//! fn filter_and_fill>(builder: &mut impl Extend, values: I, filter: V) +//! where V: PartialEq +//! { +//! builder.extend(values.into_iter().filter(|v| *v == filter)); +//! } +//! let mut string_builder = StringBuilder::new(); +//! filter_and_fill( +//! &mut string_builder, +//! vec![Some("🍐"), Some("🍎"), None], +//! Some("🍎"), +//! ); +//! assert_eq!(string_builder.finish().len(), 1); +//! +//! let mut int_builder = Int32Builder::new(); +//! filter_and_fill( +//! &mut int_builder, +//! vec![Some(11), Some(42), None], +//! Some(42), +//! ); +//! assert_eq!(int_builder.finish().len(), 1); +//! +//! // For generic methods that fill lists-of-lists for an [`ArrayBuilder`], use the [`Extend`] trait. +//! fn filter_and_fill_if_contains>>( +//! list_builder: &mut impl Extend>, +//! values: I, +//! filter: Option, +//! ) where +//! T: PartialEq, +//! for<'a> &'a V: IntoIterator>, +//! { +//! list_builder.extend(values.into_iter().filter(|string: &Option| { +//! string +//! .as_ref() +//! .map(|str: &V| str.into_iter().any(|ch: &Option| ch == &filter)) +//! 
.unwrap_or(false) +//! })); +//! } +//! let builder = StringBuilder::new(); +//! let mut list_builder = ListBuilder::new(builder); +//! let pear_pear = vec![Some("🍐"),Some("🍐")]; +//! let pear_app = vec![Some("🍐"),Some("🍎")]; +//! filter_and_fill_if_contains( +//! &mut list_builder, +//! vec![Some(pear_pear), Some(pear_app), None], +//! Some("🍎"), +//! ); +//! assert_eq!(list_builder.finish().len(), 1); +//! ``` +//! //! # Custom Builders //! //! It is common to have a collection of statically defined Rust types that @@ -134,6 +201,8 @@ //! } //! } //! +//! /// For building arrays in generic code, use Extend instead of the append_* methods +//! /// e.g. append_value, append_option, append_null //! impl<'a> Extend<&'a MyRow> for MyRowBuilder { //! fn extend>(&mut self, iter: T) { //! iter.into_iter().for_each(|row| self.append(row)); From ce4be76a40a9dd4818e229261da49b89635becfd Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 4 Jan 2025 05:23:12 -0500 Subject: [PATCH 30/68] [Parquet] Add projection utility functions (#6931) * projection utilities * improve docs --------- Co-authored-by: Andrew Lamb --- parquet/src/arrow/mod.rs | 101 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs index 1305bbac83f0..35f5897c18f8 100644 --- a/parquet/src/arrow/mod.rs +++ b/parquet/src/arrow/mod.rs @@ -281,6 +281,45 @@ impl ProjectionMask { pub fn leaf_included(&self, leaf_idx: usize) -> bool { self.mask.as_ref().map(|m| m[leaf_idx]).unwrap_or(true) } + + /// Union two projection masks + /// + /// Example: + /// ```text + /// mask1 = [true, false, true] + /// mask2 = [false, true, true] + /// union(mask1, mask2) = [true, true, true] + /// ``` + pub fn union(&mut self, other: &Self) { + match (self.mask.as_ref(), other.mask.as_ref()) { + (None, _) | (_, None) => self.mask = None, + (Some(a), Some(b)) => { + debug_assert_eq!(a.len(), b.len()); + let mask = a.iter().zip(b.iter()).map(|(&a, &b)| a || b).collect(); + self.mask = Some(mask); + } + } + } + + /// Intersect two projection masks + /// + /// Example: + /// ```text + /// mask1 = [true, false, true] + /// mask2 = [false, true, true] + /// intersect(mask1, mask2) = [false, false, true] + /// ``` + pub fn intersect(&mut self, other: &Self) { + match (self.mask.as_ref(), other.mask.as_ref()) { + (None, _) => self.mask = other.mask.clone(), + (_, None) => {} + (Some(a), Some(b)) => { + debug_assert_eq!(a.len(), b.len()); + let mask = a.iter().zip(b.iter()).map(|(&a, &b)| a && b).collect(); + self.mask = Some(mask); + } + } + } } /// Lookups up the parquet column by name @@ -551,4 +590,66 @@ mod test { let mask = ProjectionMask::columns(&schema, ["a", "e"]); assert_eq!(mask.mask.unwrap(), [true, false, true, false, true]); } + + #[test] + fn test_projection_mask_union() { + let mut mask1 = ProjectionMask { + mask: Some(vec![true, false, true]), + }; + let mask2 = ProjectionMask { + mask: Some(vec![false, true, true]), + }; + mask1.union(&mask2); + assert_eq!(mask1.mask, Some(vec![true, true, true])); + + let mut mask1 = ProjectionMask { mask: None }; + let mask2 = ProjectionMask { + mask: Some(vec![false, true, true]), + }; + mask1.union(&mask2); + assert_eq!(mask1.mask, None); + + let mut mask1 = ProjectionMask { + mask: Some(vec![true, false, true]), + }; + let mask2 = ProjectionMask { mask: None }; + mask1.union(&mask2); + assert_eq!(mask1.mask, None); + + let mut mask1 = ProjectionMask { mask: None }; + let mask2 = ProjectionMask { mask: None }; + 
+        mask1.union(&mask2);
+        assert_eq!(mask1.mask, None);
+    }
+
+    #[test]
+    fn test_projection_mask_intersect() {
+        let mut mask1 = ProjectionMask {
+            mask: Some(vec![true, false, true]),
+        };
+        let mask2 = ProjectionMask {
+            mask: Some(vec![false, true, true]),
+        };
+        mask1.intersect(&mask2);
+        assert_eq!(mask1.mask, Some(vec![false, false, true]));
+
+        let mut mask1 = ProjectionMask { mask: None };
+        let mask2 = ProjectionMask {
+            mask: Some(vec![false, true, true]),
+        };
+        mask1.intersect(&mask2);
+        assert_eq!(mask1.mask, Some(vec![false, true, true]));
+
+        let mut mask1 = ProjectionMask {
+            mask: Some(vec![true, false, true]),
+        };
+        let mask2 = ProjectionMask { mask: None };
+        mask1.intersect(&mask2);
+        assert_eq!(mask1.mask, Some(vec![true, false, true]));
+
+        let mut mask1 = ProjectionMask { mask: None };
+        let mask2 = ProjectionMask { mask: None };
+        mask1.intersect(&mask2);
+        assert_eq!(mask1.mask, None);
+    }
 }

From 94a079f0e67b08910632acb26aa128138c3941b5 Mon Sep 17 00:00:00 2001
From: Raz Luvaton <16746759+rluvaton@users.noreply.github.com>
Date: Sat, 4 Jan 2025 12:24:48 +0200
Subject: [PATCH 31/68] feat(arrow-select): `concat` kernel will merge
 dictionary values for list of dictionaries (#6893)

* feat(arrow-select): make list of dictionary merge dictionary keys

TODO:
- [ ] Add support to nested lists
- [ ] Add more tests
- [ ] Fix failing test

* fix concat lists of dictionaries

* format

* remove unused import

* improve test helper

* feat: add merge offset buffers into one

* format

* add reproduction test

* recommit

* fix clippy

* fix clippy

* fix clippy

* improve offsets code according to code review

* use concat dictionaries

* add specialized code to concat lists to be able to use the concat dictionary logic

* remove the use of ArrayData
---
 arrow-buffer/src/buffer/offset.rs |  52 ++++++++
 arrow-select/src/concat.rs        | 191 ++++++++++++++++++++++++++++--
 2 files changed, 232 insertions(+), 11 deletions(-)

diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs
index a6be2b67af84..164af6f01d0e 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -133,6 +133,38 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
         Self(out.into())
     }

+    /// Get an Iterator over the lengths of this [`OffsetBuffer`]
+    ///
+    /// ```
+    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
+    /// let offsets = OffsetBuffer::<_>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
+    /// assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
+    /// ```
+    ///
+    /// Empty [`OffsetBuffer`] will return an empty iterator
+    /// ```
+    /// # use arrow_buffer::OffsetBuffer;
+    /// let offsets = OffsetBuffer::<i32>::new_empty();
+    /// assert_eq!(offsets.lengths().count(), 0);
+    /// ```
+    ///
+    /// This can be used to merge multiple [`OffsetBuffer`]s into one
+    /// ```
+    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
+    ///
+    /// let buffer1 = OffsetBuffer::<i32>::from_lengths([2, 6, 3, 7, 2]);
+    /// let buffer2 = OffsetBuffer::<i32>::from_lengths([1, 3, 5, 7, 9]);
+    ///
+    /// let merged = OffsetBuffer::<i32>::from_lengths(
+    ///     vec![buffer1, buffer2].iter().flat_map(|x| x.lengths())
+    /// );
+    ///
+    /// assert_eq!(merged.lengths().collect::<Vec<usize>>(), &[2, 6, 3, 7, 2, 1, 3, 5, 7, 9]);
+    /// ```
+    pub fn lengths(&self) -> impl ExactSizeIterator<Item = usize> + '_ {
+        self.0.windows(2).map(|x| x[1].as_usize() - x[0].as_usize())
+    }
+
     /// Free up unused memory.
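    ///
    /// A minimal usage sketch (editorial illustration, not part of the original
    /// patch; it assumes only the `OffsetBuffer` API shown above):
    ///
    /// ```
    /// # use arrow_buffer::{OffsetBuffer, ScalarBuffer};
    /// let mut offsets = OffsetBuffer::<_>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
    /// offsets.shrink_to_fit();
    /// // shrinking only releases excess capacity; the offsets (and hence the
    /// // lengths) are unchanged
    /// assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
    /// ```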
    pub fn shrink_to_fit(&mut self) {
        self.0.shrink_to_fit();
@@ -244,4 +276,24 @@ mod tests {
     fn from_lengths_usize_overflow() {
         OffsetBuffer::<i32>::from_lengths([usize::MAX, 1]);
     }
+
+    #[test]
+    fn get_lengths() {
+        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
+        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![1, 3, 5]);
+    }
+
+    #[test]
+    fn get_lengths_should_be_with_fixed_size() {
+        let offsets = OffsetBuffer::<i32>::new(ScalarBuffer::<i32>::from(vec![0, 1, 4, 9]));
+        let iter = offsets.lengths();
+        assert_eq!(iter.size_hint(), (3, Some(3)));
+        assert_eq!(iter.len(), 3);
+    }
+
+    #[test]
+    fn get_lengths_from_empty_offset_buffer_should_be_empty_iterator() {
+        let offsets = OffsetBuffer::<i32>::new_empty();
+        assert_eq!(offsets.lengths().collect::<Vec<usize>>(), vec![]);
+    }
 }

diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index 129b90ee0470..4855e0087cc6 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -34,9 +34,9 @@
 use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values};
 use arrow_array::cast::AsArray;
 use arrow_array::types::*;
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer};
+use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder, NullBuffer, OffsetBuffer};
 use arrow_data::transform::{Capacities, MutableArrayData};
-use arrow_schema::{ArrowError, DataType, SchemaRef};
+use arrow_schema::{ArrowError, DataType, FieldRef, SchemaRef};
 use std::sync::Arc;

 fn binary_capacity<T: ByteArrayType>(arrays: &[&dyn Array]) -> Capacities {
@@ -129,6 +129,54 @@ fn concat_dictionaries<K: ArrowDictionaryKeyType>(
     Ok(Arc::new(array))
 }

+fn concat_lists<OffsetSize: OffsetSizeTrait>(
+    arrays: &[&dyn Array],
+    field: &FieldRef,
+) -> Result<ArrayRef, ArrowError> {
+    let mut output_len = 0;
+    let mut list_has_nulls = false;
+
+    let lists = arrays
+        .iter()
+        .map(|x| x.as_list::<OffsetSize>())
+        .inspect(|l| {
+            output_len += l.len();
+            list_has_nulls |= l.null_count() != 0;
+        })
+        .collect::<Vec<_>>();
+
+    let lists_nulls = list_has_nulls.then(|| {
+        let mut nulls = BooleanBufferBuilder::new(output_len);
+        for l in &lists {
+            match l.nulls() {
+                Some(n) => nulls.append_buffer(n.inner()),
+                None => nulls.append_n(l.len(), true),
+            }
+        }
+        NullBuffer::new(nulls.finish())
+    });
+
+    let values: Vec<&dyn Array> = lists
+        .iter()
+        .map(|x| x.values().as_ref())
+        .collect::<Vec<_>>();
+
+    let concatenated_values = concat(values.as_slice())?;
+
+    // Merge value offsets from the lists
+    let value_offset_buffer =
+        OffsetBuffer::<OffsetSize>::from_lengths(lists.iter().flat_map(|x| x.offsets().lengths()));
+
+    let array = GenericListArray::<OffsetSize>::try_new(
+        Arc::clone(field),
+        value_offset_buffer,
+        concatenated_values,
+        lists_nulls,
+    )?;
+
+    Ok(Arc::new(array))
+}
+
 macro_rules! dict_helper {
     ($t:ty, $arrays:expr) => {
         return Ok(Arc::new(concat_dictionaries::<$t>($arrays)?) as _)
@@ -163,14 +211,20 @@ pub fn concat(arrays: &[&dyn Array]) -> Result<ArrayRef, ArrowError> {
             "It is not possible to concatenate arrays of different data types.".to_string(),
         ));
     }
-    if let DataType::Dictionary(k, _) = d {
-        downcast_integer! {
-            k.as_ref() => (dict_helper, arrays),
-            _ => unreachable!("illegal dictionary key type {k}")
-        };
-    } else {
-        let capacity = get_capacity(arrays, d);
-        concat_fallback(arrays, capacity)
+
+    match d {
+        DataType::Dictionary(k, _) => {
+            downcast_integer! {
+                k.as_ref() => (dict_helper, arrays),
+                _ => unreachable!("illegal dictionary key type {k}")
+            }
+        }
+        DataType::List(field) => concat_lists::<i32>(arrays, field),
+        DataType::LargeList(field) => concat_lists::<i64>(arrays, field),
+        _ => {
+            let capacity = get_capacity(arrays, d);
+            concat_fallback(arrays, capacity)
+        }
     }
 }

@@ -228,8 +282,9 @@ pub fn concat_batches<'a>(
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrow_array::builder::StringDictionaryBuilder;
+    use arrow_array::builder::{GenericListBuilder, StringDictionaryBuilder};
     use arrow_schema::{Field, Schema};
+    use std::fmt::Debug;

     #[test]
     fn test_concat_empty_vec() {
@@ -851,4 +906,118 @@ mod tests {
         assert_eq!(array.null_count(), 10);
         assert_eq!(array.logical_null_count(), 10);
     }
+
+    #[test]
+    fn concat_dictionary_list_array_simple() {
+        let scalars = vec![
+            create_single_row_list_of_dict(vec![Some("a")]),
+            create_single_row_list_of_dict(vec![Some("a")]),
+            create_single_row_list_of_dict(vec![Some("b")]),
+        ];
+
+        let arrays = scalars
+            .iter()
+            .map(|a| a as &(dyn Array))
+            .collect::<Vec<_>>();
+        let concat_res = concat(arrays.as_slice()).unwrap();
+
+        let expected_list = create_list_of_dict(vec![
+            // Row 1
+            Some(vec![Some("a")]),
+            Some(vec![Some("a")]),
+            Some(vec![Some("b")]),
+        ]);
+
+        let list = concat_res.as_list::<i32>();
+
+        // Assert that the list is equal to the expected list
+        list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
+            assert_eq!(a, b);
+        });
+
+        assert_dictionary_has_unique_values::<_, StringArray>(
+            list.values().as_dictionary::<Int32Type>(),
+        );
+    }
+
+    #[test]
+    fn concat_many_dictionary_list_arrays() {
+        let number_of_unique_values = 8;
+        let scalars = (0..80000)
+            .map(|i| {
+                create_single_row_list_of_dict(vec![Some(
+                    (i % number_of_unique_values).to_string(),
+                )])
+            })
+            .collect::<Vec<_>>();
+
+        let arrays = scalars
+            .iter()
+            .map(|a| a as &(dyn Array))
+            .collect::<Vec<_>>();
+        let concat_res = concat(arrays.as_slice()).unwrap();
+
+        let expected_list = create_list_of_dict(
+            (0..80000)
+                .map(|i| Some(vec![Some((i % number_of_unique_values).to_string())]))
+                .collect::<Vec<_>>(),
+        );
+
+        let list = concat_res.as_list::<i32>();
+
+        // Assert that the list is equal to the expected list
+        list.iter().zip(expected_list.iter()).for_each(|(a, b)| {
+            assert_eq!(a, b);
+        });
+
+        assert_dictionary_has_unique_values::<_, StringArray>(
+            list.values().as_dictionary::<Int32Type>(),
+        );
+    }
+
+    fn create_single_row_list_of_dict(
+        list_items: Vec<Option<impl AsRef<str>>>,
+    ) -> GenericListArray<i32> {
+        let rows = list_items.into_iter().map(Some).collect();
+
+        create_list_of_dict(vec![rows])
+    }
+
+    fn create_list_of_dict(
+        rows: Vec<Option<Vec<Option<impl AsRef<str>>>>>,
+    ) -> GenericListArray<i32> {
+        let mut builder =
+            GenericListBuilder::<i32, _>::new(StringDictionaryBuilder::<Int32Type>::new());

+        for row in rows {
+            builder.append_option(row);
+        }
+
+        builder.finish()
+    }
+
+    fn assert_dictionary_has_unique_values<'a, K, V>(array: &'a DictionaryArray<K>)
+    where
+        K: ArrowDictionaryKeyType,
+        V: Sync + Send + 'static,
+        &'a V: ArrayAccessor + IntoIterator,
+
+        <&'a V as ArrayAccessor>::Item: Default + Clone + PartialEq + Debug + Ord,
+        <&'a V as IntoIterator>::Item: Clone + PartialEq + Debug + Ord,
+    {
+        let dict = array.downcast_dict::<V>().unwrap();
+        let mut values = dict.values().into_iter().collect::<Vec<_>>();
+
+        // remove duplicates (the values must be sorted first so we can compare)
+        values.sort();
+
+        let mut unique_values = values.clone();
+
+        unique_values.dedup();
+
+        assert_eq!(
+            values, unique_values,
+            "There are duplicates in the value list (the value list here is sorted which is only for the assertion)"
+        );
+    }
 }

From
91e9380f20cb18638ec4f0d249c35182afc01a24 Mon Sep 17 00:00:00 2001 From: Himadri Pal Date: Sat, 4 Jan 2025 11:21:37 -0800 Subject: [PATCH 32/68] remove println (#6935) --- arrow-cast/src/cast/mod.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 0946af53a60f..0e56d7633a80 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -4072,7 +4072,6 @@ mod tests { Arc::new(StringViewArray::from(vec![Some("1.5"), Some("2.5"), None])); for array in inputs { - println!("type: {}", array.data_type()); assert!(can_cast_types(array.data_type(), &DataType::Utf8View)); let arr = cast(&array, &DataType::Utf8View).unwrap(); assert_eq!(expected.as_ref(), arr.as_ref()); @@ -10318,7 +10317,6 @@ mod tests { fn test_decimal_to_decimal_throw_error_on_precision_overflow_same_scale() { let array = vec![Some(123456789)]; let array = create_decimal128_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); @@ -10336,7 +10334,6 @@ mod tests { fn test_decimal_to_decimal_throw_error_on_precision_overflow_lower_scale() { let array = vec![Some(123456789)]; let array = create_decimal128_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 4); let output_type = DataType::Decimal128(6, 2); assert!(can_cast_types(&input_type, &output_type)); @@ -10347,14 +10344,13 @@ mod tests { }; let result = cast_with_options(&array, &output_type, &options); assert_eq!(result.unwrap_err().to_string(), - "Invalid argument error: 123456790 is too large to store in a Decimal128 of precision 6. Max is 999999"); + "Invalid argument error: 1234568 is too large to store in a Decimal128 of precision 6. Max is 999999"); } #[test] fn test_decimal_to_decimal_throw_error_on_precision_overflow_greater_scale() { let array = vec![Some(123456789)]; let array = create_decimal128_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal128(6, 3); assert!(can_cast_types(&input_type, &output_type)); @@ -10372,7 +10368,6 @@ mod tests { fn test_decimal_to_decimal_throw_error_on_precision_overflow_diff_type() { let array = vec![Some(123456789)]; let array = create_decimal128_array(array, 24, 2).unwrap(); - println!("{:?}", array); let input_type = DataType::Decimal128(24, 2); let output_type = DataType::Decimal256(6, 2); assert!(can_cast_types(&input_type, &output_type)); From 005bfe8ae1c1f462154228ddcd57d29716bd5795 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sat, 4 Jan 2025 14:32:17 -0500 Subject: [PATCH 33/68] Minor: improve `zip` kernel docs, add examples (#6928) * Minor: improve `zip` kernel docs` * Add example for zip with scalar --- arrow-select/src/zip.rs | 66 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 7 deletions(-) diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index acb31dfa3bc2..2efd2e749921 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -15,20 +15,72 @@ // specific language governing permissions and limitations // under the License. -//! Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy` +//! 
[`zip`]: Combine values from two arrays based on boolean mask use crate::filter::SlicesIterator; use arrow_array::*; use arrow_data::transform::MutableArrayData; use arrow_schema::ArrowError; -/// Zip two arrays by some boolean mask. Where the mask evaluates `true` values of `truthy` -/// are taken, where the mask evaluates `false` values of `falsy` are taken. +/// Zip two arrays by some boolean mask. /// -/// # Arguments -/// * `mask` - Boolean values used to determine from which array to take the values. -/// * `truthy` - Values of this array are taken if mask evaluates `true` -/// * `falsy` - Values of this array are taken if mask evaluates `false` +/// - Where `mask` is `true`, values of `truthy` are taken +/// - Where `mask` is `false` or `NULL`, values of `falsy` are taken +/// +/// # Example: `zip` two arrays +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array}; +/// # use arrow_select::zip::zip; +/// // mask: [true, true, false, NULL, true] +/// let mask = BooleanArray::from(vec![ +/// Some(true), Some(true), Some(false), None, Some(true) +/// ]); +/// // truthy array: [1, NULL, 3, 4, 5] +/// let truthy = Int32Array::from(vec![ +/// Some(1), None, Some(3), Some(4), Some(5) +/// ]); +/// // falsy array: [10, 20, 30, 40, 50] +/// let falsy = Int32Array::from(vec![ +/// Some(10), Some(20), Some(30), Some(40), Some(50) +/// ]); +/// // zip with this mask select the first, second and last value from `truthy` +/// // and the third and fourth value from `falsy` +/// let result = zip(&mask, &truthy, &falsy).unwrap(); +/// // Expected: [1, NULL, 30, 40, 5] +/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![ +/// Some(1), None, Some(30), Some(40), Some(5) +/// ])); +/// assert_eq!(&result, &expected); +/// ``` +/// +/// # Example: `zip` and array with a scalar +/// +/// Use `zip` to replace certain values in an array with a scalar +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array}; +/// # use arrow_select::zip::zip; +/// // mask: [true, true, false, NULL, true] +/// let mask = BooleanArray::from(vec![ +/// Some(true), Some(true), Some(false), None, Some(true) +/// ]); +/// // array: [1, NULL, 3, 4, 5] +/// let arr = Int32Array::from(vec![ +/// Some(1), None, Some(3), Some(4), Some(5) +/// ]); +/// // scalar: 42 +/// let scalar = Int32Array::new_scalar(42); +/// // zip the array with the mask select the first, second and last value from `arr` +/// // and fill the third and fourth value with the scalar 42 +/// let result = zip(&mask, &arr, &scalar).unwrap(); +/// // Expected: [1, NULL, 42, 42, 5] +/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![ +/// Some(1), None, Some(42), Some(42), Some(5) +/// ])); +/// assert_eq!(&result, &expected); +/// ``` pub fn zip( mask: &BooleanArray, truthy: &dyn Datum, From 496de6692946614e2133aac680c7cf37cee27b2a Mon Sep 17 00:00:00 2001 From: Vrishabh Date: Sun, 5 Jan 2025 15:54:14 +0530 Subject: [PATCH 34/68] Minor clippy fixes (#6942) --- arrow-cast/src/parse.rs | 14 ++++---------- arrow-flight/src/encode.rs | 2 +- object_store/src/aws/client.rs | 2 +- object_store/src/azure/client.rs | 2 +- object_store/src/gcp/client.rs | 2 +- parquet/src/arrow/async_reader/mod.rs | 2 +- parquet/src/column/writer/mod.rs | 4 ++-- 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index f4c4639c1c08..4e93e9787cc8 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -881,7 +881,7 @@ 
pub fn parse_decimal( for (_, b) in bs.by_ref() { if !b.is_ascii_digit() { if *b == b'e' || *b == b'E' { - result = match parse_e_notation::( + result = parse_e_notation::( s, digits as u16, fractionals as i16, @@ -889,10 +889,7 @@ pub fn parse_decimal( point_index, precision as u16, scale as i16, - ) { - Err(e) => return Err(e), - Ok(v) => v, - }; + )?; is_e_notation = true; @@ -926,7 +923,7 @@ pub fn parse_decimal( } } b'e' | b'E' => { - result = match parse_e_notation::( + result = parse_e_notation::( s, digits as u16, fractionals as i16, @@ -934,10 +931,7 @@ pub fn parse_decimal( index, precision as u16, scale as i16, - ) { - Err(e) => return Err(e), - Ok(v) => v, - }; + )?; is_e_notation = true; diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index 315b7b3cb6e5..19fe42474405 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -1833,7 +1833,7 @@ mod tests { .flight_descriptor .as_ref() .map(|descriptor| { - let path_len: usize = descriptor.path.iter().map(|p| p.as_bytes().len()).sum(); + let path_len: usize = descriptor.path.iter().map(|p| p.len()).sum(); std::mem::size_of_val(descriptor) + descriptor.cmd.len() + path_len }) diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index 25fdd3311c95..b81be0c0efad 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -299,7 +299,7 @@ pub(crate) struct Request<'a> { retry_error_body: bool, } -impl<'a> Request<'a> { +impl Request<'_> { pub(crate) fn query(self, query: &T) -> Self { let builder = self.builder.query(query); Self { builder, ..self } diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index ea3a5faf3ad8..bd72d0c6aee1 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -198,7 +198,7 @@ struct PutRequest<'a> { idempotent: bool, } -impl<'a> PutRequest<'a> { +impl PutRequest<'_> { fn header(self, k: &HeaderName, v: &str) -> Self { let builder = self.builder.header(k, v); Self { builder, ..self } diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs index 1928d13b4739..d6f89ca71740 100644 --- a/object_store/src/gcp/client.rs +++ b/object_store/src/gcp/client.rs @@ -173,7 +173,7 @@ pub(crate) struct Request<'a> { idempotent: bool, } -impl<'a> Request<'a> { +impl Request<'_> { fn header(self, k: &HeaderName, v: &str) -> Self { let builder = self.builder.header(k, v); Self { builder, ..self } diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 96715e1164b2..4f3befe42662 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -792,7 +792,7 @@ struct InMemoryRowGroup<'a> { row_count: usize, } -impl<'a> InMemoryRowGroup<'a> { +impl InMemoryRowGroup<'_> { /// Fetches the necessary column data into memory async fn fetch( &mut self, diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 8dc1d0db4476..5f34f34cbb7a 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -3275,8 +3275,8 @@ mod tests { fn test_truncate_utf8() { // No-op let data = "❤️🧡💛💚💙💜"; - let r = truncate_utf8(data, data.as_bytes().len()).unwrap(); - assert_eq!(r.len(), data.as_bytes().len()); + let r = truncate_utf8(data, data.len()).unwrap(); + assert_eq!(r.len(), data.len()); assert_eq!(&r, data.as_bytes()); // We slice it away from the UTF8 boundary From 30f46c73b021256743a5ef9ea6062f20c0553cc2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb 
Date: Mon, 6 Jan 2025 16:03:51 -0500 Subject: [PATCH 35/68] Improve `Buffer` documentation, deprecate `Buffer::from_bytes` add `From` and `From` impls (#6939) * Improve Bytes documentation * Improve Buffer documentation, add From and From impls * avoid linking to private docs * Deprecate `Buffer::from_bytes` * Apply suggestions from code review Co-authored-by: Jeffrey Vo --------- Co-authored-by: Jeffrey Vo --- arrow-buffer/src/buffer/immutable.rs | 118 ++++++++++++++---- arrow-buffer/src/buffer/mutable.rs | 2 +- arrow-buffer/src/bytes.rs | 8 +- arrow-flight/src/decode.rs | 2 +- arrow-flight/src/sql/client.rs | 2 +- .../src/arrow/array_reader/byte_view_array.rs | 10 +- 6 files changed, 104 insertions(+), 38 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index cf1d6f366751..fd145ce2306e 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -28,8 +28,43 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; -/// Buffer represents a contiguous memory region that can be shared with other buffers and across -/// thread boundaries. +/// A contiguous memory region that can be shared with other buffers and across +/// thread boundaries that stores Arrow data. +/// +/// `Buffer`s can be sliced and cloned without copying the underlying data and can +/// be created from memory allocated by non-Rust sources such as C/C++. +/// +/// # Example: Create a `Buffer` from a `Vec` (without copying) +/// ``` +/// # use arrow_buffer::Buffer; +/// let vec: Vec = vec![1, 2, 3]; +/// let buffer = Buffer::from(vec); +/// ``` +/// +/// # Example: Convert a `Buffer` to a `Vec` (without copying) +/// +/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are +/// no other references and the types are aligned correctly. +/// ``` +/// # use arrow_buffer::Buffer; +/// # let vec: Vec = vec![1, 2, 3]; +/// # let buffer = Buffer::from(vec); +/// // convert the buffer back into a Vec of u32 +/// // note this will fail if the buffer is shared or not aligned correctly +/// let vec: Vec = buffer.into_vec().unwrap(); +/// ``` +/// +/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying) +/// +/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory +/// regions. You can create a buffer from a `Bytes` instance using the `From` +/// implementation, also without copying. +/// +/// ``` +/// # use arrow_buffer::Buffer; +/// let bytes = bytes::Bytes::from("hello"); +/// let buffer = Buffer::from(bytes); +///``` #[derive(Clone, Debug)] pub struct Buffer { /// the internal byte buffer. @@ -59,24 +94,15 @@ unsafe impl Send for Buffer where Bytes: Send {} unsafe impl Sync for Buffer where Bytes: Sync {} impl Buffer { - /// Auxiliary method to create a new Buffer + /// Create a new Buffer from a (internal) `Bytes` /// - /// This can be used with a [`bytes::Bytes`] via `into()`: + /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs + /// and is different than [`bytes::Bytes`]. /// - /// ``` - /// # use arrow_buffer::Buffer; - /// let bytes = bytes::Bytes::from_static(b"foo"); - /// let buffer = Buffer::from_bytes(bytes.into()); - /// ``` - #[inline] + /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`]. 
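+    ///
+    /// (Editorial migration sketch, not part of the original patch: code that
+    /// called `Buffer::from_bytes(bytes)` can simply call `Buffer::from(bytes)`
+    /// instead, since this patch adds the corresponding `From` impl.)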
+ #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")] pub fn from_bytes(bytes: Bytes) -> Self { - let length = bytes.len(); - let ptr = bytes.as_ptr(); - Buffer { - data: Arc::new(bytes), - ptr, - length, - } + Self::from(bytes) } /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` @@ -107,8 +133,11 @@ impl Buffer { buffer.into() } - /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting - /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. + /// Creates a buffer from an existing memory region. + /// + /// Ownership of the memory is tracked via reference counting + /// and the memory will be freed using the `drop` method of + /// [crate::alloc::Allocation] when the reference count reaches zero. /// /// # Arguments /// @@ -155,7 +184,7 @@ impl Buffer { self.data.capacity() } - /// Tried to shrink the capacity of the buffer as much as possible, freeing unused memory. + /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory. /// /// If the buffer is shared, this is a no-op. /// @@ -190,7 +219,7 @@ impl Buffer { } } - /// Returns whether the buffer is empty. + /// Returns true if the buffer is empty. #[inline] pub fn is_empty(&self) -> bool { self.length == 0 @@ -206,7 +235,9 @@ impl Buffer { } /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`. - /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the + /// same memory region to be shared between buffers. /// /// # Panics /// @@ -240,7 +271,10 @@ impl Buffer { /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`, /// with `length` bytes. - /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the same + /// memory region to be shared between buffers. + /// /// # Panics /// Panics iff `(offset + length)` is larger than the existing length. pub fn slice_with_length(&self, offset: usize, length: usize) -> Self { @@ -328,10 +362,16 @@ impl Buffer { }) } - /// Returns `Vec` for mutating the buffer + /// Converts self into a `Vec`, if possible. + /// + /// This can be used to reuse / mutate the underlying data. /// - /// Returns `Err(self)` if this buffer does not have the same [`Layout`] as - /// the destination Vec or contains a non-zero offset + /// # Errors + /// + /// Returns `Err(self)` if + /// 1. this buffer does not have the same [`Layout`] as the destination Vec + /// 2. contains a non-zero offset + /// 3. 
The buffer is shared pub fn into_vec(self) -> Result, Self> { let layout = match self.data.deallocation() { Deallocation::Standard(l) => l, @@ -414,7 +454,29 @@ impl From> for Buffer { } } -/// Creating a `Buffer` instance by storing the boolean values into the buffer +/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer` +impl From for Buffer { + #[inline] + fn from(bytes: Bytes) -> Self { + let length = bytes.len(); + let ptr = bytes.as_ptr(); + Self { + data: Arc::new(bytes), + ptr, + length, + } + } +} + +/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer` +impl From for Buffer { + fn from(bytes: bytes::Bytes) -> Self { + let bytes: Bytes = bytes.into(); + Self::from(bytes) + } +} + +/// Create a `Buffer` instance by storing the boolean values into the buffer impl FromIterator for Buffer { fn from_iter(iter: I) -> Self where @@ -447,7 +509,9 @@ impl From> for Buffer { impl Buffer { /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. + /// /// Prefer this to `collect` whenever possible, as it is ~60% faster. + /// /// # Example /// ``` /// # use arrow_buffer::buffer::Buffer; diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index c4315a1d64cd..5ad55e306e2a 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -328,7 +328,7 @@ impl MutableBuffer { pub(super) fn into_buffer(self) -> Buffer { let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) }; std::mem::forget(self); - Buffer::from_bytes(bytes) + Buffer::from(bytes) } /// View this buffer as a mutable slice of a specific type. diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs index 77724137aef7..b811bd2c6b40 100644 --- a/arrow-buffer/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -28,14 +28,18 @@ use crate::buffer::dangling_ptr; /// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself. /// -/// This structs' API is inspired by the `bytes::Bytes`, but it is not limited to using rust's -/// global allocator nor u8 alignment. +/// Note that this structure is an internal implementation detail of the +/// arrow-rs crate. While it has the same name and similar API as +/// [`bytes::Bytes`] it is not limited to rust's global allocator nor u8 +/// alignment. It is possible to create a `Bytes` from `bytes::Bytes` using the +/// `From` implementation. /// /// In the most common case, this buffer is allocated using [`alloc`](std::alloc::alloc) /// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT) /// /// When the region is allocated by a different allocator, [Deallocation::Custom], this calls the /// custom deallocator to deallocate the region when it is no longer needed. 
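///
/// For example (editorial note, not from the original patch), a region handed
/// over FFI together with its own free routine can be tracked with a custom
/// deallocation, so dropping the `Bytes` releases the memory through that
/// routine rather than through Rust's global allocator.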
+/// pub struct Bytes { /// The raw pointer to be beginning of the region ptr: NonNull, diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index 7bafc384306b..760fc926fca6 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -295,7 +295,7 @@ impl FlightDataDecoder { )); }; - let buffer = Buffer::from_bytes(data.data_body.into()); + let buffer = Buffer::from(data.data_body); let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| { FlightError::protocol( "Could not get dictionary batch from DictionaryBatch message", diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index a6e228737b3f..6d3ac3dbe610 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -721,7 +721,7 @@ pub fn arrow_data_from_flight_data( let dictionaries_by_field = HashMap::new(); let record_batch = read_record_batch( - &Buffer::from_bytes(flight_data.data_body.into()), + &Buffer::from(flight_data.data_body), ipc_record_batch, arrow_schema_ref.clone(), &dictionaries_by_field, diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 5845e2c08cec..92a8b0592d0d 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -316,9 +316,8 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { - // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy - // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, which is also zero copy - let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into()); + // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` + let buf = arrow_buffer::Buffer::from(self.buf.clone()); let block_id = output.append_block(buf); let to_read = len.min(self.max_remaining_values); @@ -549,9 +548,8 @@ impl ByteViewArrayDecoderDeltaLength { let src_lengths = &self.lengths[self.length_offset..self.length_offset + to_read]; - // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy - // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, which is also zero copy - let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into()); + // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` + let bytes = Buffer::from(self.data.clone()); let block_id = output.append_block(bytes); let mut current_offset = self.data_offset; From 618d81ce1f3bd7591ae0c40be19065e13d3d68d6 Mon Sep 17 00:00:00 2001 From: Jinpeng Date: Mon, 6 Jan 2025 16:09:05 -0500 Subject: [PATCH 36/68] Convert some panics that happen on invalid parquet files to error results (#6738) * Reduce panics * t pushmove integer logical type from format.rs to schema type.rs * remove some changes as per reviews * use wrapping_shl * fix typo in error message * return error for invalid decimal length --------- Co-authored-by: jp0317 Co-authored-by: Andrew Lamb --- parquet/src/errors.rs | 7 ++++ parquet/src/file/metadata/reader.rs | 26 ++++++------- parquet/src/file/serialized_reader.rs | 53 ++++++++++++++++++++++---- parquet/src/file/statistics.rs | 26 +++++++++++++ parquet/src/schema/types.rs | 25 +++++++++++- parquet/src/thrift.rs | 35 ++++++++++++++--- parquet/tests/arrow_reader/bad_data.rs | 2 +- 7 files changed, 146 insertions(+), 28 deletions(-) diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs index 8dc97f4ca2e6..d749287bba62 100644 --- a/parquet/src/errors.rs +++ 
b/parquet/src/errors.rs @@ -17,6 +17,7 @@ //! Common Parquet errors and macros. +use core::num::TryFromIntError; use std::error::Error; use std::{cell, io, result, str}; @@ -81,6 +82,12 @@ impl Error for ParquetError { } } +impl From for ParquetError { + fn from(e: TryFromIntError) -> ParquetError { + ParquetError::General(format!("Integer overflow: {e}")) + } +} + impl From for ParquetError { fn from(e: io::Error) -> ParquetError { ParquetError::External(Box::new(e)) diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index ec2cd1094d3a..c6715a33b5ae 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -627,7 +627,8 @@ impl ParquetMetaDataReader { for rg in t_file_metadata.row_groups { row_groups.push(RowGroupMetaData::from_thrift(schema_descr.clone(), rg)?); } - let column_orders = Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr); + let column_orders = + Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; let file_metadata = FileMetaData::new( t_file_metadata.version, @@ -645,15 +646,13 @@ impl ParquetMetaDataReader { fn parse_column_orders( t_column_orders: Option>, schema_descr: &SchemaDescriptor, - ) -> Option> { + ) -> Result>> { match t_column_orders { Some(orders) => { // Should always be the case - assert_eq!( - orders.len(), - schema_descr.num_columns(), - "Column order length mismatch" - ); + if orders.len() != schema_descr.num_columns() { + return Err(general_err!("Column order length mismatch")); + }; let mut res = Vec::new(); for (i, column) in schema_descr.columns().iter().enumerate() { match orders[i] { @@ -667,9 +666,9 @@ impl ParquetMetaDataReader { } } } - Some(res) + Ok(Some(res)) } - None => None, + None => Ok(None), } } } @@ -741,7 +740,7 @@ mod tests { ]); assert_eq!( - ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr), + ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr).unwrap(), Some(vec![ ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED), ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED) @@ -750,20 +749,21 @@ mod tests { // Test when no column orders are defined. 
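        // (Editorial note, not in the original patch:) a file without a
        // column_orders field is valid, so parsing yields `Ok(None)` rather
        // than an error.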
assert_eq!( - ParquetMetaDataReader::parse_column_orders(None, &schema_descr), + ParquetMetaDataReader::parse_column_orders(None, &schema_descr).unwrap(), None ); } #[test] - #[should_panic(expected = "Column order length mismatch")] fn test_metadata_column_orders_len_mismatch() { let schema = SchemaType::group_type_builder("schema").build().unwrap(); let schema_descr = SchemaDescriptor::new(Arc::new(schema)); let t_column_orders = Some(vec![TColumnOrder::TYPEORDER(TypeDefinedOrder::new())]); - ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); + let res = ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); + assert!(res.is_err()); + assert!(format!("{:?}", res.unwrap_err()).contains("Column order length mismatch")); } #[test] diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 06f3cf9fb23f..a942481f7e4d 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -435,7 +435,7 @@ pub(crate) fn decode_page( let is_sorted = dict_header.is_sorted.unwrap_or(false); Page::DictionaryPage { buf: buffer, - num_values: dict_header.num_values as u32, + num_values: dict_header.num_values.try_into()?, encoding: Encoding::try_from(dict_header.encoding)?, is_sorted, } @@ -446,7 +446,7 @@ pub(crate) fn decode_page( .ok_or_else(|| ParquetError::General("Missing V1 data page header".to_string()))?; Page::DataPage { buf: buffer, - num_values: header.num_values as u32, + num_values: header.num_values.try_into()?, encoding: Encoding::try_from(header.encoding)?, def_level_encoding: Encoding::try_from(header.definition_level_encoding)?, rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?, @@ -460,12 +460,12 @@ pub(crate) fn decode_page( let is_compressed = header.is_compressed.unwrap_or(true); Page::DataPageV2 { buf: buffer, - num_values: header.num_values as u32, + num_values: header.num_values.try_into()?, encoding: Encoding::try_from(header.encoding)?, - num_nulls: header.num_nulls as u32, - num_rows: header.num_rows as u32, - def_levels_byte_len: header.definition_levels_byte_length as u32, - rep_levels_byte_len: header.repetition_levels_byte_length as u32, + num_nulls: header.num_nulls.try_into()?, + num_rows: header.num_rows.try_into()?, + def_levels_byte_len: header.definition_levels_byte_length.try_into()?, + rep_levels_byte_len: header.repetition_levels_byte_length.try_into()?, is_compressed, statistics: statistics::from_thrift(physical_type, header.statistics)?, } @@ -578,6 +578,27 @@ impl Iterator for SerializedPageReader { } } +fn verify_page_header_len(header_len: usize, remaining_bytes: usize) -> Result<()> { + if header_len > remaining_bytes { + return Err(eof_err!("Invalid page header")); + } + Ok(()) +} + +fn verify_page_size( + compressed_size: i32, + uncompressed_size: i32, + remaining_bytes: usize, +) -> Result<()> { + // The page's compressed size should not exceed the remaining bytes that are + // available to read. The page's uncompressed size is the expected size + // after decompression, which can never be negative. 
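+    // (Editorial illustration, not in the original patch: with these checks, a
+    // corrupt header claiming e.g. compressed_page_size = 2_000_000_000 while
+    // only a few KiB remain in the row group now fails with an EOF error
+    // instead of attempting an enormous read.)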
+ if compressed_size < 0 || compressed_size as usize > remaining_bytes || uncompressed_size < 0 { + return Err(eof_err!("Invalid page header")); + } + Ok(()) +} + impl PageReader for SerializedPageReader { fn get_next_page(&mut self) -> Result> { loop { @@ -596,10 +617,16 @@ impl PageReader for SerializedPageReader { *header } else { let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining)?; *offset += header_len; *remaining -= header_len; header }; + verify_page_size( + header.compressed_page_size, + header.uncompressed_page_size, + *remaining, + )?; let data_len = header.compressed_page_size as usize; *offset += data_len; *remaining -= data_len; @@ -683,6 +710,7 @@ impl PageReader for SerializedPageReader { } else { let mut read = self.reader.get_read(*offset as u64)?; let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining_bytes)?; *offset += header_len; *remaining_bytes -= header_len; let page_meta = if let Ok(page_meta) = (&header).try_into() { @@ -733,12 +761,23 @@ impl PageReader for SerializedPageReader { next_page_header, } => { if let Some(buffered_header) = next_page_header.take() { + verify_page_size( + buffered_header.compressed_page_size, + buffered_header.uncompressed_page_size, + *remaining_bytes, + )?; // The next page header has already been peeked, so just advance the offset *offset += buffered_header.compressed_page_size as usize; *remaining_bytes -= buffered_header.compressed_page_size as usize; } else { let mut read = self.reader.get_read(*offset as u64)?; let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining_bytes)?; + verify_page_size( + header.compressed_page_size, + header.uncompressed_page_size, + *remaining_bytes, + )?; let data_page_size = header.compressed_page_size as usize; *offset += header_len + data_page_size; *remaining_bytes -= header_len + data_page_size; diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 2e05b83369cf..b7522a76f0fc 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -157,6 +157,32 @@ pub fn from_thrift( stats.max_value }; + fn check_len(min: &Option>, max: &Option>, len: usize) -> Result<()> { + if let Some(min) = min { + if min.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse min statistic".to_string(), + )); + } + } + if let Some(max) = max { + if max.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse max statistic".to_string(), + )); + } + } + Ok(()) + } + + match physical_type { + Type::BOOLEAN => check_len(&min, &max, 1), + Type::INT32 | Type::FLOAT => check_len(&min, &max, 4), + Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8), + Type::INT96 => check_len(&min, &max, 12), + _ => Ok(()), + }?; + // Values are encoded using PLAIN encoding definition, except that // variable-length byte arrays do not include a length prefix. 
// diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index d168e46de047..d9e9b22e809f 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -556,7 +556,11 @@ impl<'a> PrimitiveTypeBuilder<'a> { } } PhysicalType::FIXED_LEN_BYTE_ARRAY => { - let max_precision = (2f64.powi(8 * self.length - 1) - 1f64).log10().floor() as i32; + let length = self + .length + .checked_mul(8) + .ok_or(general_err!("Invalid length {} for Decimal", self.length))?; + let max_precision = (2f64.powi(length - 1) - 1f64).log10().floor() as i32; if self.precision > max_precision { return Err(general_err!( @@ -1171,9 +1175,25 @@ pub fn from_thrift(elements: &[SchemaElement]) -> Result { )); } + if !schema_nodes[0].is_group() { + return Err(general_err!("Expected root node to be a group type")); + } + Ok(schema_nodes.remove(0)) } +/// Checks if the logical type is valid. +fn check_logical_type(logical_type: &Option) -> Result<()> { + if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type { + if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 { + return Err(general_err!( + "Bit width must be 8, 16, 32, or 64 for Integer logical type" + )); + } + } + Ok(()) +} + /// Constructs a new Type from the `elements`, starting at index `index`. /// The first result is the starting index for the next Type after this one. If it is /// equal to `elements.len()`, then this Type is the last one. @@ -1198,6 +1218,9 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize .logical_type .as_ref() .map(|value| LogicalType::from(value.clone())); + + check_logical_type(&logical_type)?; + let field_id = elements[index].field_id; match elements[index].num_children { // From parquet-format: diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs index ceb6b1c29fe8..b216fec6f3e7 100644 --- a/parquet/src/thrift.rs +++ b/parquet/src/thrift.rs @@ -67,7 +67,7 @@ impl<'a> TCompactSliceInputProtocol<'a> { let mut shift = 0; loop { let byte = self.read_byte()?; - in_progress |= ((byte & 0x7F) as u64) << shift; + in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift); shift += 7; if byte & 0x80 == 0 { return Ok(in_progress); @@ -96,13 +96,22 @@ impl<'a> TCompactSliceInputProtocol<'a> { } } +macro_rules! 
thrift_unimplemented { + () => { + Err(thrift::Error::Protocol(thrift::ProtocolError { + kind: thrift::ProtocolErrorKind::NotImplemented, + message: "not implemented".to_string(), + })) + }; +} + impl TInputProtocol for TCompactSliceInputProtocol<'_> { fn read_message_begin(&mut self) -> thrift::Result { unimplemented!() } fn read_message_end(&mut self) -> thrift::Result<()> { - unimplemented!() + thrift_unimplemented!() } fn read_struct_begin(&mut self) -> thrift::Result> { @@ -147,7 +156,21 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> { ), _ => { if field_delta != 0 { - self.last_read_field_id += field_delta as i16; + self.last_read_field_id = self + .last_read_field_id + .checked_add(field_delta as i16) + .map_or_else( + || { + Err(thrift::Error::Protocol(thrift::ProtocolError { + kind: thrift::ProtocolErrorKind::InvalidData, + message: format!( + "cannot add {} to {}", + field_delta, self.last_read_field_id + ), + })) + }, + Ok, + )?; } else { self.last_read_field_id = self.read_i16()?; }; @@ -226,15 +249,15 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> { } fn read_set_begin(&mut self) -> thrift::Result { - unimplemented!() + thrift_unimplemented!() } fn read_set_end(&mut self) -> thrift::Result<()> { - unimplemented!() + thrift_unimplemented!() } fn read_map_begin(&mut self) -> thrift::Result { - unimplemented!() + thrift_unimplemented!() } fn read_map_end(&mut self) -> thrift::Result<()> { diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index 74342031432a..cfd61e82d32b 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -106,7 +106,7 @@ fn test_arrow_rs_gh_6229_dict_header() { let err = read_file("ARROW-RS-GH-6229-DICTHEADER.parquet").unwrap_err(); assert_eq!( err.to_string(), - "External: Parquet argument error: EOF: eof decoding byte array" + "External: Parquet argument error: Parquet error: Integer overflow: out of range integral type conversion attempted" ); } From 1be307761fbf4f3f7ced48e16169b0ad77287bbe Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:13:56 -0700 Subject: [PATCH 37/68] Update MSRVs to be accurate (#6742) * Update most MSRVs * Make cargo-msrv verify every package in repo instead of just a select few and purposefully break arrow-flight msrv * Add test to ensure workspace rust version is being used at least somewhere * Fix exit1 => exit 1 * Make arrow-flight work, at the very least, with 'cargo metadata' * Fix arrow-flight/gen rust-version to make CI pass now * Get rid of pretty msrv logging as it can't all be displayed * Do '-mindepth 2' with find to prevent running cargo msrv on the workspace as a whole * Use correct MSRV for object_store * remove workspace msrv check * revert msrv * push object_store MSRV back down to 1.62.1 * Revert unrelated formatting changes * Fix object_store msrv --------- Co-authored-by: Andrew Lamb Co-authored-by: Jeffrey Vo --- .github/workflows/rust.yml | 28 +++++--------------- Cargo.toml | 2 +- arrow-flight/gen/Cargo.toml | 2 +- arrow-integration-testing/Cargo.toml | 2 +- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- arrow-schema/Cargo.toml | 2 +- arrow/Cargo.toml | 2 +- parquet/Cargo.toml | 2 +- 8 files changed, 14 insertions(+), 28 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 044250b70435..ca0d2441ceae 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -123,23 +123,6 @@ jobs: uses: 
./.github/actions/setup-builder - name: Install cargo-msrv run: cargo install cargo-msrv - - name: Downgrade arrow dependencies - run: cargo update -p ahash --precise 0.8.7 - - name: Check arrow - working-directory: arrow - run: | - # run `cd arrow; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check parquet - working-directory: parquet - run: | - # run `cd parquet; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check arrow-flight - working-directory: arrow-flight - run: | - # run `cd arrow-flight; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - name: Downgrade object_store dependencies working-directory: object_store # Necessary because tokio 1.30.0 updates MSRV to 1.63 @@ -147,8 +130,11 @@ jobs: run: | cargo update -p tokio --precise 1.29.1 cargo update -p url --precise 2.5.0 - - name: Check object_store - working-directory: object_store + - name: Check all packages run: | - # run `cd object_store; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json + # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies + find . -mindepth 2 -name Cargo.toml | while read -r dir + do + echo "Checking package '$dir'" + cargo msrv verify --manifest-path "$dir" --output-format=json || exit 1 + done diff --git a/Cargo.toml b/Cargo.toml index 75ba410f12a6..39e3c0bca99a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,7 +74,7 @@ include = [ "Cargo.toml", ] edition = "2021" -rust-version = "1.62" +rust-version = "1.70" [workspace.dependencies] arrow = { version = "54.0.0", path = "./arrow", default-features = false } diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml index 6358227a8912..e52efbf67e21 100644 --- a/arrow-flight/gen/Cargo.toml +++ b/arrow-flight/gen/Cargo.toml @@ -20,7 +20,7 @@ name = "gen" description = "Code generation for arrow-flight" version = "0.1.0" edition = { workspace = true } -rust-version = { workspace = true } +rust-version = "1.71.1" authors = { workspace = true } homepage = { workspace = true } repository = { workspace = true } diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml index 8654b4b92734..26cb05fae1c2 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = { workspace = true } license = { workspace = true } edition = { workspace = true } publish = false -rust-version = { workspace = true } +rust-version = "1.75.0" [lib] crate-type = ["lib", "cdylib"] diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 03d08df30959..4ead95fcb912 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = ["Apache Arrow "] license = "Apache-2.0" keywords = [ "arrow" ] edition = "2021" -rust-version = "1.62" +rust-version = "1.70" publish = false [lib] diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index 1e1f9fbde0e4..d1bcf046b7ca 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -26,7 +26,7 @@ license = { workspace = true } keywords = { workspace = true } include = { workspace = true } edition = { workspace = true } -rust-version = { workspace = true } +rust-version = "1.64" [lib] name = "arrow_schema" diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 
8860cd61c5b3..a1c9c0ab2113 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -31,7 +31,7 @@ include = [ "Cargo.toml", ] edition = { workspace = true } -rust-version = "1.70.0" +rust-version = { workspace = true } [lib] name = "arrow" diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 19f890710778..e4085472ea20 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -26,7 +26,7 @@ authors = { workspace = true } keywords = ["arrow", "parquet", "hadoop"] readme = "README.md" edition = { workspace = true } -rust-version = "1.70.0" +rust-version = { workspace = true } [target.'cfg(target_arch = "wasm32")'.dependencies] ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] } From 70e105403922e837629f0a9edda43e02f789d32d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 8 Jan 2025 09:02:23 -0500 Subject: [PATCH 38/68] Document the `ParquetRecordBatchStream` buffering (#6947) * Document the ParquetRecordBatchStream buffering * Update parquet/src/arrow/async_reader/mod.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- parquet/src/arrow/async_reader/mod.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 4f3befe42662..5323251b07e7 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -611,11 +611,23 @@ impl std::fmt::Debug for StreamState { } } -/// An asynchronous [`Stream`](https://docs.rs/futures/latest/futures/stream/trait.Stream.html) of [`RecordBatch`] -/// for a parquet file that can be constructed using [`ParquetRecordBatchStreamBuilder`]. +/// An asynchronous [`Stream`]of [`RecordBatch`] constructed using [`ParquetRecordBatchStreamBuilder`] to read parquet files. /// /// `ParquetRecordBatchStream` also provides [`ParquetRecordBatchStream::next_row_group`] for fetching row groups, /// allowing users to decode record batches separately from I/O. +/// +/// # I/O Buffering +/// +/// `ParquetRecordBatchStream` buffers *all* data pages selected after predicates +/// (projection + filtering, etc) and decodes the rows from those buffered pages. +/// +/// For example, if all rows and columns are selected, the entire row group is +/// buffered in memory during decode. 
This minimizes the number of IO operations +/// required, which is especially important for object stores, where IO operations +/// have latencies in the hundreds of milliseconds +/// +/// +/// [`Stream`]: https://docs.rs/futures/latest/futures/stream/trait.Stream.html pub struct ParquetRecordBatchStream { metadata: Arc, From 06b4b8f088bcc40f7d372dfaa69daab740cbb558 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 8 Jan 2025 06:06:13 -0800 Subject: [PATCH 39/68] Return `BoxStream` with `'static` lifetime from `ObjectStore::list` (#6619) Co-authored-by: Andrew Lamb --- object_store/src/aws/client.rs | 2 +- object_store/src/aws/mod.rs | 4 +-- object_store/src/azure/client.rs | 2 +- object_store/src/azure/mod.rs | 7 ++-- object_store/src/chunked.rs | 4 +-- object_store/src/client/list.rs | 19 +++++----- object_store/src/client/pagination.rs | 50 ++++++++++++++++----------- object_store/src/gcp/client.rs | 2 +- object_store/src/gcp/mod.rs | 4 +-- object_store/src/http/mod.rs | 15 ++++---- object_store/src/lib.rs | 8 ++--- object_store/src/limit.rs | 14 ++++---- object_store/src/local.rs | 2 +- object_store/src/memory.rs | 2 +- object_store/src/prefix.rs | 32 ++++++++++++++--- object_store/src/throttle.rs | 16 +++++---- object_store/tests/get_range_file.rs | 2 +- 17 files changed, 113 insertions(+), 72 deletions(-) diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index b81be0c0efad..246f2779dd07 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -855,7 +855,7 @@ impl GetClient for S3Client { } #[async_trait] -impl ListClient for S3Client { +impl ListClient for Arc { /// Make an S3 List request async fn list_request( &self, diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs index 7f449c49963c..82ef909de984 100644 --- a/object_store/src/aws/mod.rs +++ b/object_store/src/aws/mod.rs @@ -273,7 +273,7 @@ impl ObjectStore for AmazonS3 { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.client.list(prefix) } @@ -281,7 +281,7 @@ impl ObjectStore for AmazonS3 { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { if self.client.config.is_s3_express() { let offset = offset.clone(); // S3 Express does not support start-after diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index bd72d0c6aee1..fa5412c455fc 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -925,7 +925,7 @@ impl GetClient for AzureClient { } #[async_trait] -impl ListClient for AzureClient { +impl ListClient for Arc { /// Make an Azure List request async fn list_request( &self, diff --git a/object_store/src/azure/mod.rs b/object_store/src/azure/mod.rs index 81b6667bc058..ea4dd8f567a9 100644 --- a/object_store/src/azure/mod.rs +++ b/object_store/src/azure/mod.rs @@ -119,6 +119,9 @@ impl ObjectStore for MicrosoftAzure { self.client.delete_request(location, &()).await } + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + self.client.list(prefix) + } fn delete_stream<'a>( &'a self, locations: BoxStream<'a, Result>, @@ -139,10 +142,6 @@ impl ObjectStore for MicrosoftAzure { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - self.client.list(prefix) - } - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { self.client.list_with_delimiter(prefix).await } diff --git 
a/object_store/src/chunked.rs b/object_store/src/chunked.rs index 3f83c1336dc4..4998e9f2a04d 100644 --- a/object_store/src/chunked.rs +++ b/object_store/src/chunked.rs @@ -150,7 +150,7 @@ impl ObjectStore for ChunkedStore { self.inner.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.inner.list(prefix) } @@ -158,7 +158,7 @@ impl ObjectStore for ChunkedStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.inner.list_with_offset(prefix, offset) } diff --git a/object_store/src/client/list.rs b/object_store/src/client/list.rs index 4445d0d17533..fe9bfebf768d 100644 --- a/object_store/src/client/list.rs +++ b/object_store/src/client/list.rs @@ -44,37 +44,38 @@ pub(crate) trait ListClientExt { prefix: Option<&Path>, delimiter: bool, offset: Option<&Path>, - ) -> BoxStream<'_, Result>; + ) -> BoxStream<'static, Result>; - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result>; + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; #[allow(unused)] fn list_with_offset( &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result>; + ) -> BoxStream<'static, Result>; async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result; } #[async_trait] -impl ListClientExt for T { +impl ListClientExt for T { fn list_paginated( &self, prefix: Option<&Path>, delimiter: bool, offset: Option<&Path>, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = offset.map(|x| x.to_string()); let prefix = prefix .filter(|x| !x.as_ref().is_empty()) .map(|p| format!("{}{}", p.as_ref(), crate::path::DELIMITER)); stream_paginated( + self.clone(), (prefix, offset), - move |(prefix, offset), token| async move { - let (r, next_token) = self + move |client, (prefix, offset), token| async move { + let (r, next_token) = client .list_request( prefix.as_deref(), delimiter, @@ -88,7 +89,7 @@ impl ListClientExt for T { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.list_paginated(prefix, false, None) .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) .try_flatten() @@ -99,7 +100,7 @@ impl ListClientExt for T { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.list_paginated(prefix, false, Some(offset)) .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) .try_flatten() diff --git a/object_store/src/client/pagination.rs b/object_store/src/client/pagination.rs index 77b2a3d8e2f2..d789c7431d8c 100644 --- a/object_store/src/client/pagination.rs +++ b/object_store/src/client/pagination.rs @@ -35,9 +35,14 @@ use std::future::Future; /// finish, otherwise it will continue to call `op(state, token)` with the values returned by the /// previous call to `op`, until a continuation token of `None` is returned /// -pub(crate) fn stream_paginated(state: S, op: F) -> impl Stream> +pub(crate) fn stream_paginated( + client: C, + state: S, + op: F, +) -> impl Stream> where - F: Fn(S, Option) -> Fut + Copy, + C: Clone, + F: Fn(C, S, Option) -> Fut + Copy, Fut: Future)>>, { enum PaginationState { @@ -46,27 +51,30 @@ where Done, } - futures::stream::unfold(PaginationState::Start(state), move |state| async move { - let (s, page_token) = match state { - PaginationState::Start(s) => (s, None), - 
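+                // (Editorial note, not in the original patch:) the previous
+                // call to `op` returned a non-empty continuation token, so the
+                // next request resumes from that token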
PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { - (s, Some(page_token)) - } - _ => { - return None; - } - }; + futures::stream::unfold(PaginationState::Start(state), move |state| { + let client = client.clone(); + async move { + let (s, page_token) = match state { + PaginationState::Start(s) => (s, None), + PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { + (s, Some(page_token)) + } + _ => { + return None; + } + }; - let (resp, s, continuation) = match op(s, page_token).await { - Ok(resp) => resp, - Err(e) => return Some((Err(e), PaginationState::Done)), - }; + let (resp, s, continuation) = match op(client, s, page_token).await { + Ok(resp) => resp, + Err(e) => return Some((Err(e), PaginationState::Done)), + }; - let next_state = match continuation { - Some(token) => PaginationState::HasMore(s, token), - None => PaginationState::Done, - }; + let next_state = match continuation { + Some(token) => PaginationState::HasMore(s, token), + None => PaginationState::Done, + }; - Some((Ok(resp), next_state)) + Some((Ok(resp), next_state)) + } }) } diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs index d6f89ca71740..8dd1c69802a8 100644 --- a/object_store/src/gcp/client.rs +++ b/object_store/src/gcp/client.rs @@ -633,7 +633,7 @@ impl GetClient for GoogleCloudStorageClient { } #[async_trait] -impl ListClient for GoogleCloudStorageClient { +impl ListClient for Arc { /// Perform a list request async fn list_request( &self, diff --git a/object_store/src/gcp/mod.rs b/object_store/src/gcp/mod.rs index 5199135ba6b0..a2f512415a8d 100644 --- a/object_store/src/gcp/mod.rs +++ b/object_store/src/gcp/mod.rs @@ -183,7 +183,7 @@ impl ObjectStore for GoogleCloudStorage { self.client.delete_request(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.client.list(prefix) } @@ -191,7 +191,7 @@ impl ObjectStore for GoogleCloudStorage { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.client.list_with_offset(prefix, offset) } diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs index 417f72856722..899740d36db9 100644 --- a/object_store/src/http/mod.rs +++ b/object_store/src/http/mod.rs @@ -31,6 +31,8 @@ //! [rfc2518]: https://datatracker.ietf.org/doc/html/rfc2518 //! 
[WebDAV]: https://en.wikipedia.org/wiki/WebDAV +use std::sync::Arc; + use async_trait::async_trait; use futures::stream::BoxStream; use futures::{StreamExt, TryStreamExt}; @@ -79,7 +81,7 @@ impl From for crate::Error { /// See [`crate::http`] for more information #[derive(Debug)] pub struct HttpStore { - client: Client, + client: Arc, } impl std::fmt::Display for HttpStore { @@ -130,19 +132,20 @@ impl ObjectStore for HttpStore { self.client.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix_len = prefix.map(|p| p.as_ref().len()).unwrap_or_default(); let prefix = prefix.cloned(); + let client = Arc::clone(&self.client); futures::stream::once(async move { - let status = self.client.list(prefix.as_ref(), "infinity").await?; + let status = client.list(prefix.as_ref(), "infinity").await?; let iter = status .response .into_iter() .filter(|r| !r.is_dir()) - .map(|response| { + .map(move |response| { response.check_ok()?; - response.object_meta(self.client.base_url()) + response.object_meta(client.base_url()) }) // Filter out exact prefix matches .filter_ok(move |r| r.location.as_ref().len() > prefix_len); @@ -238,7 +241,7 @@ impl HttpBuilder { let parsed = Url::parse(&url).map_err(|source| Error::UnableToParseUrl { url, source })?; Ok(HttpStore { - client: Client::new(parsed, self.client_options, self.retry_config)?, + client: Arc::new(Client::new(parsed, self.client_options, self.retry_config)?), }) } } diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 987ffacc6e49..53eda5a82fd5 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -722,7 +722,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static { /// `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x` will be included. /// /// Note: the order of returned [`ObjectMeta`] is not guaranteed - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result>; + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; /// List all the objects with the given prefix and a location greater than `offset` /// @@ -734,7 +734,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = offset.clone(); self.list(prefix) .try_filter(move |f| futures::future::ready(f.location > offset)) @@ -847,7 +847,7 @@ macro_rules! as_ref_impl { self.as_ref().delete_stream(locations) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.as_ref().list(prefix) } @@ -855,7 +855,7 @@ macro_rules! 
as_ref_impl { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.as_ref().list_with_offset(prefix, offset) } diff --git a/object_store/src/limit.rs b/object_store/src/limit.rs index 6a3c3b574e62..77f72a0e11a1 100644 --- a/object_store/src/limit.rs +++ b/object_store/src/limit.rs @@ -45,7 +45,7 @@ use tokio::sync::{OwnedSemaphorePermit, Semaphore}; /// #[derive(Debug)] pub struct LimitStore { - inner: T, + inner: Arc, max_requests: usize, semaphore: Arc, } @@ -56,7 +56,7 @@ impl LimitStore { /// `max_requests` pub fn new(inner: T, max_requests: usize) -> Self { Self { - inner, + inner: Arc::new(inner), max_requests, semaphore: Arc::new(Semaphore::new(max_requests)), } @@ -144,12 +144,13 @@ impl ObjectStore for LimitStore { self.inner.delete_stream(locations) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix = prefix.cloned(); + let inner = Arc::clone(&self.inner); let fut = Arc::clone(&self.semaphore) .acquire_owned() .map(move |permit| { - let s = self.inner.list(prefix.as_ref()); + let s = inner.list(prefix.as_ref()); PermitWrapper::new(s, permit.unwrap()) }); fut.into_stream().flatten().boxed() @@ -159,13 +160,14 @@ impl ObjectStore for LimitStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let prefix = prefix.cloned(); let offset = offset.clone(); + let inner = Arc::clone(&self.inner); let fut = Arc::clone(&self.semaphore) .acquire_owned() .map(move |permit| { - let s = self.inner.list_with_offset(prefix.as_ref(), &offset); + let s = inner.list_with_offset(prefix.as_ref(), &offset); PermitWrapper::new(s, permit.unwrap()) }); fut.into_stream().flatten().boxed() diff --git a/object_store/src/local.rs b/object_store/src/local.rs index b193481ae7b8..364026459a03 100644 --- a/object_store/src/local.rs +++ b/object_store/src/local.rs @@ -488,7 +488,7 @@ impl ObjectStore for LocalFileSystem { .await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let config = Arc::clone(&self.config); let root_path = match prefix { diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index 3f3cff3390db..6402f924346f 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -297,7 +297,7 @@ impl ObjectStore for InMemory { Ok(()) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let root = Path::default(); let prefix = prefix.unwrap_or(&root); diff --git a/object_store/src/prefix.rs b/object_store/src/prefix.rs index 227887d78fd7..a0b67ca4b58e 100644 --- a/object_store/src/prefix.rs +++ b/object_store/src/prefix.rs @@ -74,6 +74,28 @@ impl PrefixStore { } } +// Note: This is a relative hack to move these two functions to pure functions so they don't rely +// on the `self` lifetime. Expected to be cleaned up before merge. 
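+// (Why this helps: as methods on `&self`, these helpers tied each returned
+// stream to the borrow of `self`; as free functions over a cloned `Path`,
+// the streams returned below can meet the new `'static` bound.)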
+// +/// Strip the constant prefix from a given path +fn strip_prefix(prefix: &Path, path: Path) -> Path { + // Note cannot use match because of borrow checker + if let Some(suffix) = path.prefix_match(prefix) { + return suffix.collect(); + } + path +} + +/// Strip the constant prefix from a given ObjectMeta +fn strip_meta(prefix: &Path, meta: ObjectMeta) -> ObjectMeta { + ObjectMeta { + last_modified: meta.last_modified, + size: meta.size, + location: strip_prefix(prefix, meta.location), + e_tag: meta.e_tag, + version: None, + } +} #[async_trait::async_trait] impl ObjectStore for PrefixStore { async fn put(&self, location: &Path, payload: PutPayload) -> Result { @@ -136,21 +158,23 @@ impl ObjectStore for PrefixStore { self.inner.delete(&full_path).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix = self.full_path(prefix.unwrap_or(&Path::default())); let s = self.inner.list(Some(&prefix)); - s.map_ok(|meta| self.strip_meta(meta)).boxed() + let slf_prefix = self.prefix.clone(); + s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() } fn list_with_offset( &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = self.full_path(offset); let prefix = self.full_path(prefix.unwrap_or(&Path::default())); let s = self.inner.list_with_offset(Some(&prefix), &offset); - s.map_ok(|meta| self.strip_meta(meta)).boxed() + let slf_prefix = self.prefix.clone(); + s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() } async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { diff --git a/object_store/src/throttle.rs b/object_store/src/throttle.rs index b9dff5c6d1d2..29cd32705ccc 100644 --- a/object_store/src/throttle.rs +++ b/object_store/src/throttle.rs @@ -237,11 +237,13 @@ impl ObjectStore for ThrottledStore { self.inner.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let stream = self.inner.list(prefix); + let config = Arc::clone(&self.config); futures::stream::once(async move { - let wait_list_per_entry = self.config().wait_list_per_entry; - sleep(self.config().wait_list_per_call).await; + let config = *config.lock(); + let wait_list_per_entry = config.wait_list_per_entry; + sleep(config.wait_list_per_call).await; throttle_stream(stream, move |_| wait_list_per_entry) }) .flatten() @@ -252,11 +254,13 @@ impl ObjectStore for ThrottledStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let stream = self.inner.list_with_offset(prefix, offset); + let config = Arc::clone(&self.config); futures::stream::once(async move { - let wait_list_per_entry = self.config().wait_list_per_entry; - sleep(self.config().wait_list_per_call).await; + let config = *config.lock(); + let wait_list_per_entry = config.wait_list_per_entry; + sleep(config.wait_list_per_call).await; throttle_stream(stream, move |_| wait_list_per_entry) }) .flatten() diff --git a/object_store/tests/get_range_file.rs b/object_store/tests/get_range_file.rs index c5550ac21728..e500fc8ac87d 100644 --- a/object_store/tests/get_range_file.rs +++ b/object_store/tests/get_range_file.rs @@ -62,7 +62,7 @@ impl ObjectStore for MyStore { todo!() } - fn list(&self, _: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, _: Option<&Path>) -> BoxStream<'static, Result> { todo!() } 
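Every store in this patch applies the same trick: a method borrowing `&self` can only return `BoxStream<'_, _>`, so the client moves behind an `Arc` (`Arc<Client>` in `HttpStore`, `Arc<T>` in `LimitStore`) and a clone of that handle is moved into the returned stream. The pagination helper gets the same treatment, with the client threaded through `stream_paginated` explicitly so each page request owns its own copy. Below is a minimal sketch of the lifetime pattern only, using a hypothetical `MyStore` type and just the `futures` crate, not the actual `ObjectStore` trait:

    use std::sync::Arc;
    use futures::stream::{self, BoxStream, StreamExt};

    struct MyStore {
        // Shared state lives behind an Arc so a returned stream can own a handle.
        entries: Arc<Vec<String>>,
    }

    impl MyStore {
        // Cloning the Arc into the stream removes every borrow of `self`,
        // which is what permits the `'static` lifetime on the return type.
        fn list(&self) -> BoxStream<'static, String> {
            let entries = Arc::clone(&self.entries);
            stream::iter(0..entries.len())
                .map(move |i| entries[i].clone())
                .boxed()
        }
    }

The payoff is that a caller can keep or move the stream after the borrow of the store ends, for example spawning it onto another task, which a stream tied to `&self` would not allow.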
From a89585dc889eface963423cb6420a197131bb061 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Wed, 8 Jan 2025 09:28:05 -0500 Subject: [PATCH 40/68] [Parquet] Reuse buffer in `ByteViewArrayDecoderPlain` (#6930) * reuse buffer in view array * Update parquet/src/arrow/array_reader/byte_view_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> * use From instead --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- .../src/arrow/array_reader/byte_view_array.rs | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 92a8b0592d0d..0e16642940d2 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -290,7 +290,7 @@ impl ByteViewArrayDecoder { /// Decoder from [`Encoding::PLAIN`] data to [`ViewBuffer`] pub struct ByteViewArrayDecoderPlain { - buf: Bytes, + buf: Buffer, offset: usize, validate_utf8: bool, @@ -308,7 +308,7 @@ impl ByteViewArrayDecoderPlain { validate_utf8: bool, ) -> Self { Self { - buf, + buf: Buffer::from(buf), offset: 0, max_remaining_values: num_values.unwrap_or(num_levels), validate_utf8, @@ -316,9 +316,15 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { - // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` - let buf = arrow_buffer::Buffer::from(self.buf.clone()); - let block_id = output.append_block(buf); + // avoid creating a new buffer if the last buffer is the same as the current buffer + // This is especially useful when row-level filtering is applied, where we call lots of small `read` over the same buffer. 
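+ // (`Buffer::ptr_eq` compares the underlying allocation rather than the byte
+ // contents, so a clone of `self.buf` appended by an earlier call is
+ // recognized below and its block index is reused instead of appending a
+ // duplicate block.)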
+ let block_id = { + if output.buffers.last().is_some_and(|x| x.ptr_eq(&self.buf)) { + output.buffers.len() as u32 - 1 + } else { + output.append_block(self.buf.clone()) + } + }; let to_read = len.min(self.max_remaining_values); @@ -690,12 +696,13 @@ mod tests { use crate::{ arrow::{ - array_reader::test_util::{byte_array_all_encodings, utf8_column}, + array_reader::test_util::{byte_array_all_encodings, encode_byte_array, utf8_column}, buffer::view_buffer::ViewBuffer, record_reader::buffer::ValuesBuffer, }, basic::Encoding, column::reader::decoder::ColumnValueDecoder, + data_type::ByteArray, }; use super::*; @@ -746,4 +753,23 @@ mod tests { ); } } + + #[test] + fn test_byte_view_array_plain_decoder_reuse_buffer() { + let byte_array = vec!["hello", "world", "large payload over 12 bytes", "b"]; + let byte_array: Vec = byte_array.into_iter().map(|x| x.into()).collect(); + let pages = encode_byte_array(Encoding::PLAIN, &byte_array); + + let column_desc = utf8_column(); + let mut decoder = ByteViewArrayColumnValueDecoder::new(&column_desc); + + let mut view_buffer = ViewBuffer::default(); + decoder.set_data(Encoding::PLAIN, pages, 4, None).unwrap(); + decoder.read(&mut view_buffer, 1).unwrap(); + decoder.read(&mut view_buffer, 1).unwrap(); + assert_eq!(view_buffer.buffers.len(), 1); + + decoder.read(&mut view_buffer, 1).unwrap(); + assert_eq!(view_buffer.buffers.len(), 1); + } } From 6761baba64d3a7775af6feddda5e2799790df76c Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 8 Jan 2025 09:54:39 -0800 Subject: [PATCH 41/68] regenerate arrow-ipc/src/gen with patched flatbuffers (#6426) * regenerate arrow-ipc/src/gen with patched flatbuffers * use git repo instead of local path * add backticks * expand allowed overage to accommodate more alignment padding * re-enable nanoarrow integration test * add assertions that struct alignment is correct * remove struct alignment assertions * apply a patch to generated code rather than requiring patched flatc * point to google/flatbuffers with pub PushAlignment * add license header to gen.patch * use flatbuffers 24.12.23 * remove unnecessary gen.patch --- .github/workflows/integration.yml | 3 +- arrow-flight/src/encode.rs | 14 +- arrow-ipc/Cargo.toml | 2 +- arrow-ipc/regen.sh | 90 +++---- arrow-ipc/src/gen/File.rs | 26 +- arrow-ipc/src/gen/Message.rs | 66 ++--- arrow-ipc/src/gen/Schema.rs | 397 +++++++++++++++--------------- arrow-ipc/src/gen/SparseTensor.rs | 182 +++++++++++--- arrow-ipc/src/gen/Tensor.rs | 150 +++++++++-- arrow-ipc/src/lib.rs | 11 + 10 files changed, 609 insertions(+), 332 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9b23b1b5ad2e..a47195d1becf 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -65,8 +65,7 @@ jobs: ARROW_INTEGRATION_JAVA: ON ARROW_INTEGRATION_JS: ON ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust" - # Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052 - ARCHERY_INTEGRATION_WITH_NANOARROW: "0" + ARCHERY_INTEGRATION_WITH_NANOARROW: "1" # https://github.com/apache/arrow/pull/38403/files#r1371281630 ARCHERY_INTEGRATION_WITH_RUST: "1" # These are necessary because the github runner overrides $HOME diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index 19fe42474405..57ac9f3173fe 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -1708,7 +1708,7 @@ mod tests { ]) .unwrap(); - verify_encoded_split(batch, 112).await; + verify_encoded_split(batch, 
120).await; } #[tokio::test] @@ -1719,7 +1719,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4304).await; + verify_encoded_split(batch, 4312).await; } #[tokio::test] @@ -1755,7 +1755,7 @@ mod tests { // 5k over limit (which is 2x larger than limit of 5k) // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5800).await; + verify_encoded_split(batch, 5808).await; } #[tokio::test] @@ -1771,7 +1771,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); - verify_encoded_split(batch, 48).await; + verify_encoded_split(batch, 56).await; } #[tokio::test] @@ -1785,7 +1785,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 3328).await; + verify_encoded_split(batch, 3336).await; } #[tokio::test] @@ -1799,7 +1799,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5280).await; + verify_encoded_split(batch, 5288).await; } #[tokio::test] @@ -1824,7 +1824,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4128).await; + verify_encoded_split(batch, 4136).await; } /// Return size, in memory of flight data diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index cf91b3a3415f..4988eed4a5ed 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -38,7 +38,7 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -flatbuffers = { version = "24.3.25", default-features = false } +flatbuffers = { version = "24.12.23", default-features = false } lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13.0", default-features = false, optional = true } diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh index 8d8862ccc7f4..b368bd1bc7cc 100755 --- a/arrow-ipc/regen.sh +++ b/arrow-ipc/regen.sh @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Change to the toplevel `arrow-rs` directory pushd $DIR/../ -echo "Build flatc from source ..." - -FB_URL="https://github.com/google/flatbuffers" -FB_DIR="arrow/.flatbuffers" -FLATC="$FB_DIR/bazel-bin/flatc" - -if [ -z $(which bazel) ]; then - echo "bazel is required to build flatc" - exit 1 -fi - -echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" - -if [ ! -e $FB_DIR ]; then - echo "git clone $FB_URL ..." - git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR +if [ -z "$FLATC" ]; then + echo "Build flatc from source ..." + + FB_URL="https://github.com/google/flatbuffers" + FB_DIR="arrow/.flatbuffers" + FLATC="$FB_DIR/bazel-bin/flatc" + + if [ -z $(which bazel) ]; then + echo "bazel is required to build flatc" + exit 1 + fi + + echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" + + if [ ! -e $FB_DIR ]; then + echo "git clone $FB_URL ..." + git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR + else + echo "git pull $FB_URL ..." + git -C $FB_DIR pull + fi + + pushd $FB_DIR + echo "run: bazel build :flatc ..." + bazel build :flatc + popd else - echo "git pull $FB_URL ..." - git -C $FB_DIR pull + echo "Using flatc $FLATC ..." fi -pushd $FB_DIR -echo "run: bazel build :flatc ..." 
-bazel build :flatc -popd - - # Execute the code generation: $FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs @@ -99,37 +102,38 @@ for f in `ls *.rs`; do fi echo "Modifying: $f" - sed -i '' '/extern crate flatbuffers;/d' $f - sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f - sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f - sed -i '' '/pub mod org {/d' $f - sed -i '' '/pub mod apache {/d' $f - sed -i '' '/pub mod arrow {/d' $f - sed -i '' '/pub mod flatbuf {/d' $f - sed -i '' '/} \/\/ pub mod flatbuf/d' $f - sed -i '' '/} \/\/ pub mod arrow/d' $f - sed -i '' '/} \/\/ pub mod apache/d' $f - sed -i '' '/} \/\/ pub mod org/d' $f - sed -i '' '/use core::mem;/d' $f - sed -i '' '/use core::cmp::Ordering;/d' $f - sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f + sed --in-place='' '/extern crate flatbuffers;/d' $f + sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f + sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f + sed --in-place='' '/pub mod org {/d' $f + sed --in-place='' '/pub mod apache {/d' $f + sed --in-place='' '/pub mod arrow {/d' $f + sed --in-place='' '/pub mod flatbuf {/d' $f + sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f + sed --in-place='' '/} \/\/ pub mod arrow/d' $f + sed --in-place='' '/} \/\/ pub mod apache/d' $f + sed --in-place='' '/} \/\/ pub mod org/d' $f + sed --in-place='' '/use core::mem;/d' $f + sed --in-place='' '/use core::cmp::Ordering;/d' $f + sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f # required by flatc 1.12.0+ - sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f + sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f for name in ${names[@]}; do - sed -i '' "/use crate::${name}::\*;/d" $f - sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f + sed --in-place='' "/use crate::${name}::\*;/d" $f + sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f done # Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_". 
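+ # NOTE: `--in-place=''` is GNU sed's long-option form of in-place editing
+ # with no backup suffix; the old `-i ''` spelling is the BSD/macOS syntax,
+ # so these substitutions now assume GNU sed.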
- sed -i '' 's/type__/type_/g' $f - sed -i '' 's/TYPE__/TYPE_/g' $f + sed --in-place='' 's/type__/type_/g' $f + sed --in-place='' 's/TYPE__/TYPE_/g' $f # Some files need prefixes if [[ $f == "File.rs" ]]; then # Now prefix the file with the static contents echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "Message.rs" ]]; then + sed --in-place='' 's/List/\`List\`/g' $f echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "SparseTensor.rs" ]]; then echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index c0c2fb183237..427cf75de096 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -23,6 +23,8 @@ use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + // struct Block, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block { let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size()); dst.copy_from_slice(src); } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(8) + } } impl<'a> flatbuffers::Verifiable for Block { @@ -211,8 +217,8 @@ impl<'a> Footer<'a> { Footer { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, args: &'args FooterArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = FooterBuilder::new(_fbb); @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> { } } -pub struct FooterBuilder<'a: 'b, 'b> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, +pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> { #[inline] pub fn add_version(&mut self, version: MetadataVersion) { self.fbb_ @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { ); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> { let start = _fbb.start_table(); FooterBuilder { fbb_: _fbb, @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer { flatbuffers::size_prefixed_root_unchecked::