From 72d2a37102fa5812ac3dd17b4827de56bb026bcd Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Mon, 10 Mar 2025 17:45:51 -0400 Subject: [PATCH 1/8] Support different Timestamp TimeUnit resolutions for INT96. --- .../src/arrow/array_reader/primitive_array.rs | 80 +++++++++++++++---- parquet/src/arrow/arrow_reader/mod.rs | 59 +++++++++++--- parquet/src/arrow/schema/primitive.rs | 13 +++ parquet/src/data_type.rs | 76 ++++++++++++++---- parquet/src/record/api.rs | 2 +- 5 files changed, 187 insertions(+), 43 deletions(-) diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index 709d0f8bb16e..3a76ba2c0c85 100644 --- a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -23,11 +23,14 @@ use crate::column::page::PageIterator; use crate::data_type::{DataType, Int96}; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; -use arrow_array::Decimal256Array; use arrow_array::{ - builder::TimestampNanosecondBufferBuilder, ArrayRef, BooleanArray, Decimal128Array, - Float32Array, Float64Array, Int32Array, Int64Array, TimestampNanosecondArray, UInt32Array, - UInt64Array, + builder::{ + TimestampMicrosecondBufferBuilder, TimestampMillisecondBufferBuilder, + TimestampNanosecondBufferBuilder, TimestampSecondBufferBuilder, + }, + ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Float32Array, Float64Array, + Int32Array, Int64Array, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, UInt32Array, UInt64Array, }; use arrow_buffer::{i256, BooleanBuffer, Buffer}; use arrow_data::ArrayDataBuilder; @@ -37,13 +40,13 @@ use std::sync::Arc; /// Provides conversion from `Vec` to `Buffer` pub trait IntoBuffer { - fn into_buffer(self) -> Buffer; + fn into_buffer(self, target_type: &ArrowType) -> Buffer; } macro_rules! 
native_buffer { ($($t:ty),*) => { $(impl IntoBuffer for Vec<$t> { - fn into_buffer(self) -> Buffer { + fn into_buffer(self, _target_type: &ArrowType) -> Buffer { Buffer::from_vec(self) } })* @@ -52,18 +55,44 @@ macro_rules! native_buffer { native_buffer!(i8, i16, i32, i64, u8, u16, u32, u64, f32, f64); impl IntoBuffer for Vec { - fn into_buffer(self) -> Buffer { + fn into_buffer(self, _target_type: &ArrowType) -> Buffer { BooleanBuffer::from_iter(self).into_inner() } } impl IntoBuffer for Vec { - fn into_buffer(self) -> Buffer { - let mut builder = TimestampNanosecondBufferBuilder::new(self.len()); - for v in self { - builder.append(v.to_nanos()) + fn into_buffer(self, target_type: &ArrowType) -> Buffer { + match target_type { + ArrowType::Timestamp(TimeUnit::Second, _) => { + let mut builder = TimestampSecondBufferBuilder::new(self.len()); + for v in self { + builder.append(v.to_seconds()) + } + builder.finish() + } + ArrowType::Timestamp(TimeUnit::Millisecond, _) => { + let mut builder = TimestampMillisecondBufferBuilder::new(self.len()); + for v in self { + builder.append(v.to_millis()) + } + builder.finish() + } + ArrowType::Timestamp(TimeUnit::Microsecond, _) => { + let mut builder = TimestampMicrosecondBufferBuilder::new(self.len()); + for v in self { + builder.append(v.to_micros()) + } + builder.finish() + } + ArrowType::Timestamp(TimeUnit::Nanosecond, _) => { + let mut builder = TimestampNanosecondBufferBuilder::new(self.len()); + for v in self { + builder.append(v.to_nanos()) + } + builder.finish() + } + _ => unreachable!("Invalid target_type for Int96."), } - builder.finish() } } @@ -161,8 +190,11 @@ where PhysicalType::FLOAT => ArrowType::Float32, PhysicalType::DOUBLE => ArrowType::Float64, PhysicalType::INT96 => match target_type { + ArrowType::Timestamp(TimeUnit::Second, _) => target_type.clone(), + ArrowType::Timestamp(TimeUnit::Millisecond, _) => target_type.clone(), + ArrowType::Timestamp(TimeUnit::Microsecond, _) => target_type.clone(), 
ArrowType::Timestamp(TimeUnit::Nanosecond, _) => target_type.clone(), - _ => unreachable!("INT96 must be timestamp nanosecond"), + _ => unreachable!("INT96 must be a timestamp."), }, PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => { unreachable!("PrimitiveArrayReaders don't support complex physical types"); @@ -172,7 +204,10 @@ where // Convert to arrays by using the Parquet physical type. // The physical types are then cast to Arrow types if necessary - let record_data = self.record_reader.consume_record_data().into_buffer(); + let record_data = self + .record_reader + .consume_record_data() + .into_buffer(target_type); let array_data = ArrayDataBuilder::new(arrow_data_type) .len(self.record_reader.num_values()) @@ -194,7 +229,22 @@ where }, PhysicalType::FLOAT => Arc::new(Float32Array::from(array_data)), PhysicalType::DOUBLE => Arc::new(Float64Array::from(array_data)), - PhysicalType::INT96 => Arc::new(TimestampNanosecondArray::from(array_data)), + PhysicalType::INT96 => match target_type { + ArrowType::Timestamp(TimeUnit::Second, _) => { + Arc::new(TimestampSecondArray::from(array_data)) + } + ArrowType::Timestamp(TimeUnit::Millisecond, _) => { + Arc::new(TimestampMillisecondArray::from(array_data)) + } + ArrowType::Timestamp(TimeUnit::Microsecond, _) => { + Arc::new(TimestampMicrosecondArray::from(array_data)) + } + ArrowType::Timestamp(TimeUnit::Nanosecond, _) => { + Arc::new(TimestampNanosecondArray::from(array_data)) + } + _ => unreachable!("INT96 must be a timestamp."), + }, + PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => { unreachable!("PrimitiveArrayReaders don't support complex physical types"); } diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 6eba04c86f91..2f77d600a0e1 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -953,7 +953,7 @@ mod tests { use crate::column::reader::decoder::REPETITION_LEVELS_BATCH_SIZE; use 
crate::data_type::{ BoolType, ByteArray, ByteArrayType, DataType, FixedLenByteArray, FixedLenByteArrayType, - FloatType, Int32Type, Int64Type, Int96Type, + FloatType, Int32Type, Int64Type, Int96, Int96Type, }; use crate::errors::Result; use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion}; @@ -1455,17 +1455,56 @@ mod tests { #[test] fn test_int96_single_column_reader_test() { let encodings = &[Encoding::PLAIN, Encoding::RLE_DICTIONARY]; - run_single_column_reader_tests::( - 2, - ConvertedType::NONE, - None, - |vals| { + + let resolutions: Vec<(Option, fn(&[Option]) -> ArrayRef)> = vec![ + (None, |vals: &[Option]| { Arc::new(TimestampNanosecondArray::from_iter( vals.iter().map(|x| x.map(|x| x.to_nanos())), - )) as _ - }, - encodings, - ); + )) as ArrayRef + }), + ( + Some(ArrowDataType::Timestamp(TimeUnit::Second, None)), + |vals: &[Option]| { + Arc::new(TimestampSecondArray::from_iter( + vals.iter().map(|x| x.map(|x| x.to_seconds())), + )) as ArrayRef + }, + ), + ( + Some(ArrowDataType::Timestamp(TimeUnit::Millisecond, None)), + |vals: &[Option]| { + Arc::new(TimestampMillisecondArray::from_iter( + vals.iter().map(|x| x.map(|x| x.to_millis())), + )) as ArrayRef + }, + ), + ( + Some(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)), + |vals: &[Option]| { + Arc::new(TimestampMicrosecondArray::from_iter( + vals.iter().map(|x| x.map(|x| x.to_micros())), + )) as ArrayRef + }, + ), + ( + Some(ArrowDataType::Timestamp(TimeUnit::Nanosecond, None)), + |vals: &[Option]| { + Arc::new(TimestampNanosecondArray::from_iter( + vals.iter().map(|x| x.map(|x| x.to_nanos())), + )) as ArrayRef + }, + ), + ]; + + resolutions.iter().for_each(|(arrow_type, converter)| { + run_single_column_reader_tests::( + 2, + ConvertedType::NONE, + arrow_type.clone(), + converter, + encodings, + ); + }) } struct RandUtf8Gen {} diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 9f215b4dc07e..7bf297ccfc0e 100644 --- 
a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -53,6 +53,19 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType { // Determine timezone (DataType::Timestamp(p, _), DataType::Timestamp(h, Some(_))) if p == h => hint, + ( + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp(TimeUnit::Second, None), + ) => hint, + ( + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp(TimeUnit::Millisecond, None), + ) => hint, + ( + DataType::Timestamp(TimeUnit::Nanosecond, None), + DataType::Timestamp(TimeUnit::Microsecond, None), + ) => hint, + // Determine offset size (DataType::Utf8, DataType::LargeUtf8) => hint, (DataType::Binary, DataType::LargeBinary) => hint, diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index c4c03727f44a..530d87e2c854 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -38,6 +38,24 @@ pub struct Int96 { value: [u32; 3], } +const JULIAN_DAY_OF_EPOCH: i32 = 2_440_588; + +/// Number of seconds in a day +const SECONDS_IN_DAY: i64 = 86_400; +/// Number of milliseconds in a second +const MILLISECONDS: i64 = 1_000; +/// Number of microseconds in a second +const MICROSECONDS: i64 = 1_000_000; +/// Number of nanoseconds in a second +const NANOSECONDS: i64 = 1_000_000_000; + +/// Number of milliseconds in a day +const MILLISECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MILLISECONDS; +/// Number of microseconds in a day +const MICROSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * MICROSECONDS; +/// Number of nanoseconds in a day +const NANOSECONDS_IN_DAY: i64 = SECONDS_IN_DAY * NANOSECONDS; + impl Int96 { /// Creates new INT96 type struct with no data set. 
pub fn new() -> Self { @@ -56,31 +74,55 @@ impl Int96 { self.value = [elem0, elem1, elem2]; } - /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch - pub fn to_i64(&self) -> i64 { - let (seconds, nanoseconds) = self.to_seconds_and_nanos(); - seconds * 1_000 + nanoseconds / 1_000_000 + /// Converts this INT96 into an i64 representing the number of SECONDS since EPOCH + /// + /// Will wrap around on overflow + #[inline] + pub fn to_seconds(&self) -> i64 { + let (day, nanos) = self.to_parts(); + (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(SECONDS_IN_DAY) + .wrapping_add(nanos / 1_000_000_000) + } + + /// Converts this INT96 into an i64 representing the number of MILLISECONDS since EPOCH + /// + /// Will wrap around on overflow + #[inline] + pub fn to_millis(&self) -> i64 { + let (day, nanos) = self.to_parts(); + (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(MILLISECONDS_IN_DAY) + .wrapping_add(nanos / 1_000_000) + } + + /// Converts this INT96 into an i64 representing the number of MICROSECONDS since EPOCH + /// + /// Will wrap around on overflow + #[inline] + pub fn to_micros(&self) -> i64 { + let (day, nanos) = self.to_parts(); + (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(MICROSECONDS_IN_DAY) + .wrapping_add(nanos / 1_000) } /// Converts this INT96 into an i64 representing the number of NANOSECONDS since EPOCH /// /// Will wrap around on overflow + #[inline] pub fn to_nanos(&self) -> i64 { - let (seconds, nanoseconds) = self.to_seconds_and_nanos(); - seconds - .wrapping_mul(1_000_000_000) - .wrapping_add(nanoseconds) + let (day, nanos) = self.to_parts(); + (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + .wrapping_mul(NANOSECONDS_IN_DAY) + .wrapping_add(nanos) } - /// Converts this INT96 to a number of seconds and nanoseconds since EPOCH - pub fn to_seconds_and_nanos(&self) -> (i64, i64) { - const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588; - const SECONDS_PER_DAY: i64 = 86_400; - 
- let day = self.data()[2] as i64; - let nanoseconds = ((self.data()[1] as i64) << 32) + self.data()[0] as i64; - let seconds = (day - JULIAN_DAY_OF_EPOCH) * SECONDS_PER_DAY; - (seconds, nanoseconds) + #[inline] + fn to_parts(&self) -> (i32, i64) { + let day = self.data()[2] as i32; + let nanos = ((self.data()[1] as i64) << 32) + self.data()[0] as i64; + (day, nanos) } } diff --git a/parquet/src/record/api.rs b/parquet/src/record/api.rs index 1b0d81c7d9ab..dc8cb52ee65c 100644 --- a/parquet/src/record/api.rs +++ b/parquet/src/record/api.rs @@ -701,7 +701,7 @@ impl Field { /// `Timestamp` value. #[inline] pub fn convert_int96(_descr: &ColumnDescPtr, value: Int96) -> Self { - Field::TimestampMillis(value.to_i64()) + Field::TimestampMillis(value.to_millis()) } /// Converts Parquet FLOAT type with logical type into `f32` value. From e5a636370a07acb1a0c5f547ebdefd11081bd019 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 11 Mar 2025 15:47:24 -0400 Subject: [PATCH 2/8] Use i64 for subtracting JULIAN_DAY_OF_EPOCH. 
--- parquet/src/data_type.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 530d87e2c854..d6d195b93cad 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -38,7 +38,7 @@ pub struct Int96 { value: [u32; 3], } -const JULIAN_DAY_OF_EPOCH: i32 = 2_440_588; +const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588; /// Number of seconds in a day const SECONDS_IN_DAY: i64 = 86_400; @@ -80,7 +80,7 @@ impl Int96 { #[inline] pub fn to_seconds(&self) -> i64 { let (day, nanos) = self.to_parts(); - (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(SECONDS_IN_DAY) .wrapping_add(nanos / 1_000_000_000) } @@ -91,7 +91,7 @@ impl Int96 { #[inline] pub fn to_millis(&self) -> i64 { let (day, nanos) = self.to_parts(); - (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(MILLISECONDS_IN_DAY) .wrapping_add(nanos / 1_000_000) } @@ -102,7 +102,7 @@ impl Int96 { #[inline] pub fn to_micros(&self) -> i64 { let (day, nanos) = self.to_parts(); - (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(MICROSECONDS_IN_DAY) .wrapping_add(nanos / 1_000) } @@ -113,7 +113,7 @@ impl Int96 { #[inline] pub fn to_nanos(&self) -> i64 { let (day, nanos) = self.to_parts(); - (day.wrapping_sub(JULIAN_DAY_OF_EPOCH) as i64) + (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(NANOSECONDS_IN_DAY) .wrapping_add(nanos) } From ee320cc50ce54c0747902fade2f1cd57ce559858 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Thu, 13 Mar 2025 17:04:45 -0400 Subject: [PATCH 3/8] docs. 
--- parquet/src/arrow/schema/primitive.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 7bf297ccfc0e..1b8a9000c1c3 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -50,9 +50,11 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType { // Coerce Date32 back to Date64 (#1666) (DataType::Date32, DataType::Date64) => hint, - // Determine timezone + // Timestamps of the same resolution can be converted to a different timezone. (DataType::Timestamp(p, _), DataType::Timestamp(h, Some(_))) if p == h => hint, + // INT96 default to Timestamp(TimeUnit::Nanosecond, None) (see from_parquet below). + // Allow different resolutions to support larger date ranges. ( DataType::Timestamp(TimeUnit::Nanosecond, None), DataType::Timestamp(TimeUnit::Second, None), From 43f8ad07e0c4e97a8ec4c0c58b6e9b76ee47abe1 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 18 Mar 2025 10:07:23 -0400 Subject: [PATCH 4/8] Add deprecation comment. --- parquet/src/data_type.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index d6d195b93cad..d2d9935656d4 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -74,6 +74,12 @@ impl Int96 { self.value = [elem0, elem1, elem2]; } +Ad /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch + #[deprecated(since = "54.0.0", note = "Use `to_millis` instead")] + pub fn to_i64(&self) -> i64 { + self.to_millis() + } + /// Converts this INT96 into an i64 representing the number of SECONDS since EPOCH /// /// Will wrap around on overflow From be475dca8f4a34b0572b8f0340fb7c210e60ecfb Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 18 Mar 2025 10:17:27 -0400 Subject: [PATCH 5/8] Fix typo.
--- parquet/src/data_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index d2d9935656d4..c7923f4789be 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -74,7 +74,7 @@ impl Int96 { self.value = [elem0, elem1, elem2]; } -Ad /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch + /// Converts this INT96 into an i64 representing the number of MILLISECONDS since Epoch #[deprecated(since = "54.0.0", note = "Use `to_millis` instead")] pub fn to_i64(&self) -> i64 { self.to_millis() From 10844a116f321cd975db3a48897fc6f1bd69e6fe Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 18 Mar 2025 11:49:15 -0400 Subject: [PATCH 6/8] Add timezone test. --- parquet/src/arrow/arrow_reader/mod.rs | 17 +++++++++++++++++ parquet/src/arrow/schema/primitive.rs | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index a1f0d02865c8..3d1a17485326 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1519,11 +1519,13 @@ mod tests { let encodings = &[Encoding::PLAIN, Encoding::RLE_DICTIONARY]; let resolutions: Vec<(Option, fn(&[Option]) -> ArrayRef)> = vec![ + // Test without a specified ArrowType hint. (None, |vals: &[Option]| { Arc::new(TimestampNanosecondArray::from_iter( vals.iter().map(|x| x.map(|x| x.to_nanos())), )) as ArrayRef }), + // Test other TimeUnits as ArrowType hints. ( Some(ArrowDataType::Timestamp(TimeUnit::Second, None)), |vals: &[Option]| { @@ -1556,6 +1558,21 @@ mod tests { )) as ArrayRef }, ), + // Test another timezone with TimeUnit as ArrowType hints. 
+ ( + Some(ArrowDataType::Timestamp( + TimeUnit::Second, + Some(Arc::from("-05:00")), + )), + |vals: &[Option]| { + Arc::new( + TimestampSecondArray::from_iter( + vals.iter().map(|x| x.map(|x| x.to_seconds())), + ) + .with_timezone("-05:00"), + ) as ArrayRef + }, + ), ]; resolutions.iter().for_each(|(arrow_type, converter)| { diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 1b8a9000c1c3..df41f400279a 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -57,15 +57,15 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType { // Allow different resolutions to support larger date ranges. ( DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Timestamp(TimeUnit::Second, None), + DataType::Timestamp(TimeUnit::Second, _), ) => hint, ( DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Timestamp(TimeUnit::Millisecond, None), + DataType::Timestamp(TimeUnit::Millisecond, _), ) => hint, ( DataType::Timestamp(TimeUnit::Nanosecond, None), - DataType::Timestamp(TimeUnit::Microsecond, None), + DataType::Timestamp(TimeUnit::Microsecond, _), ) => hint, // Determine offset size From 8383c3680c8009ef240d24f5f0442a422406820a Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Tue, 18 Mar 2025 13:09:35 -0400 Subject: [PATCH 7/8] Fix clippy. 
--- parquet/src/data_type.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index c7923f4789be..79ecbea45ebe 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -85,7 +85,7 @@ impl Int96 { /// Will wrap around on overflow #[inline] pub fn to_seconds(&self) -> i64 { - let (day, nanos) = self.to_parts(); + let (day, nanos) = self.data_as_days_and_nanos(); (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(SECONDS_IN_DAY) .wrapping_add(nanos / 1_000_000_000) @@ -96,7 +96,7 @@ impl Int96 { /// Will wrap around on overflow #[inline] pub fn to_millis(&self) -> i64 { - let (day, nanos) = self.to_parts(); + let (day, nanos) = self.data_as_days_and_nanos(); (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(MILLISECONDS_IN_DAY) .wrapping_add(nanos / 1_000_000) @@ -107,7 +107,7 @@ impl Int96 { /// Will wrap around on overflow #[inline] pub fn to_micros(&self) -> i64 { - let (day, nanos) = self.to_parts(); + let (day, nanos) = self.data_as_days_and_nanos(); (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(MICROSECONDS_IN_DAY) .wrapping_add(nanos / 1_000) @@ -118,14 +118,14 @@ impl Int96 { /// Will wrap around on overflow #[inline] pub fn to_nanos(&self) -> i64 { - let (day, nanos) = self.to_parts(); + let (day, nanos) = self.data_as_days_and_nanos(); (day as i64 - JULIAN_DAY_OF_EPOCH) .wrapping_mul(NANOSECONDS_IN_DAY) .wrapping_add(nanos) } #[inline] - fn to_parts(&self) -> (i32, i64) { + fn data_as_days_and_nanos(&self) -> (i32, i64) { let day = self.data()[2] as i32; let nanos = ((self.data()[1] as i64) << 32) + self.data()[0] as i64; (day, nanos) From 9cfa58b477cabd716dde00e7f480900103268cb7 Mon Sep 17 00:00:00 2001 From: Matt Butrovich Date: Wed, 19 Mar 2025 11:14:04 -0400 Subject: [PATCH 8/8] Try to fix Clippy again. 
--- parquet/src/arrow/arrow_reader/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 3d1a17485326..588a8ea2fac1 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1518,7 +1518,10 @@ mod tests { fn test_int96_single_column_reader_test() { let encodings = &[Encoding::PLAIN, Encoding::RLE_DICTIONARY]; - let resolutions: Vec<(Option, fn(&[Option]) -> ArrayRef)> = vec![ + type TypeHintAndConversionFunction = + (Option, fn(&[Option]) -> ArrayRef); + + let resolutions: Vec = vec![ // Test without a specified ArrowType hint. (None, |vals: &[Option]| { Arc::new(TimestampNanosecondArray::from_iter(