diff --git a/parquet-variant-json/src/from_json.rs b/parquet-variant-json/src/from_json.rs index c0910950367f..3052bc504dee 100644 --- a/parquet-variant-json/src/from_json.rs +++ b/parquet-variant-json/src/from_json.rs @@ -165,7 +165,7 @@ mod test { expected: Variant<'a, 'a>, } - impl<'a> JsonToVariantTest<'a> { + impl JsonToVariantTest<'_> { fn run(self) -> Result<(), ArrowError> { let mut variant_builder = VariantBuilder::new(); json_to_variant(self.json, &mut variant_builder)?; diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 542065045c92..33608d27cbb7 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -1932,7 +1932,6 @@ mod tests { assert!(metadata.is_empty()); let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); - assert!(metadata.is_empty()); assert_eq!(variant, Variant::Int8(42)); } diff --git a/parquet-variant/src/decoder.rs b/parquet-variant/src/decoder.rs index 5a6aab43ff6d..5d6a06479376 100644 --- a/parquet-variant/src/decoder.rs +++ b/parquet-variant/src/decoder.rs @@ -22,8 +22,6 @@ use crate::ShortString; use arrow_schema::ArrowError; use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc}; -use std::num::TryFromIntError; - /// The basic type of a [`Variant`] value, encoded in the first two bits of the /// header byte. /// @@ -147,11 +145,9 @@ impl OffsetSizeBytes { /// * `bytes` – the byte buffer to index /// * `index` – 0-based index into the buffer /// - /// Each value is `self as usize` bytes wide (1, 2, 3 or 4). - /// Three-byte values are zero-extended to 32 bits before the final - /// fallible cast to `usize`. - pub(crate) fn unpack_usize(&self, bytes: &[u8], index: usize) -> Result<usize, ArrowError> { - self.unpack_usize_at_offset(bytes, 0, index) + /// Each value is `self as u32` bytes wide (1, 2, 3 or 4), zero-extended to 32 bits as needed. 
+ pub(crate) fn unpack_u32(&self, bytes: &[u8], index: usize) -> Result<u32, ArrowError> { + self.unpack_u32_at_offset(bytes, 0, index) } /// Return one unsigned little-endian value from `bytes`. @@ -162,15 +158,13 @@ impl OffsetSizeBytes { /// * `offset_index` – 0-based index **after** the skipped bytes /// (`0` is the first value, `1` the next, …). /// - /// Each value is `self as usize` bytes wide (1, 2, 3 or 4). - /// Three-byte values are zero-extended to 32 bits before the final - /// fallible cast to `usize`. - pub(crate) fn unpack_usize_at_offset( + /// Each value is `self as u32` bytes wide (1, 2, 3 or 4), zero-extended to 32 bits as needed. + pub(crate) fn unpack_u32_at_offset( &self, bytes: &[u8], byte_offset: usize, // how many bytes to skip offset_index: usize, // which offset in an array of offsets - ) -> Result<usize, ArrowError> { + ) -> Result<u32, ArrowError> { use OffsetSizeBytes::*; // Index into the byte array: @@ -179,7 +173,7 @@ impl OffsetSizeBytes { .checked_mul(*self as usize) .and_then(|n| n.checked_add(byte_offset)) .ok_or_else(|| overflow_error("unpacking offset array value"))?; - let result = match self { + let value = match self { One => u8::from_le_bytes(array_from_slice(bytes, offset)?).into(), Two => u16::from_le_bytes(array_from_slice(bytes, offset)?).into(), Three => { @@ -192,11 +186,7 @@ impl OffsetSizeBytes { } Four => u32::from_le_bytes(array_from_slice(bytes, offset)?), }; - - // Convert the u32 we extracted to usize (should always succeed on 32- and 64-bit arch) - result - .try_into() - .map_err(|e: TryFromIntError| ArrowError::InvalidArgumentError(e.to_string())) + Ok(value) } } @@ -518,57 +508,51 @@ mod tests { } #[test] - fn unpack_usize_all_widths() { + fn unpack_u32_all_widths() { // One-byte offsets let buf_one = [0x01u8, 0xAB, 0xCD]; - assert_eq!( - OffsetSizeBytes::One.unpack_usize(&buf_one, 0).unwrap(), - 0x01 - ); - assert_eq!( - OffsetSizeBytes::One.unpack_usize(&buf_one, 2).unwrap(), - 0xCD - ); + assert_eq!(OffsetSizeBytes::One.unpack_u32(&buf_one, 0).unwrap(), 
0x01); + assert_eq!(OffsetSizeBytes::One.unpack_u32(&buf_one, 2).unwrap(), 0xCD); // Two-byte offsets (little-endian 0x1234, 0x5678) let buf_two = [0x34, 0x12, 0x78, 0x56]; assert_eq!( - OffsetSizeBytes::Two.unpack_usize(&buf_two, 0).unwrap(), + OffsetSizeBytes::Two.unpack_u32(&buf_two, 0).unwrap(), 0x1234 ); assert_eq!( - OffsetSizeBytes::Two.unpack_usize(&buf_two, 1).unwrap(), + OffsetSizeBytes::Two.unpack_u32(&buf_two, 1).unwrap(), 0x5678 ); // Three-byte offsets (0x030201 and 0x0000FF) let buf_three = [0x01, 0x02, 0x03, 0xFF, 0x00, 0x00]; assert_eq!( - OffsetSizeBytes::Three.unpack_usize(&buf_three, 0).unwrap(), + OffsetSizeBytes::Three.unpack_u32(&buf_three, 0).unwrap(), 0x030201 ); assert_eq!( - OffsetSizeBytes::Three.unpack_usize(&buf_three, 1).unwrap(), + OffsetSizeBytes::Three.unpack_u32(&buf_three, 1).unwrap(), 0x0000FF ); // Four-byte offsets (0x12345678, 0x90ABCDEF) let buf_four = [0x78, 0x56, 0x34, 0x12, 0xEF, 0xCD, 0xAB, 0x90]; assert_eq!( - OffsetSizeBytes::Four.unpack_usize(&buf_four, 0).unwrap(), + OffsetSizeBytes::Four.unpack_u32(&buf_four, 0).unwrap(), 0x1234_5678 ); assert_eq!( - OffsetSizeBytes::Four.unpack_usize(&buf_four, 1).unwrap(), + OffsetSizeBytes::Four.unpack_u32(&buf_four, 1).unwrap(), 0x90AB_CDEF ); } #[test] - fn unpack_usize_out_of_bounds() { + fn unpack_u32_out_of_bounds() { let tiny = [0x00u8]; // deliberately too short - assert!(OffsetSizeBytes::Two.unpack_usize(&tiny, 0).is_err()); - assert!(OffsetSizeBytes::Three.unpack_usize(&tiny, 0).is_err()); + assert!(OffsetSizeBytes::Two.unpack_u32(&tiny, 0).is_err()); + assert!(OffsetSizeBytes::Three.unpack_u32(&tiny, 0).is_err()); } #[test] @@ -584,20 +568,20 @@ mod tests { let width = OffsetSizeBytes::Two; // dictionary_size starts immediately after the header byte - let dict_size = width.unpack_usize_at_offset(&buf, 1, 0).unwrap(); + let dict_size = width.unpack_u32_at_offset(&buf, 1, 0).unwrap(); assert_eq!(dict_size, 2); // offset array immediately follows the dictionary size - let 
first = width.unpack_usize_at_offset(&buf, 1, 1).unwrap(); + let first = width.unpack_u32_at_offset(&buf, 1, 1).unwrap(); assert_eq!(first, 0); - let second = width.unpack_usize_at_offset(&buf, 1, 2).unwrap(); + let second = width.unpack_u32_at_offset(&buf, 1, 2).unwrap(); assert_eq!(second, 5); - let third = width.unpack_usize_at_offset(&buf, 1, 3).unwrap(); + let third = width.unpack_u32_at_offset(&buf, 1, 3).unwrap(); assert_eq!(third, 9); - let err = width.unpack_usize_at_offset(&buf, 1, 4); + let err = width.unpack_u32_at_offset(&buf, 1, 4); assert!(err.is_err()) } } diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs index ef402064e956..a9751f0ab60a 100644 --- a/parquet-variant/src/utils.rs +++ b/parquet-variant/src/utils.rs @@ -122,3 +122,12 @@ where Some(Err(start)) } + +/// Verifies the expected size of type T, for a type that should only grow if absolutely necessary. +#[allow(unused)] +pub(crate) const fn expect_size_of<T>(expected: usize) { + let size = std::mem::size_of::<T>(); + if size != expected { + let _ = [""; 0][size]; + } +} diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 6bcf61c036ac..7767dfac3186 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -256,6 +256,9 @@ pub enum Variant<'m, 'v> { List(VariantList<'m, 'v>), } +// We don't want this to grow because it could hurt performance of a frequently-created type. +const _: () = crate::utils::expect_size_of::<Variant>(80); + impl<'m, 'v> Variant<'m, 'v> { /// Attempts to interpret a metadata and value buffer pair as a new `Variant`. 
/// diff --git a/parquet-variant/src/variant/list.rs b/parquet-variant/src/variant/list.rs index 11122190b446..17f87a2e0d7a 100644 --- a/parquet-variant/src/variant/list.rs +++ b/parquet-variant/src/variant/list.rs @@ -23,7 +23,7 @@ use crate::variant::{Variant, VariantMetadata}; use arrow_schema::ArrowError; // The value header occupies one byte; use a named constant for readability -const NUM_HEADER_BYTES: usize = 1; +const NUM_HEADER_BYTES: u32 = 1; /// A parsed version of the variant array value header byte. #[derive(Debug, Clone, PartialEq)] @@ -34,15 +34,15 @@ pub(crate) struct VariantListHeader { impl VariantListHeader { // Hide the ugly casting - const fn num_elements_size(&self) -> usize { + const fn num_elements_size(&self) -> u32 { self.num_elements_size as _ } - const fn offset_size(&self) -> usize { + const fn offset_size(&self) -> u32 { self.offset_size as _ } // Avoid materializing this offset, since it's cheaply and safely computable - const fn first_offset_byte(&self) -> usize { + const fn first_offset_byte(&self) -> u32 { NUM_HEADER_BYTES + self.num_elements_size() } @@ -122,11 +122,14 @@ pub struct VariantList<'m, 'v> { pub metadata: VariantMetadata<'m>, pub value: &'v [u8], header: VariantListHeader, - num_elements: usize, - first_value_byte: usize, + num_elements: u32, + first_value_byte: u32, validated: bool, } +// We don't want this to grow because it could increase the size of `Variant` and hurt performance. +const _: () = crate::utils::expect_size_of::<VariantList>(64); + impl<'m, 'v> VariantList<'m, 'v> { /// Attempts to interpret `value` as a variant array value. 
/// @@ -157,7 +160,7 @@ impl<'m, 'v> VariantList<'m, 'v> { let num_elements = header .num_elements_size - .unpack_usize_at_offset(value, NUM_HEADER_BYTES, 0)?; + .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?; // (num_elements + 1) * offset_size + first_offset_byte let first_value_byte = num_elements @@ -185,10 +188,10 @@ impl<'m, 'v> VariantList<'m, 'v> { // Use the last offset to upper-bound the value buffer let last_offset = new_self - .get_offset(num_elements)? + .get_offset(num_elements as _)? .checked_add(first_value_byte) .ok_or_else(|| overflow_error("variant array size"))?; - new_self.value = slice_from_slice(value, ..last_offset)?; + new_self.value = slice_from_slice(value, ..last_offset as _)?; Ok(new_self) } @@ -210,7 +213,7 @@ impl<'m, 'v> VariantList<'m, 'v> { let offset_buffer = slice_from_slice( self.value, - self.header.first_offset_byte()..self.first_value_byte, + self.header.first_offset_byte() as _..self.first_value_byte as _, )?; let offsets = @@ -226,7 +229,7 @@ impl<'m, 'v> VariantList<'m, 'v> { )); } - let value_buffer = slice_from_slice(self.value, self.first_value_byte..)?; + let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?; // Validate whether values are valid variant objects for i in 1..offsets.len() { @@ -234,7 +237,7 @@ let end_offset = offsets[i]; let value_bytes = slice_from_slice(value_buffer, start_offset..end_offset)?; - Variant::try_new_with_metadata(self.metadata, value_bytes)?; + Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?; } self.validated = true; @@ -244,7 +247,7 @@ /// Return the length of this array pub fn len(&self) -> usize { - self.num_elements + self.num_elements as _ } /// Is the array of zero length @@ -256,7 +259,7 @@ /// /// [invalid]: Self#Validation pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> { - (index < 
self.len()).then(|| { self.try_get_with_shallow_validation(index) .expect("Invalid variant array element") }) @@ -272,10 +275,10 @@ fn try_get_with_shallow_validation(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { // Fetch the value bytes between the two offsets for this index, from the value array region // of the byte buffer - let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?; + let byte_range = self.get_offset(index)? as _..self.get_offset(index + 1)? as _; let value_bytes = - slice_from_slice_at_offset(self.value, self.first_value_byte, byte_range)?; - Variant::try_new_with_metadata_and_shallow_validation(self.metadata, value_bytes) + slice_from_slice_at_offset(self.value, self.first_value_byte as _, byte_range)?; + Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes) } /// Iterates over the values of this list. When working with [unvalidated] input, consider @@ -297,14 +300,14 @@ fn iter_try_with_shallow_validation( &self, ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { - (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i)) + (0..self.len()).map(|i| self.try_get_with_shallow_validation(i)) } // Attempts to retrieve the ith offset from the offset array region of the byte buffer. 
- fn get_offset(&self, index: usize) -> Result<usize, ArrowError> { - let byte_range = self.header.first_offset_byte()..self.first_value_byte; + fn get_offset(&self, index: usize) -> Result<u32, ArrowError> { + let byte_range = self.header.first_offset_byte() as _..self.first_value_byte as _; let offset_bytes = slice_from_slice(self.value, byte_range)?; - self.header.offset_size.unpack_usize(offset_bytes, index) + self.header.offset_size.unpack_u32(offset_bytes, index) } } @@ -623,7 +626,7 @@ mod tests { expected_num_element_size, variant_list.header.num_elements_size ); - assert_eq!(list_size, variant_list.num_elements); + assert_eq!(list_size, variant_list.num_elements as usize); // verify the data in the variant assert_eq!(list_size, variant_list.len()); diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index b50a76686996..007122af7599 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -34,16 +34,16 @@ pub(crate) struct VariantMetadataHeader { const CORRECT_VERSION_VALUE: u8 = 1; // The metadata header occupies one byte; use a named constant for readability -const NUM_HEADER_BYTES: usize = 1; +const NUM_HEADER_BYTES: u32 = 1; impl VariantMetadataHeader { // Hide the cast - const fn offset_size(&self) -> usize { - self.offset_size as usize + const fn offset_size(&self) -> u32 { + self.offset_size as u32 } // Avoid materializing this offset, since it's cheaply and safely computable - const fn first_offset_byte(&self) -> usize { + const fn first_offset_byte(&self) -> u32 { NUM_HEADER_BYTES + self.offset_size() } @@ -125,15 +125,19 @@ impl VariantMetadataHeader { /// /// [`Variant`]: crate::Variant /// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct VariantMetadata<'m> { bytes: &'m [u8], header: VariantMetadataHeader, - dictionary_size: usize, - first_value_byte: 
usize, + dictionary_size: u32, + first_value_byte: u32, validated: bool, } +// We don't want this to grow because it increases the size of VariantList and VariantObject, which +// could increase the size of Variant. All those size increases could hurt performance. +const _: () = crate::utils::expect_size_of::<VariantMetadata>(32); + impl<'m> VariantMetadata<'m> { /// Attempts to interpret `bytes` as a variant metadata instance, with full [validation] of all /// dictionary entries. @@ -166,7 +170,7 @@ impl<'m> VariantMetadata<'m> { let dictionary_size = header .offset_size - .unpack_usize_at_offset(bytes, NUM_HEADER_BYTES, 0)?; + .unpack_u32_at_offset(bytes, NUM_HEADER_BYTES as usize, 0)?; // Calculate the starting offset of the dictionary string bytes. // @@ -196,16 +200,16 @@ impl<'m> VariantMetadata<'m> { // Use the last offset to upper-bound the byte slice let last_offset = new_self - .get_offset(dictionary_size)? + .get_offset(dictionary_size as _)? .checked_add(first_value_byte) .ok_or_else(|| overflow_error("variant metadata size"))?; - new_self.bytes = slice_from_slice(bytes, ..last_offset)?; + new_self.bytes = slice_from_slice(bytes, ..last_offset as _)?; Ok(new_self) } /// The number of metadata dictionary entries pub fn len(&self) -> usize { - self.dictionary_size + self.dictionary_size() } /// True if this metadata dictionary contains no entries @@ -227,7 +231,7 @@ impl<'m> VariantMetadata<'m> { if !self.validated { let offset_bytes = slice_from_slice( self.bytes, - self.header.first_offset_byte()..self.first_value_byte, + self.header.first_offset_byte() as _..self.first_value_byte as _, )?; let offsets = @@ -245,7 +249,7 @@ impl<'m> VariantMetadata<'m> { // Verify the string values in the dictionary are UTF-8 encoded strings. 
let value_buffer = - string_from_slice(self.bytes, 0, self.first_value_byte..self.bytes.len())?; + string_from_slice(self.bytes, 0, self.first_value_byte as _..self.bytes.len())?; if self.header.is_sorted { // Validate the dictionary values are unique and lexicographically sorted @@ -278,7 +282,7 @@ impl<'m> VariantMetadata<'m> { /// Get the dictionary size pub const fn dictionary_size(&self) -> usize { - self.dictionary_size + self.dictionary_size as _ } /// The variant protocol version @@ -290,10 +294,10 @@ impl<'m> VariantMetadata<'m> { /// /// This offset is an index into the dictionary, at the boundary between string `i-1` and string /// `i`. See [`Self::get`] to retrieve a specific dictionary entry. - fn get_offset(&self, i: usize) -> Result<usize, ArrowError> { - let offset_byte_range = self.header.first_offset_byte()..self.first_value_byte; + fn get_offset(&self, i: usize) -> Result<u32, ArrowError> { + let offset_byte_range = self.header.first_offset_byte() as _..self.first_value_byte as _; let bytes = slice_from_slice(self.bytes, offset_byte_range)?; - self.header.offset_size.unpack_usize(bytes, i) + self.header.offset_size.unpack_u32(bytes, i) } /// Attempts to retrieve a dictionary entry by index, failing if out of bounds or if the @@ -301,8 +305,8 @@ impl<'m> VariantMetadata<'m> { /// /// [invalid]: Self#Validation pub fn get(&self, i: usize) -> Result<&'m str, ArrowError> { - let byte_range = self.get_offset(i)?..self.get_offset(i + 1)?; - string_from_slice(self.bytes, self.first_value_byte, byte_range) + let byte_range = self.get_offset(i)? as _..self.get_offset(i + 1)? 
as _; + string_from_slice(self.bytes, self.first_value_byte as _, byte_range) } /// Returns an iterator that attempts to visit all dictionary entries, producing `Err` if the @@ -310,7 +314,7 @@ impl<'m> VariantMetadata<'m> { /// /// [invalid]: Self#Validation pub fn iter_try(&self) -> impl Iterator<Item = Result<&'m str, ArrowError>> + '_ { - (0..self.dictionary_size).map(move |i| self.get(i)) + (0..self.len()).map(|i| self.get(i)) } /// Iterates over all dictionary entries. When working with [unvalidated] input, consider diff --git a/parquet-variant/src/variant/object.rs b/parquet-variant/src/variant/object.rs index 36c8f999b244..dd6da08fbe64 100644 --- a/parquet-variant/src/variant/object.rs +++ b/parquet-variant/src/variant/object.rs @@ -23,7 +23,7 @@ use crate::variant::{Variant, VariantMetadata}; use arrow_schema::ArrowError; // The value header occupies one byte; use a named constant for readability -const NUM_HEADER_BYTES: usize = 1; +const NUM_HEADER_BYTES: u32 = 1; /// Header structure for [`VariantObject`] #[derive(Debug, Clone, PartialEq)] @@ -35,18 +35,18 @@ pub(crate) struct VariantObjectHeader { impl VariantObjectHeader { // Hide the ugly casting - const fn num_elements_size(&self) -> usize { + const fn num_elements_size(&self) -> u32 { self.num_elements_size as _ } - const fn field_id_size(&self) -> usize { + const fn field_id_size(&self) -> u32 { self.field_id_size as _ } - const fn field_offset_size(&self) -> usize { + const fn field_offset_size(&self) -> u32 { self.field_offset_size as _ } // Avoid materializing this offset, since it's cheaply and safely computable - const fn field_ids_start_byte(&self) -> usize { + const fn field_ids_start_byte(&self) -> u32 { NUM_HEADER_BYTES + self.num_elements_size() } @@ -119,12 +119,15 @@ pub struct VariantObject<'m, 'v> { pub metadata: VariantMetadata<'m>, pub value: &'v [u8], header: VariantObjectHeader, - num_elements: usize, - first_field_offset_byte: usize, - first_value_byte: usize, + num_elements: u32, + first_field_offset_byte: u32, + 
first_value_byte: u32, validated: bool, } +// We don't want this to grow because it could increase the size of `Variant` and hurt performance. +const _: () = crate::utils::expect_size_of::<VariantObject>(64); + impl<'m, 'v> VariantObject<'m, 'v> { pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self { Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant object") } @@ -156,7 +159,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { let num_elements = header .num_elements_size - .unpack_usize_at_offset(value, NUM_HEADER_BYTES, 0)?; + .unpack_u32_at_offset(value, NUM_HEADER_BYTES as _, 0)?; // Calculate byte offsets for field offsets and values with overflow protection, and verify // they're in bounds @@ -186,10 +189,10 @@ impl<'m, 'v> VariantObject<'m, 'v> { // Use it to upper-bound the value bytes, which also verifies that the field id and field // offset arrays are in bounds. let last_offset = new_self - .get_offset(num_elements)? + .get_offset(num_elements as _)? .checked_add(first_value_byte) .ok_or_else(|| overflow_error("variant object size"))?; - new_self.value = slice_from_slice(value, ..last_offset)?; + new_self.value = slice_from_slice(value, ..last_offset as _)?; Ok(new_self) } @@ -211,7 +214,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { let field_id_buffer = slice_from_slice( self.value, - self.header.field_ids_start_byte()..self.first_field_offset_byte, + self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _, )?; let field_ids = map_bytes_to_offsets(field_id_buffer, self.header.field_id_size) @@ -268,17 +271,17 @@ impl<'m, 'v> VariantObject<'m, 'v> { // Validate whether values are valid variant objects let field_offset_buffer = slice_from_slice( self.value, - self.first_field_offset_byte..self.first_value_byte, + self.first_field_offset_byte as _..self.first_value_byte as _, )?; - let num_offsets = field_offset_buffer.len() / self.header.field_offset_size(); + let num_offsets = field_offset_buffer.len() / 
self.header.field_offset_size() as usize; - let value_buffer = slice_from_slice(self.value, self.first_value_byte..)?; + let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?; map_bytes_to_offsets(field_offset_buffer, self.header.field_offset_size) .take(num_offsets.saturating_sub(1)) .try_for_each(|offset| { let value_bytes = slice_from_slice(value_buffer, offset..)?; - Variant::try_new_with_metadata(self.metadata, value_bytes)?; + Variant::try_new_with_metadata(self.metadata.clone(), value_bytes)?; Ok::<_, ArrowError>(()) })?; @@ -290,7 +293,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { /// Returns the number of key-value pairs in this object pub fn len(&self) -> usize { - self.num_elements + self.num_elements as _ } /// Returns true if the object contains no key-value pairs @@ -321,16 +324,16 @@ impl<'m, 'v> VariantObject<'m, 'v> { // Attempts to retrieve the ith field value from the value region of the byte buffer; it // performs only basic (constant-cost) validation. fn try_field_with_shallow_validation(&self, i: usize) -> Result<Variant<'m, 'v>, ArrowError> { - let value_bytes = slice_from_slice(self.value, self.first_value_byte..)?; - let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)?..)?; - Variant::try_new_with_metadata_and_shallow_validation(self.metadata, value_bytes) + let value_bytes = slice_from_slice(self.value, self.first_value_byte as _..)?; + let value_bytes = slice_from_slice(value_bytes, self.get_offset(i)? as _..)?; + Variant::try_new_with_metadata_and_shallow_validation(self.metadata.clone(), value_bytes) } // Attempts to retrieve the ith offset from the field offset region of the byte buffer. 
- fn get_offset(&self, i: usize) -> Result<usize, ArrowError> { - let byte_range = self.first_field_offset_byte..self.first_value_byte; + fn get_offset(&self, i: usize) -> Result<u32, ArrowError> { + let byte_range = self.first_field_offset_byte as _..self.first_value_byte as _; let field_offsets = slice_from_slice(self.value, byte_range)?; - self.header.field_offset_size.unpack_usize(field_offsets, i) + self.header.field_offset_size.unpack_u32(field_offsets, i) } /// Get a field's name by index in `0..self.len()` @@ -347,10 +350,10 @@ impl<'m, 'v> VariantObject<'m, 'v> { /// Fallible version of `field_name`. Returns field name by index, capturing validation errors fn try_field_name(&self, i: usize) -> Result<&'m str, ArrowError> { - let byte_range = self.header.field_ids_start_byte()..self.first_field_offset_byte; + let byte_range = self.header.field_ids_start_byte() as _..self.first_field_offset_byte as _; let field_id_bytes = slice_from_slice(self.value, byte_range)?; - let field_id = self.header.field_id_size.unpack_usize(field_id_bytes, i)?; - self.metadata.get(field_id) + let field_id = self.header.field_id_size.unpack_u32(field_id_bytes, i)?; + self.metadata.get(field_id as _) } /// Returns an iterator of (name, value) pairs over the fields of this object. @@ -374,7 +377,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { fn iter_try_with_shallow_validation( &self, ) -> impl Iterator<Item = Result<(&'m str, Variant<'m, 'v>), ArrowError>> + '_ { - (0..self.num_elements).map(move |i| { + (0..self.len()).map(|i| { let field = self.try_field_with_shallow_validation(i)?; Ok((self.try_field_name(i)?, field)) }) } @@ -389,8 +392,7 @@ impl<'m, 'v> VariantObject<'m, 'v> { // NOTE: This does not require a sorted metadata dictionary, because the variant spec // requires object field ids to be lexically sorted by their corresponding string values, // and probing the dictionary for a field id is always O(1) work. - let i = try_binary_search_range_by(0..self.num_elements, &name, |i| self.field_name(i))? 
- .ok()?; + let i = try_binary_search_range_by(0..self.len(), &name, |i| self.field_name(i))?.ok()?; self.field(i) }