diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index e6c56440848..a8c593037a3 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -2825,7 +2825,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_primitive_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build(); Int32Array::from(array_data); @@ -2835,7 +2835,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_list_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let values: [i32; 8] = [0; 8]; @@ -2855,7 +2855,7 @@ mod tests { #[should_panic(expected = "memory is not aligned")] fn test_binary_array_alignment() { let ptr = memory::allocate_aligned(8); - let buf = Buffer::from_raw_parts(ptr, 8); + let buf = unsafe { Buffer::from_raw_parts(ptr, 8) }; let buf2 = buf.slice(1); let values: [u8; 12] = [0; 12]; diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 597f34e0f81..f51b56afdcc 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -97,27 +97,52 @@ impl Debug for BufferData { } impl Buffer { - /// Creates a buffer from an existing memory region (must already be byte-aligned), and this - /// buffer will free this piece of memory when dropped. - pub fn from_raw_parts(ptr: *const u8, len: usize) -> Self { + /// Creates a buffer from an existing memory region (must already be byte-aligned), this + /// `Buffer` will free this piece of memory when dropped. + /// + /// # Arguments + /// + /// * `ptr` - Pointer to raw parts + /// * `len` - Length of raw parts in **bytes** + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + pub unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self { Buffer::build_with_arguments(ptr, len, true) } - /// Creates a buffer from an existing memory region (must already be byte-aligned), and this - /// buffers doesn't free this piece of memory when dropped. - pub fn from_unowned(ptr: *const u8, len: usize) -> Self { + /// Creates a buffer from an existing memory region (must already be byte-aligned), this + /// `Buffer` **does not** free this piece of memory when dropped. + /// + /// # Arguments + /// + /// * `ptr` - Pointer to raw parts + /// * `len` - Length of raw parts in **bytes** + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + pub unsafe fn from_unowned(ptr: *const u8, len: usize) -> Self { Buffer::build_with_arguments(ptr, len, false) } - /// Creates a buffer from an existing memory region (must already be byte-aligned) + /// Creates a buffer from an existing memory region (must already be byte-aligned). /// /// # Arguments /// - /// * `ptr` - Pointer to raw parts. + /// * `ptr` - Pointer to raw parts /// * `len` - Length of raw parts in bytes - /// * `owned` - Whether the raw parts is owned by this buffer. If true, this buffer will free - /// this memory when dropped, otherwise it will skip freeing the raw parts. - fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self { + /// * `owned` - Whether the raw parts is owned by this `Buffer`. If true, this `Buffer` will + /// free this memory when dropped, otherwise it will skip freeing the raw parts. + /// + /// # Safety + /// + /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` + /// bytes. + unsafe fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self { assert!( memory::is_aligned(ptr, memory::ALIGNMENT), "memory not aligned" @@ -165,20 +190,27 @@ impl Buffer { } /// View buffer as typed slice. - pub fn typed_data(&self) -> &[T] { + /// + /// # Safety + /// + /// `ArrowNativeType` is public so that it can be used as a trait bound for other public + /// components, such as the `ToByteSlice` trait. However, this means that it can be + /// implemented by user defined types, which it is not intended for. + /// + /// Also `typed_data::` is unsafe as `0x00` and `0x01` are the only valid values for + /// `bool` in Rust. However, `bool` arrays in Arrow are bit-packed which breaks this condition. + pub unsafe fn typed_data(&self) -> &[T] { assert_eq!(self.len() % mem::size_of::(), 0); assert!(memory::is_ptr_aligned::(self.raw_data() as *const T)); - unsafe { - from_raw_parts( - mem::transmute::<*const u8, *const T>(self.raw_data()), - self.len() / mem::size_of::(), - ) - } + from_raw_parts( + mem::transmute::<*const u8, *const T>(self.raw_data()), + self.len() / mem::size_of::(), + ) } /// Returns an empty buffer. pub fn empty() -> Self { - Self::from_raw_parts(::std::ptr::null(), 0) + unsafe { Self::from_raw_parts(::std::ptr::null(), 0) } } } @@ -202,8 +234,8 @@ impl> From for Buffer { let buffer = memory::allocate_aligned(capacity); unsafe { memory::memcpy(buffer, slice.as_ptr(), len); + Buffer::from_raw_parts(buffer, len) } - Buffer::from_raw_parts(buffer, len) } } @@ -552,7 +584,7 @@ mod tests { #[test] fn test_from_raw_parts() { - let buf = Buffer::from_raw_parts(null_mut(), 0); + let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0) }; assert_eq!(0, buf.len()); assert_eq!(0, buf.data().len()); assert!(buf.raw_data().is_null()); @@ -772,7 +804,7 @@ mod tests { macro_rules! check_as_typed_data { ($input: expr, $native_t: ty) => {{ let buffer = Buffer::from($input.to_byte_slice()); - let slice: &[$native_t] = buffer.typed_data::<$native_t>(); + let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() }; assert_eq!($input, slice); }}; } diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index dbca001707b..9437f216ba1 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -232,11 +232,15 @@ impl ArrayReader for PrimitiveArrayReader { } fn get_def_levels(&self) -> Option<&[i16]> { - self.def_levels_buffer.as_ref().map(|buf| buf.typed_data()) + self.def_levels_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } fn get_rep_levels(&self) -> Option<&[i16]> { - self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data()) + self.rep_levels_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } } @@ -579,11 +583,15 @@ impl ArrayReader for StructArrayReader { } fn get_def_levels(&self) -> Option<&[i16]> { - self.def_level_buffer.as_ref().map(|buf| buf.typed_data()) + self.def_level_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } fn get_rep_levels(&self) -> Option<&[i16]> { - self.rep_level_buffer.as_ref().map(|buf| buf.typed_data()) + self.rep_level_buffer + .as_ref() + .map(|buf| unsafe { buf.typed_data() }) } }