diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 4aa1c673f45..028444bd113 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -141,3 +141,7 @@ harness = false [[bench]] name = "mutable_array" harness = false + +[[bench]] +name = "buffer_create" +harness = false diff --git a/rust/arrow/benches/buffer_bit_ops.rs b/rust/arrow/benches/buffer_bit_ops.rs index 572f1f26519..8a5fbaf1e45 100644 --- a/rust/arrow/benches/buffer_bit_ops.rs +++ b/rust/arrow/benches/buffer_bit_ops.rs @@ -28,7 +28,7 @@ fn create_buffer(size: usize) -> Buffer { let mut result = MutableBuffer::new(size).with_bitset(size, false); for i in 0..size { - result.data_mut()[i] = 0b01010101 << i << (i % 4); + result.as_slice_mut()[i] = 0b01010101 << i << (i % 4); } result.freeze() diff --git a/rust/arrow/benches/buffer_create.rs b/rust/arrow/benches/buffer_create.rs new file mode 100644 index 00000000000..69132b87fac --- /dev/null +++ b/rust/arrow/benches/buffer_create.rs @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#[macro_use]
+extern crate criterion;
+use arrow::util::test_util::seedable_rng;
+use criterion::Criterion;
+use rand::distributions::Uniform;
+use rand::Rng;
+
+extern crate arrow;
+
+use arrow::{
+    buffer::{Buffer, MutableBuffer},
+    datatypes::ToByteSlice,
+};
+
+fn mutable_buffer(data: &[Vec<u32>], capacity: usize) -> Buffer {
+    criterion::black_box({
+        let mut result = MutableBuffer::new(capacity);
+
+        data.iter()
+            .for_each(|vec| result.extend_from_slice(vec.to_byte_slice()));
+
+        result.freeze()
+    })
+}
+
+fn from_slice(data: &[Vec<u32>], capacity: usize) -> Buffer {
+    criterion::black_box({
+        let mut a = Vec::<u32>::with_capacity(capacity);
+
+        data.iter().for_each(|vec| a.extend(vec));
+
+        Buffer::from(a.to_byte_slice())
+    })
+}
+
+fn create_data(size: usize) -> Vec<Vec<u32>> {
+    let rng = &mut seedable_rng();
+    let range = Uniform::new(0, 33);
+
+    (0..size)
+        .map(|_| {
+            let size = rng.sample(range);
+            seedable_rng()
+                .sample_iter(&range)
+                .take(size as usize)
+                .collect()
+        })
+        .collect()
+}
+
+fn benchmark(c: &mut Criterion) {
+    let size = 2usize.pow(15);
+    let data = create_data(size);
+    let cap = data.iter().map(|i| i.len()).sum();
+    let byte_cap = cap * std::mem::size_of::<u32>();
+
+    c.bench_function("mutable", |b| b.iter(|| mutable_buffer(&data, 0)));
+
+    c.bench_function("mutable prepared", |b| {
+        b.iter(|| mutable_buffer(&data, byte_cap))
+    });
+
+    c.bench_function("from_slice", |b| b.iter(|| from_slice(&data, 0)));
+
+    c.bench_function("from_slice prepared", |b| b.iter(|| from_slice(&data, cap)));
+}
+
+criterion_group!(benches, benchmark);
+criterion_main!(benches);
diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs
index 40c785b322c..a8fca67197c 100644
--- a/rust/arrow/src/array/array_binary.rs
+++ b/rust/arrow/src/array/array_binary.rs
@@ -24,9 +24,9 @@ use std::{
 };
 
 use super::{
-    array::print_long_array, raw_pointer::as_aligned_pointer, raw_pointer::RawPtrBox,
-    Array, ArrayData, ArrayDataRef, FixedSizeListArray, GenericBinaryIter,
-    GenericListArray, LargeListArray, ListArray, OffsetSizeTrait,
+    array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef,
+    FixedSizeListArray, GenericBinaryIter, GenericListArray, LargeListArray, ListArray,
+    OffsetSizeTrait,
 };
 use crate::util::bit_util;
 use crate::{buffer::Buffer, datatypes::ToByteSlice};
@@ -82,7 +82,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
     #[inline]
     fn value_offset_at(&self, i: usize) -> OffsetSize {
-        unsafe { *self.value_offsets.get().add(i) }
+        unsafe { *self.value_offsets.as_ptr().add(i) }
     }
 
     /// Returns the element at index `i` as a byte slice.
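For context, the two construction strategies the new `buffer_create` benchmark compares can be exercised outside criterion. The following is a minimal sketch, assuming the arrow APIs as they appear in this diff (`MutableBuffer::extend_from_slice`, `freeze`, and `Buffer::from` over `ToByteSlice`); the values are arbitrary:

use arrow::buffer::{Buffer, MutableBuffer};
use arrow::datatypes::ToByteSlice;

fn main() {
    let data: Vec<Vec<u32>> = vec![vec![1, 2, 3], vec![4, 5]];

    // Strategy 1 ("mutable"): extend a MutableBuffer in place, growing its
    // allocation as needed, then freeze it into an immutable Buffer.
    let mut mutable = MutableBuffer::new(0);
    data.iter()
        .for_each(|v| mutable.extend_from_slice(v.to_byte_slice()));
    let a = mutable.freeze();

    // Strategy 2 ("from_slice"): flatten into a Vec first, then copy the bytes
    // once into a newly allocated, 64-byte-aligned Buffer.
    let mut flat: Vec<u32> = Vec::new();
    data.iter().for_each(|v| flat.extend(v));
    let b = Buffer::from(flat.to_byte_slice());

    assert_eq!(a.as_slice(), b.as_slice());
}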
@@ -92,7 +92,7 @@ impl GenericBinaryArray { unsafe { let pos = self.value_offset_at(offset); std::slice::from_raw_parts( - self.value_data.get().offset(pos.to_isize()), + self.value_data.as_ptr().offset(pos.to_isize()), (self.value_offset_at(offset + 1) - pos).to_usize().unwrap(), ) } @@ -203,14 +203,12 @@ impl From 2, "BinaryArray data should contain 2 buffers only (offsets and values)" ); - let raw_value_offsets = data.buffers()[0].raw_data(); - let value_data = data.buffers()[1].raw_data(); + let offsets = data.buffers()[0].as_ptr(); + let values = data.buffers()[1].as_ptr(); Self { data, - value_offsets: RawPtrBox::new(as_aligned_pointer::( - raw_value_offsets, - )), - value_data: RawPtrBox::new(value_data), + value_offsets: unsafe { RawPtrBox::new(offsets) }, + value_data: unsafe { RawPtrBox::new(values) }, } } } @@ -232,7 +230,7 @@ where offsets.push(length_so_far); { - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for (i, s) in iter.enumerate() { if let Some(s) = s { @@ -326,7 +324,7 @@ impl FixedSizeBinaryArray { unsafe { let pos = self.value_offset_at(offset); std::slice::from_raw_parts( - self.value_data.get().offset(pos as isize), + self.value_data.as_ptr().offset(pos as isize), (self.value_offset_at(offset + 1) - pos) as usize, ) } @@ -387,7 +385,7 @@ impl From>>> for FixedSizeBinaryArray { let num_bytes = bit_util::ceil(len, 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); data.iter().enumerate().for_each(|(i, entry)| { if entry.is_some() { @@ -419,14 +417,14 @@ impl From for FixedSizeBinaryArray { 1, "FixedSizeBinaryArray data should contain 1 buffer only (values)" ); - let value_data = data.buffers()[0].raw_data(); + let value_data = data.buffers()[0].as_ptr(); let length = match data.data_type() { DataType::FixedSizeBinary(len) => *len, _ => panic!("Expected data type to be FixedSizeBinary"), }; Self { data, - value_data: RawPtrBox::new(value_data), + value_data: unsafe { RawPtrBox::new(value_data) }, length, } } @@ -510,7 +508,7 @@ impl DecimalArray { let raw_val = unsafe { let pos = self.value_offset_at(offset); std::slice::from_raw_parts( - self.value_data.get().offset(pos as isize), + self.value_data.as_ptr().offset(pos as isize), (self.value_offset_at(offset + 1) - pos) as usize, ) }; @@ -583,7 +581,7 @@ impl From for DecimalArray { 1, "DecimalArray data should contain 1 buffer only (values)" ); - let value_data = data.buffers()[0].raw_data(); + let values = data.buffers()[0].as_ptr(); let (precision, scale) = match data.data_type() { DataType::Decimal(precision, scale) => (*precision, *scale), _ => panic!("Expected data type to be Decimal"), @@ -591,7 +589,7 @@ impl From for DecimalArray { let length = 16; Self { data, - value_data: RawPtrBox::new(value_data), + value_data: unsafe { RawPtrBox::new(values) }, precision, scale, length, diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs index 82deca9bd9f..ae39dd151dd 100644 --- a/rust/arrow/src/array/array_boolean.rs +++ b/rust/arrow/src/array/array_boolean.rs @@ -24,7 +24,6 @@ use std::{convert::From, sync::Arc}; use super::*; use super::{array::print_long_array, raw_pointer::RawPtrBox}; use crate::buffer::{Buffer, MutableBuffer}; -use crate::memory; use crate::util::bit_util; /// Array of bools @@ -58,7 +57,7 @@ impl BooleanArray { /// Returns a raw pointer to the values of this array. 
pub fn raw_values(&self) -> *const u8 { - unsafe { self.raw_values.get().add(self.data.offset()) } + unsafe { self.raw_values.as_ptr().add(self.data.offset()) } } /// Returns a slice for the given offset and length @@ -87,7 +86,7 @@ impl BooleanArray { /// Note this doesn't do any bound checking, for performance reason. pub fn value(&self, i: usize) -> bool { let offset = i + self.offset(); - unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, offset) } + unsafe { bit_util::get_bit_raw(self.raw_values.as_ptr(), offset) } } } @@ -119,7 +118,7 @@ impl From> for BooleanArray { fn from(data: Vec) -> Self { let mut mut_buf = MutableBuffer::new_null(data.len()); { - let mut_slice = mut_buf.data_mut(); + let mut_slice = mut_buf.as_slice_mut(); for (i, b) in data.iter().enumerate() { if *b { bit_util::set_bit(mut_slice, i); @@ -147,14 +146,10 @@ impl From for BooleanArray { 1, "BooleanArray data should contain a single buffer only (values buffer)" ); - let raw_values = data.buffers()[0].raw_data(); - assert!( - memory::is_aligned::(raw_values, mem::align_of::()), - "memory is not aligned" - ); + let ptr = data.buffers()[0].as_ptr(); Self { data, - raw_values: RawPtrBox::new(raw_values as *const u8), + raw_values: unsafe { RawPtrBox::new(ptr) }, } } } @@ -185,11 +180,9 @@ impl>> FromIterator for BooleanArray { let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); let mut val_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); - let data = unsafe { - std::slice::from_raw_parts_mut(val_buf.raw_data_mut(), val_buf.capacity()) - }; + let data = val_buf.as_slice_mut(); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); iter.enumerate().for_each(|(i, item)| { if let Some(a) = item.borrow() { bit_util::set_bit(null_slice, i); diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs index 4eb8dc56640..5af27769bfa 100644 --- a/rust/arrow/src/array/array_list.rs +++ b/rust/arrow/src/array/array_list.rs @@ -23,8 +23,8 @@ use std::mem; use num::Num; use super::{ - array::print_long_array, make_array, raw_pointer::as_aligned_pointer, - raw_pointer::RawPtrBox, Array, ArrayDataRef, ArrayRef, + array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayDataRef, + ArrayRef, }; use crate::datatypes::ArrowNativeType; use crate::datatypes::DataType; @@ -100,7 +100,7 @@ impl GenericListArray { #[inline] fn value_offset_at(&self, i: usize) -> OffsetSize { - unsafe { *self.value_offsets.get().add(i) } + unsafe { *self.value_offsets.as_ptr().add(i) } } } @@ -117,18 +117,19 @@ impl From for GenericListArray::new(value_offsets) }; unsafe { assert!( - (*value_offsets.offset(0)).is_zero(), + (*value_offsets.as_ptr().offset(0)).is_zero(), "offsets do not start at zero" ); } Self { data, values, - value_offsets: RawPtrBox::new(value_offsets), + value_offsets, } } } diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 1faf0db1499..03001fe6b4c 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -29,7 +29,6 @@ use super::array::print_long_array; use super::raw_pointer::RawPtrBox; use super::*; use crate::buffer::{Buffer, MutableBuffer}; -use crate::memory; use crate::util::bit_util; /// Number of seconds in a day @@ -75,7 +74,7 @@ impl PrimitiveArray { #[deprecated(note = "Please use values() instead")] pub unsafe fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { 
std::slice::from_raw_parts( - self.raw_values.get().add(self.data.offset()).add(offset), + self.raw_values.as_ptr().add(self.data.offset()).add(offset), len, ) } @@ -88,7 +87,7 @@ impl PrimitiveArray { // buffer bounds/offset is ensured by the ArrayData instance. unsafe { std::slice::from_raw_parts( - self.raw_values.get().add(self.data.offset()), + self.raw_values.as_ptr().add(self.data.offset()), self.len(), ) } @@ -106,7 +105,7 @@ impl PrimitiveArray { /// caller must ensure that the passed in offset is less than the array len() pub fn value(&self, i: usize) -> T::Native { let offset = i + self.offset(); - unsafe { *self.raw_values.get().add(offset) } + unsafe { *self.raw_values.as_ptr().add(offset) } } } @@ -316,7 +315,7 @@ impl::Native let null = vec![0; mem::size_of::<::Native>()]; - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); iter.enumerate().for_each(|(i, item)| { if let Some(a) = item.borrow() { bit_util::set_bit(null_slice, i); @@ -413,7 +412,7 @@ impl PrimitiveArray { { let null = vec![0; mem::size_of::()]; - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for (i, v) in data.iter().enumerate() { if let Some(n) = v { bit_util::set_bit(null_slice, i); @@ -442,14 +441,11 @@ impl From for PrimitiveArray { 1, "PrimitiveArray data should contain a single buffer only (values buffer)" ); - let raw_values = data.buffers()[0].raw_data(); - assert!( - memory::is_aligned::(raw_values, mem::align_of::()), - "memory is not aligned" - ); + + let ptr = data.buffers()[0].as_ptr(); Self { data, - raw_values: RawPtrBox::new(raw_values as *const T::Native), + raw_values: unsafe { RawPtrBox::new(ptr) }, } } } diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index 5545fce3c45..9dde1c6bc61 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -21,9 +21,8 @@ use std::mem; use std::{any::Any, iter::FromIterator}; use super::{ - array::print_long_array, raw_pointer::as_aligned_pointer, raw_pointer::RawPtrBox, - Array, ArrayData, ArrayDataRef, GenericListArray, GenericStringIter, LargeListArray, - ListArray, OffsetSizeTrait, + array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, ArrayDataRef, + GenericListArray, GenericStringIter, LargeListArray, ListArray, OffsetSizeTrait, }; use crate::util::bit_util; use crate::{buffer::Buffer, datatypes::ToByteSlice}; @@ -80,7 +79,7 @@ impl GenericStringArray { #[inline] fn value_offset_at(&self, i: usize) -> OffsetSize { - unsafe { *self.value_offsets.get().add(i) } + unsafe { *self.value_offsets.as_ptr().add(i) } } /// Returns the element at index `i` as &str @@ -90,7 +89,7 @@ impl GenericStringArray { unsafe { let pos = self.value_offset_at(offset); let slice = std::slice::from_raw_parts( - self.value_data.get().offset(pos.to_isize()), + self.value_data.as_ptr().offset(pos.to_isize()), (self.value_offset_at(offset + 1) - pos).to_usize().unwrap(), ); @@ -166,7 +165,7 @@ where if let Some(s) = s { let s = s.as_ref(); // set null bit - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); bit_util::set_bit(null_slice, i); length_so_far = length_so_far + OffsetSize::from_usize(s.len()).unwrap(); @@ -252,14 +251,12 @@ impl From 2, "StringArray data should contain 2 buffers only (offsets and values)" ); - let raw_value_offsets = data.buffers()[0].raw_data(); - let value_data = data.buffers()[1].raw_data(); + let offsets = data.buffers()[0].as_ptr(); + let values = 
data.buffers()[1].as_ptr(); Self { data, - value_offsets: RawPtrBox::new(as_aligned_pointer::( - raw_value_offsets, - )), - value_data: RawPtrBox::new(value_data), + value_offsets: unsafe { RawPtrBox::new(offsets) }, + value_data: unsafe { RawPtrBox::new(values) }, } } } diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index a5137cc1008..d6646662748 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -384,8 +384,8 @@ mod tests { for i in 0..expected_int_data.len() { if !expected_int_data.is_null(i) { assert_eq!( - expected_value_buf.data()[i * 4..(i + 1) * 4], - actual_value_buf.data()[i * 4..(i + 1) * 4] + expected_value_buf.as_slice()[i * 4..(i + 1) * 4], + actual_value_buf.as_slice()[i * 4..(i + 1) * 4] ); } } diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index 7bced12fa33..284508a27d5 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -208,7 +208,7 @@ impl UnionArray { /// Panics if `index` is greater than the length of the array. pub fn type_id(&self, index: usize) -> i8 { assert!(index - self.offset() < self.len()); - self.data().buffers()[0].data()[index] as i8 + self.data().buffers()[0].as_slice()[index] as i8 } /// Returns the offset into the underlying values array for the array slot at `index`. @@ -225,7 +225,7 @@ impl UnionArray { Some(b) => b.count_set_bits_offset(0, index), None => index, }; - self.data().buffers()[1].data()[valid_slots * size_of::()] as i32 + self.data().buffers()[1].as_slice()[valid_slots * size_of::()] as i32 } else { index as i32 } diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 6cc765d5181..95178dacf4b 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -344,7 +344,7 @@ impl BooleanBufferBuilder { if v { let data = unsafe { std::slice::from_raw_parts_mut( - self.buffer.raw_data_mut(), + self.buffer.as_mut_ptr(), self.buffer.capacity(), ) }; @@ -359,7 +359,7 @@ impl BooleanBufferBuilder { if n != 0 && v { let data = unsafe { std::slice::from_raw_parts_mut( - self.buffer.raw_data_mut(), + self.buffer.as_mut_ptr(), self.buffer.capacity(), ) }; @@ -379,7 +379,7 @@ impl BooleanBufferBuilder { // updated on each append but is updated in the // `freeze` method instead. 
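            // As a rough sketch (assuming `bit_util::set_bit_raw` keeps its
            // LSB-first, bit-packed layout), the raw write below amounts to:
            //
            //     let byte = ptr.add(self.len / 8); // byte containing bit `self.len`
            //     *byte |= 1 << (self.len % 8);     // turn that bit on
            //
            // so the buffer must already have capacity covering bit `self.len`
            // before the pointer is dereferenced.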
unsafe { - bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); + bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len); } } self.len += 1; @@ -2484,7 +2484,7 @@ mod tests { let buf2 = builder.finish(); assert_eq!(buf.len(), buf2.len()); - assert_eq!(buf.data(), buf2.data()); + assert_eq!(buf.as_slice(), buf2.as_slice()); } #[test] @@ -3144,8 +3144,8 @@ mod tests { for i in 0..expected_int_data.len() { if !expected_int_data.is_null(i) { assert_eq!( - expected_value_buf.data()[i * 4..(i + 1) * 4], - actual_value_buf.data()[i * 4..(i + 1) * 4] + expected_value_buf.as_slice()[i * 4..(i + 1) * 4], + actual_value_buf.as_slice()[i * 4..(i + 1) * 4] ); } } diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index caa93518694..f4e241e1666 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -239,7 +239,7 @@ impl ArrayData { /// * the datatype is `Boolean` (it corresponds to a bit-packed buffer where the offset is not applicable) #[inline] pub(super) fn buffer(&self, buffer: usize) -> &[T] { - let values = unsafe { self.buffers[buffer].data().align_to::() }; + let values = unsafe { self.buffers[buffer].as_slice().align_to::() }; if !values.0.is_empty() || !values.2.is_empty() { panic!("The buffer is not byte-aligned with its interpretation") }; @@ -379,7 +379,7 @@ mod tests { assert_eq!(10, arr_data.null_count()); assert_eq!(5, arr_data.offset()); assert_eq!(1, arr_data.buffers().len()); - assert_eq!(&[0, 1, 2, 3], arr_data.buffers()[0].data()); + assert_eq!(&[0, 1, 2, 3], arr_data.buffers()[0].as_slice()); assert_eq!(1, arr_data.child_data().len()); assert_eq!(child_arr_data, arr_data.child_data()[0]); } @@ -420,7 +420,7 @@ mod tests { .null_bit_buffer(Buffer::from(bit_v)) .build(); assert!(arr_data.null_buffer().is_some()); - assert_eq!(&bit_v, arr_data.null_buffer().unwrap().data()); + assert_eq!(&bit_v, arr_data.null_buffer().unwrap().as_slice()); } #[test] diff --git a/rust/arrow/src/array/equal/boolean.rs b/rust/arrow/src/array/equal/boolean.rs index 4158080b81d..88bd080ba53 100644 --- a/rust/arrow/src/array/equal/boolean.rs +++ b/rust/arrow/src/array/equal/boolean.rs @@ -26,8 +26,8 @@ pub(super) fn boolean_equal( rhs_start: usize, len: usize, ) -> bool { - let lhs_values = lhs.buffers()[0].data(); - let rhs_values = rhs.buffers()[0].data(); + let lhs_values = lhs.buffers()[0].as_slice(); + let rhs_values = rhs.buffers()[0].as_slice(); // TODO: we can do this more efficiently if all values are not-null (0..len).all(|i| { diff --git a/rust/arrow/src/array/equal/decimal.rs b/rust/arrow/src/array/equal/decimal.rs index 715b308c4b8..a8fdded2fa7 100644 --- a/rust/arrow/src/array/equal/decimal.rs +++ b/rust/arrow/src/array/equal/decimal.rs @@ -31,8 +31,8 @@ pub(super) fn decimal_equal( _ => unreachable!(), }; - let lhs_values = &lhs.buffers()[0].data()[lhs.offset() * size..]; - let rhs_values = &rhs.buffers()[0].data()[rhs.offset() * size..]; + let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..]; + let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..]; if lhs.null_count() == 0 && rhs.null_count() == 0 { equal_len( diff --git a/rust/arrow/src/array/equal/fixed_binary.rs b/rust/arrow/src/array/equal/fixed_binary.rs index f14b097fbb0..c6889ba4b43 100644 --- a/rust/arrow/src/array/equal/fixed_binary.rs +++ b/rust/arrow/src/array/equal/fixed_binary.rs @@ -31,8 +31,8 @@ pub(super) fn fixed_binary_equal( _ => unreachable!(), }; - let lhs_values = &lhs.buffers()[0].data()[lhs.offset() * size..]; - let rhs_values = 
&rhs.buffers()[0].data()[rhs.offset() * size..]; + let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * size..]; + let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * size..]; if lhs.null_count() == 0 && rhs.null_count() == 0 { equal_len( diff --git a/rust/arrow/src/array/equal/primitive.rs b/rust/arrow/src/array/equal/primitive.rs index 19602e46488..4bb256643ca 100644 --- a/rust/arrow/src/array/equal/primitive.rs +++ b/rust/arrow/src/array/equal/primitive.rs @@ -29,8 +29,8 @@ pub(super) fn primitive_equal( len: usize, ) -> bool { let byte_width = size_of::(); - let lhs_values = &lhs.buffers()[0].data()[lhs.offset() * byte_width..]; - let rhs_values = &rhs.buffers()[0].data()[rhs.offset() * byte_width..]; + let lhs_values = &lhs.buffers()[0].as_slice()[lhs.offset() * byte_width..]; + let rhs_values = &rhs.buffers()[0].as_slice()[rhs.offset() * byte_width..]; if lhs.null_count() == 0 && rhs.null_count() == 0 { // without nulls, we just need to compare slices diff --git a/rust/arrow/src/array/equal/structure.rs b/rust/arrow/src/array/equal/structure.rs index 5b12ae776d9..31ccbc870d0 100644 --- a/rust/arrow/src/array/equal/structure.rs +++ b/rust/arrow/src/array/equal/structure.rs @@ -93,8 +93,8 @@ pub(super) fn struct_equal( equal_values(lhs, rhs, lhs_nulls, rhs_nulls, lhs_start, rhs_start, len) } else { // get a ref of the null buffer bytes, to use in testing for nullness - let lhs_null_bytes = lhs_nulls.as_ref().unwrap().data(); - let rhs_null_bytes = rhs_nulls.as_ref().unwrap().data(); + let lhs_null_bytes = lhs_nulls.as_ref().unwrap().as_slice(); + let rhs_null_bytes = rhs_nulls.as_ref().unwrap().as_slice(); // with nulls, we need to compare item by item whenever it is not null (0..len).all(|i| { let lhs_pos = lhs_start + i; diff --git a/rust/arrow/src/array/equal/utils.rs b/rust/arrow/src/array/equal/utils.rs index 3bb4c0be653..3ccc2450852 100644 --- a/rust/arrow/src/array/equal/utils.rs +++ b/rust/arrow/src/array/equal/utils.rs @@ -46,8 +46,8 @@ pub(super) fn equal_nulls( let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); if lhs_null_count > 0 || rhs_null_count > 0 { - let lhs_values = lhs_nulls.unwrap().data(); - let rhs_values = rhs_nulls.unwrap().data(); + let lhs_values = lhs_nulls.unwrap().as_slice(); + let rhs_values = rhs_nulls.unwrap().as_slice(); equal_bits( lhs_values, rhs_values, diff --git a/rust/arrow/src/array/equal/variable_size.rs b/rust/arrow/src/array/equal/variable_size.rs index caf8a0c1eae..94fdf6b2980 100644 --- a/rust/arrow/src/array/equal/variable_size.rs +++ b/rust/arrow/src/array/equal/variable_size.rs @@ -61,8 +61,8 @@ pub(super) fn variable_sized_equal( let rhs_offsets = rhs.buffer::(0); // these are bytes, and thus the offset does not need to be multiplied - let lhs_values = &lhs.buffers()[1].data()[lhs.offset()..]; - let rhs_values = &rhs.buffers()[1].data()[rhs.offset()..]; + let lhs_values = &lhs.buffers()[1].as_slice()[lhs.offset()..]; + let rhs_values = &rhs.buffers()[1].as_slice()[rhs.offset()..]; let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len); let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len); @@ -88,10 +88,10 @@ pub(super) fn variable_sized_equal( // the null bits can still be `None`, so we don't unwrap let lhs_is_null = !lhs_nulls - .map(|v| get_bit(v.data(), lhs_pos)) + .map(|v| get_bit(v.as_slice(), lhs_pos)) .unwrap_or(false); let rhs_is_null = !rhs_nulls - .map(|v| get_bit(v.data(), rhs_pos)) + .map(|v| get_bit(v.as_slice(), rhs_pos)) 
                .unwrap_or(false);

            lhs_is_null
diff --git a/rust/arrow/src/array/raw_pointer.rs b/rust/arrow/src/array/raw_pointer.rs
index 8eeadfe9390..d18ba4b29a3 100644
--- a/rust/arrow/src/array/raw_pointer.rs
+++ b/rust/arrow/src/array/raw_pointer.rs
@@ -16,28 +16,37 @@
 // under the License.
 
 use crate::memory;
+use std::ptr::NonNull;
 
+/// This struct is highly `unsafe` and offers the possibility to self-reference an [arrow::Buffer]
+/// from [arrow::array::ArrayData] as a pointer to the beginning of its contents.
 pub(super) struct RawPtrBox<T> {
-    inner: *const T,
+    ptr: NonNull<T>,
 }
 
 impl<T> RawPtrBox<T> {
-    pub(super) fn new(inner: *const T) -> Self {
-        Self { inner }
+    /// # Safety
+    /// The user must guarantee that:
+    /// * the contents where `ptr` points to are never `moved`. This is guaranteed when they are Pinned.
+    /// * the lifetime of this struct does not outlive the lifetime of `ptr`.
+    /// Failure to fulfill any of the above conditions results in undefined behavior.
+    /// # Panics
+    /// This function panics if:
+    /// * `ptr` is null
+    /// * `ptr` is not aligned to a slice of type `T`. This is guaranteed if it was built from a slice of type `T`.
+    pub(super) unsafe fn new(ptr: *const u8) -> Self {
+        let ptr = NonNull::new(ptr as *mut u8).expect("Pointer cannot be null");
+        assert!(
+            memory::is_aligned(ptr, std::mem::align_of::<T>()),
+            "memory is not aligned"
+        );
+        Self { ptr: ptr.cast() }
     }
 
-    pub(super) fn get(&self) -> *const T {
-        self.inner
+    pub(super) fn as_ptr(&self) -> *const T {
+        self.ptr.as_ptr()
     }
 }
 
 unsafe impl<T> Send for RawPtrBox<T> {}
 unsafe impl<T> Sync for RawPtrBox<T> {}
-
-pub(super) fn as_aligned_pointer<T>(p: *const u8) -> *const T {
-    assert!(
-        memory::is_aligned(p, std::mem::align_of::<T>()),
-        "memory is not aligned"
-    );
-    p as *const T
-}
diff --git a/rust/arrow/src/array/transform/boolean.rs b/rust/arrow/src/array/transform/boolean.rs
index cfe485b7b70..23be955b975 100644
--- a/rust/arrow/src/array/transform/boolean.rs
+++ b/rust/arrow/src/array/transform/boolean.rs
@@ -23,13 +23,13 @@ use super::{
 };
 
 pub(super) fn build_extend(array: &ArrayData) -> Extend {
-    let values = array.buffers()[0].data();
+    let values = array.buffers()[0].as_slice();
     Box::new(
         move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
            let buffer = &mut mutable.buffer1;
            reserve_for_bits(buffer, mutable.len + len);
            set_bits(
-                &mut buffer.data_mut(),
+                &mut buffer.as_slice_mut(),
                values,
                mutable.len,
                array.offset() + start,
diff --git a/rust/arrow/src/array/transform/fixed_binary.rs b/rust/arrow/src/array/transform/fixed_binary.rs
index 8899113ede7..477d2ad277c 100644
--- a/rust/arrow/src/array/transform/fixed_binary.rs
+++ b/rust/arrow/src/array/transform/fixed_binary.rs
@@ -25,7 +25,7 @@ pub(super) fn build_extend(array: &ArrayData) -> Extend {
         _ => unreachable!(),
     };
 
-    let values = &array.buffers()[0].data()[array.offset() * size..];
+    let values = &array.buffers()[0].as_slice()[array.offset() * size..];
     if array.null_count() == 0 {
         // fast case where we can copy regions without null issues
         Box::new(
diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs
index 28be14eee7b..c32c7876a4b 100644
--- a/rust/arrow/src/array/transform/mod.rs
+++ b/rust/arrow/src/array/transform/mod.rs
@@ -92,11 +92,11 @@ impl<'a> _MutableArrayData<'a> {
 
 fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits {
     if let Some(bitmap) = array.null_bitmap() {
-        let bytes = bitmap.bits.data();
+        let bytes = bitmap.bits.as_slice();
         Box::new(move |mutable, start, len| {
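            // A hedged sketch of what this closure does, assuming `utils::set_bits`
            // behaves as in this diff: it copies `len` validity bits from `bytes`
            // (starting at bit `array.offset() + start`) into `mutable.null_buffer`
            // at bit `mutable.len`, after reserving space, and adds the number of
            // bits that remain unset (the nulls) to `mutable.null_count`.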
utils::reserve_for_bits(&mut mutable.null_buffer, mutable.len + len); mutable.null_count += utils::set_bits( - mutable.null_buffer.data_mut(), + mutable.null_buffer.as_slice_mut(), bytes, mutable.len, array.offset() + start, @@ -106,7 +106,7 @@ fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits } else if use_nulls { Box::new(|mutable, _, len| { utils::reserve_for_bits(&mut mutable.null_buffer, mutable.len + len); - let write_data = mutable.null_buffer.data_mut(); + let write_data = mutable.null_buffer.as_slice_mut(); let offset = mutable.len; (0..len).for_each(|i| { bit_util::set_bit(write_data, offset + i); diff --git a/rust/arrow/src/array/transform/primitive.rs b/rust/arrow/src/array/transform/primitive.rs index 01bbd1a4788..86c76941af3 100644 --- a/rust/arrow/src/array/transform/primitive.rs +++ b/rust/arrow/src/array/transform/primitive.rs @@ -22,7 +22,7 @@ use crate::{array::ArrayData, datatypes::ArrowNativeType}; use super::{Extend, _MutableArrayData}; pub(super) fn build_extend(array: &ArrayData) -> Extend { - let values = &array.buffers()[0].data()[array.offset() * size_of::()..]; + let values = &array.buffers()[0].as_slice()[array.offset() * size_of::()..]; Box::new( move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| { let start = start * size_of::(); diff --git a/rust/arrow/src/array/transform/utils.rs b/rust/arrow/src/array/transform/utils.rs index 933ec0da1c6..c95912996f2 100644 --- a/rust/arrow/src/array/transform/utils.rs +++ b/rust/arrow/src/array/transform/utils.rs @@ -72,7 +72,7 @@ pub(super) unsafe fn get_last_offset( // Soundness // * offset buffer is always extended in slices of T and aligned accordingly. // * Buffer[0] is initialized with one element, 0, and thus `mutable_offsets.len() - 1` is always valid. 
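    // For reference: `align_to::<T>` (a std slice API) splits a byte slice into
    // a (prefix, middle, suffix) triple in which `middle` is the longest central
    // run correctly aligned for `T`; because this buffer is allocated aligned and
    // only ever extended in whole `T`s, the prefix and suffix are expected to be
    // empty, which the debug_assert below checks.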
- let (prefix, offsets, suffix) = offset_buffer.data().align_to::(); + let (prefix, offsets, suffix) = offset_buffer.as_slice().align_to::(); debug_assert!(prefix.is_empty() && suffix.is_empty()); *offsets.get_unchecked(offsets.len() - 1) } diff --git a/rust/arrow/src/array/transform/variable_size.rs b/rust/arrow/src/array/transform/variable_size.rs index 3a18b6fe5ee..dcd0ed6147f 100644 --- a/rust/arrow/src/array/transform/variable_size.rs +++ b/rust/arrow/src/array/transform/variable_size.rs @@ -42,7 +42,7 @@ fn extend_offset_values( pub(super) fn build_extend(array: &ArrayData) -> Extend { let offsets = array.buffer::(0); - let values = &array.buffers()[1].data()[array.offset()..]; + let values = &array.buffers()[1].as_slice()[array.offset()..]; if array.null_count() == 0 { // fast case where we can copy regions without null issues Box::new( diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs index 7df609fb88b..b977f550999 100644 --- a/rust/arrow/src/bitmap.rs +++ b/rust/arrow/src/bitmap.rs @@ -54,7 +54,7 @@ impl Bitmap { pub fn is_set(&self, i: usize) -> bool { assert!(i < (self.bits.len() << 3)); - unsafe { bit_util::get_bit_raw(self.bits.raw_data(), i) } + unsafe { bit_util::get_bit_raw(self.bits.as_ptr(), i) } } pub fn buffer_ref(&self) -> &Buffer { @@ -107,7 +107,7 @@ impl PartialEq for Bitmap { if self_len != other_len { return false; } - self.bits.data()[..self_len] == other.bits.data()[..self_len] + self.bits.as_slice()[..self_len] == other.bits.as_slice()[..self_len] } } diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 58b75a7d3fa..82708180e84 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -26,13 +26,12 @@ use crate::{ ffi, }; -use std::cmp; use std::convert::AsRef; use std::fmt::Debug; use std::mem; use std::ops::{BitAnd, BitOr, Not}; -use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::sync::Arc; +use std::{cmp, ptr::NonNull}; #[cfg(feature = "avx512")] use crate::arch::avx512::*; @@ -70,7 +69,8 @@ impl Buffer { /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed. - pub unsafe fn from_raw_parts(ptr: *const u8, len: usize, capacity: usize) -> Self { + pub unsafe fn from_raw_parts(ptr: NonNull, len: usize, capacity: usize) -> Self { + assert!(len <= capacity); Buffer::build_with_arguments(ptr, len, Deallocation::Native(capacity)) } @@ -88,7 +88,7 @@ impl Buffer { /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes and that the foreign deallocator frees the region. pub unsafe fn from_unowned( - ptr: *const u8, + ptr: NonNull, len: usize, data: Arc, ) -> Self { @@ -97,7 +97,7 @@ impl Buffer { /// Auxiliary method to create a new Buffer unsafe fn build_with_arguments( - ptr: *const u8, + ptr: NonNull, len: usize, deallocation: Deallocation, ) -> Self { @@ -125,11 +125,14 @@ impl Buffer { } /// Returns the byte slice stored in this buffer - pub fn data(&self) -> &[u8] { - &self.data.as_slice()[self.offset..] + pub fn as_slice(&self) -> &[u8] { + &self.data[self.offset..] } - /// Returns a slice of this buffer, starting from `offset`. + /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`. + /// Doing so allows the same memory region to be shared between buffers. + /// # Panics + /// Panics iff `offset` is larger than `len`. 
     pub fn slice(&self, offset: usize) -> Self {
         assert!(
             offset <= self.len(),
@@ -141,12 +144,12 @@
         }
     }
 
-    /// Returns a raw pointer for this buffer.
+    /// Returns a pointer to the start of this buffer.
     ///
     /// Note that this should be used cautiously, and the returned pointer should not be
     /// stored anywhere, to avoid dangling pointers.
-    pub fn raw_data(&self) -> *const u8 {
-        unsafe { self.data.raw_data().add(self.offset) }
+    pub fn as_ptr(&self) -> *const u8 {
+        unsafe { self.data.ptr().as_ptr().add(self.offset) }
     }
 
     /// View buffer as typed slice.
@@ -161,9 +164,15 @@
     /// `bool` in Rust. However, `bool` arrays in Arrow are bit-packed which breaks this condition.
     pub unsafe fn typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T] {
         assert_eq!(self.len() % mem::size_of::<T>(), 0);
-        assert!(memory::is_ptr_aligned::<T>(self.raw_data() as *const T));
-        from_raw_parts(
-            self.raw_data() as *const T,
+        assert!(memory::is_ptr_aligned::<T>(self.data.ptr().cast()));
+        // JUSTIFICATION
+        //  Benefit
+        //      Many of the buffers represent specific types, and consumers of `Buffer` often need to re-interpret them.
+        //  Soundness
+        //      * The pointer is non-null by construction
+        //      * alignment asserted above
+        std::slice::from_raw_parts(
+            self.as_ptr() as *const T,
             self.len() / mem::size_of::<T>(),
         )
     }
@@ -183,7 +192,7 @@
     /// in larger chunks and starting at arbitrary bit offsets.
     /// Note that both `offset` and `length` are measured in bits.
     pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
-        BitChunks::new(&self.data.as_slice()[self.offset..], offset, len)
+        BitChunks::new(&self.as_slice(), offset, len)
     }
 
     /// Returns the number of 1-bits in this buffer.
@@ -213,13 +222,33 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
         let len = slice.len() * mem::size_of::<u8>();
         let capacity = bit_util::round_upto_multiple_of_64(len);
         let buffer = memory::allocate_aligned(capacity);
+        // JUSTIFICATION
+        //  Benefit
+        //      It is often useful to create a buffer from bytes, typically when they are allocated by external sources
+        //  Soundness
+        //      * The pointers are non-null by construction
+        //      * alignment asserted above
+        //  Unsoundness
+        //      * There is no guarantee that the memory regions are non-overlapping, but `memcpy` requires this.
         unsafe {
-            memory::memcpy(buffer, slice.as_ptr(), len);
+            memory::memcpy(
+                buffer,
+                NonNull::new_unchecked(slice.as_ptr() as *mut u8),
+                len,
+            );
             Buffer::build_with_arguments(buffer, len, Deallocation::Native(capacity))
         }
     }
 }
 
+impl std::ops::Deref for Buffer {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len()) }
+    }
+}
+
 /// Apply a bitwise operation `simd_op` / `scalar_op` to two inputs using simd instructions and return the result as a Buffer.
 /// The `simd_op` functions gets applied on chunks of 64 bytes (512 bits) at a time
 /// and the `scalar_op` gets applied to remaining bytes.
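The `Deref<Target = [u8]>` implementation added above means a `Buffer` can now be used wherever a byte slice is expected, with `as_slice` kept as the explicit spelling. A minimal sketch of the resulting API surface, assuming the methods introduced in this diff (`as_slice`, `typed_data`); `u16` is an arbitrary choice of native type:

use arrow::buffer::Buffer;

fn main() {
    let buffer = Buffer::from(&[0u8, 1, 2, 3]);

    // `as_slice` (formerly `data`) returns the bytes explicitly...
    assert_eq!(buffer.as_slice(), &[0, 1, 2, 3]);

    // ...while Deref lets slice methods and indexing apply directly.
    assert_eq!(buffer.len(), 4);
    assert_eq!(&buffer[1..3], &[1, 2]);

    // Re-interpreting the bytes as another native type stays `unsafe`: the
    // caller asserts the bytes really are `u16`s; length and alignment are
    // checked by assertions inside `typed_data`.
    let values: &[u16] = unsafe { buffer.typed_data::<u16>() };
    assert_eq!(values.len(), 2);
}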
@@ -242,9 +271,9 @@ where let mut result = MutableBuffer::new(len).with_bitset(len, false); let lanes = u8x64::lanes(); - let mut left_chunks = left.data()[left_offset..].chunks_exact(lanes); - let mut right_chunks = right.data()[right_offset..].chunks_exact(lanes); - let mut result_chunks = result.data_mut().chunks_exact_mut(lanes); + let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes); + let mut right_chunks = right.as_slice()[right_offset..].chunks_exact(lanes); + let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes); result_chunks .borrow_mut() @@ -289,8 +318,8 @@ where let mut result = MutableBuffer::new(len).with_bitset(len, false); let lanes = u8x64::lanes(); - let mut left_chunks = left.data()[left_offset..].chunks_exact(lanes); - let mut result_chunks = result.data_mut().chunks_exact_mut(lanes); + let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes); + let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes); result_chunks .borrow_mut() @@ -399,10 +428,12 @@ pub(super) fn buffer_bin_and( let mut result = MutableBuffer::new(len).with_bitset(len, false); - let mut left_chunks = left.data()[left_offset..].chunks_exact(AVX512_U8X64_LANES); + let mut left_chunks = + left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES); let mut right_chunks = - right.data()[right_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut result_chunks = result.data_mut().chunks_exact_mut(AVX512_U8X64_LANES); + right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES); + let mut result_chunks = + result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES); result_chunks .borrow_mut() @@ -508,10 +539,12 @@ pub(super) fn buffer_bin_or( let mut result = MutableBuffer::new(len).with_bitset(len, false); - let mut left_chunks = left.data()[left_offset..].chunks_exact(AVX512_U8X64_LANES); + let mut left_chunks = + left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES); let mut right_chunks = - right.data()[right_offset..].chunks_exact(AVX512_U8X64_LANES); - let mut result_chunks = result.data_mut().chunks_exact_mut(AVX512_U8X64_LANES); + right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES); + let mut result_chunks = + result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES); result_chunks .borrow_mut() @@ -667,7 +700,8 @@ unsafe impl Send for Buffer {} /// converted into a immutable buffer via the `freeze` method. 
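/// A short usage sketch (assuming the methods introduced in this diff):
///
/// ```
/// use arrow::buffer::MutableBuffer;
///
/// let mut buffer = MutableBuffer::new(0);
/// buffer.extend_from_slice(&[42u8, 43]);
/// assert_eq!(buffer.as_slice(), &[42, 43]);
/// let frozen = buffer.freeze(); // immutable `Buffer` reusing the same allocation
/// ```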
#[derive(Debug)] pub struct MutableBuffer { - data: *mut u8, + // dangling iff capacity = 0 + data: NonNull, len: usize, capacity: usize, } @@ -700,7 +734,7 @@ impl MutableBuffer { assert!(end <= self.capacity); let v = if val { 255 } else { 0 }; unsafe { - std::ptr::write_bytes(self.data, v, end); + std::ptr::write_bytes(self.data.as_ptr(), v, end); self.len = end; } self @@ -714,7 +748,7 @@ impl MutableBuffer { pub fn set_null_bits(&mut self, start: usize, count: usize) { assert!(start + count <= self.capacity); unsafe { - std::ptr::write_bytes(self.data.add(start), 0, count); + std::ptr::write_bytes(self.data.as_ptr().add(start), 0, count); } } @@ -726,9 +760,8 @@ impl MutableBuffer { if capacity > self.capacity { let new_capacity = bit_util::round_upto_multiple_of_64(capacity); let new_capacity = cmp::max(new_capacity, self.capacity * 2); - let new_data = + self.data = unsafe { memory::reallocate(self.data, self.capacity, new_capacity) }; - self.data = new_data as *mut u8; self.capacity = new_capacity; } self.capacity @@ -747,9 +780,8 @@ impl MutableBuffer { } else { let new_capacity = bit_util::round_upto_multiple_of_64(new_len); if new_capacity < self.capacity { - let new_data = + self.data = unsafe { memory::reallocate(self.data, self.capacity, new_capacity) }; - self.data = new_data as *mut u8; self.capacity = new_capacity; } } @@ -780,21 +812,13 @@ impl MutableBuffer { } /// Returns the data stored in this buffer as a slice. - pub fn data(&self) -> &[u8] { - if self.data.is_null() { - &[] - } else { - unsafe { std::slice::from_raw_parts(self.raw_data(), self.len()) } - } + pub fn as_slice(&self) -> &[u8] { + self } /// Returns the data stored in this buffer as a mutable slice. - pub fn data_mut(&mut self) -> &mut [u8] { - if self.data.is_null() { - &mut [] - } else { - unsafe { std::slice::from_raw_parts_mut(self.raw_data_mut(), self.len()) } - } + pub fn as_slice_mut(&mut self) -> &mut [u8] { + self } /// Returns a raw pointer for this buffer. @@ -802,13 +826,13 @@ impl MutableBuffer { /// Note that this should be used cautiously, and the returned pointer should not be /// stored anywhere, to avoid dangling pointers. #[inline] - pub const fn raw_data(&self) -> *const u8 { - self.data + pub const fn as_ptr(&self) -> *const u8 { + self.data.as_ptr() } #[inline] - pub fn raw_data_mut(&mut self) -> *mut u8 { - self.data + pub fn as_mut_ptr(&mut self) -> *mut u8 { + self.data.as_ptr() } /// Freezes this buffer and return an immutable version of it. @@ -826,10 +850,16 @@ impl MutableBuffer { /// View buffer as typed slice. pub fn typed_data_mut(&mut self) -> &mut [T] { assert_eq!(self.len() % mem::size_of::(), 0); - assert!(memory::is_ptr_aligned::(self.raw_data() as *const T)); + assert!(memory::is_ptr_aligned::(self.data.cast())); + // JUSTIFICATION + // Benefit + // Many of the buffers represent specific types, and consumers of `Buffer` often need to re-interpret them. 
+ // Soundness + // * The pointer is non-null by construction + // * alignment asserted above unsafe { - from_raw_parts_mut( - self.raw_data() as *mut T, + std::slice::from_raw_parts_mut( + self.as_ptr() as *mut T, self.len() / mem::size_of::(), ) } @@ -843,7 +873,9 @@ impl MutableBuffer { self.reserve(new_len); } unsafe { - memory::memcpy(self.data.add(self.len), bytes.as_ptr(), bytes.len()); + let dst = NonNull::new_unchecked(self.data.as_ptr().add(self.len)); + let src = NonNull::new_unchecked(bytes.as_ptr() as *mut u8); + memory::memcpy(dst, src, bytes.len()); } self.len = new_len; } @@ -858,11 +890,23 @@ impl MutableBuffer { } } +impl std::ops::Deref for MutableBuffer { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) } + } +} + +impl std::ops::DerefMut for MutableBuffer { + fn deref_mut(&mut self) -> &mut [u8] { + unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } + } +} + impl Drop for MutableBuffer { fn drop(&mut self) { - if !self.data.is_null() { - unsafe { memory::free_aligned(self.data, self.capacity) }; - } + unsafe { memory::free_aligned(self.data, self.capacity) }; } } @@ -874,7 +918,7 @@ impl PartialEq for MutableBuffer { if self.capacity != other.capacity { return false; } - unsafe { memory::memcmp(self.data, other.data, self.len) == 0 } + self.as_slice() == other.as_slice() } } @@ -883,7 +927,6 @@ unsafe impl Send for MutableBuffer {} #[cfg(test)] mod tests { - use std::ptr::null_mut; use std::thread; use super::*; @@ -919,24 +962,18 @@ mod tests { #[test] fn test_from_raw_parts() { - let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0, 0) }; - assert_eq!(0, buf.len()); - assert_eq!(0, buf.data().len()); - assert_eq!(0, buf.capacity()); - assert!(buf.raw_data().is_null()); - let buf = Buffer::from(&[0, 1, 2, 3, 4]); assert_eq!(5, buf.len()); - assert!(!buf.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf.data()); + assert!(!buf.as_ptr().is_null()); + assert_eq!([0, 1, 2, 3, 4], buf.as_slice()); } #[test] fn test_from_vec() { let buf = Buffer::from(&[0, 1, 2, 3, 4]); assert_eq!(5, buf.len()); - assert!(!buf.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf.data()); + assert!(!buf.as_ptr().is_null()); + assert_eq!([0, 1, 2, 3, 4], buf.as_slice()); } #[test] @@ -945,8 +982,8 @@ mod tests { let buf2 = buf; assert_eq!(5, buf2.len()); assert_eq!(64, buf2.capacity()); - assert!(!buf2.raw_data().is_null()); - assert_eq!([0, 1, 2, 3, 4], buf2.data()); + assert!(!buf2.as_ptr().is_null()); + assert_eq!([0, 1, 2, 3, 4], buf2.as_slice()); } #[test] @@ -954,21 +991,21 @@ mod tests { let buf = Buffer::from(&[2, 4, 6, 8, 10]); let buf2 = buf.slice(2); - assert_eq!([6, 8, 10], buf2.data()); + assert_eq!([6, 8, 10], buf2.as_slice()); assert_eq!(3, buf2.len()); - assert_eq!(unsafe { buf.raw_data().offset(2) }, buf2.raw_data()); + assert_eq!(unsafe { buf.as_ptr().offset(2) }, buf2.as_ptr()); let buf3 = buf2.slice(1); - assert_eq!([8, 10], buf3.data()); + assert_eq!([8, 10], buf3.as_slice()); assert_eq!(2, buf3.len()); - assert_eq!(unsafe { buf.raw_data().offset(3) }, buf3.raw_data()); + assert_eq!(unsafe { buf.as_ptr().offset(3) }, buf3.as_ptr()); let buf4 = buf.slice(5); let empty_slice: [u8; 0] = []; - assert_eq!(empty_slice, buf4.data()); + assert_eq!(empty_slice, buf4.as_slice()); assert_eq!(0, buf4.len()); assert!(buf4.is_empty()); - assert_eq!(buf2.slice(2).data(), &[10]); + assert_eq!(buf2.slice(2).as_slice(), &[10]); } #[test] @@ -1045,17 +1082,17 @@ mod tests { let mut buf = 
MutableBuffer::new(100); buf.extend_from_slice(b"hello"); assert_eq!(5, buf.len()); - assert_eq!(b"hello", buf.data()); + assert_eq!(b"hello", buf.as_slice()); buf.extend_from_slice(b" world"); assert_eq!(11, buf.len()); - assert_eq!(b"hello world", buf.data()); + assert_eq!(b"hello world", buf.as_slice()); buf.clear(); assert_eq!(0, buf.len()); buf.extend_from_slice(b"hello arrow"); assert_eq!(11, buf.len()); - assert_eq!(b"hello arrow", buf.data()); + assert_eq!(b"hello arrow", buf.as_slice()); } #[test] @@ -1106,12 +1143,12 @@ mod tests { buf.extend_from_slice(b"aaaa bbbb cccc dddd"); assert_eq!(19, buf.len()); assert_eq!(64, buf.capacity()); - assert_eq!(b"aaaa bbbb cccc dddd", buf.data()); + assert_eq!(b"aaaa bbbb cccc dddd", buf.as_slice()); let immutable_buf = buf.freeze(); assert_eq!(19, immutable_buf.len()); assert_eq!(64, immutable_buf.capacity()); - assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.data()); + assert_eq!(b"aaaa bbbb cccc dddd", immutable_buf.as_slice()); } #[test] @@ -1136,7 +1173,7 @@ mod tests { fn test_access_concurrently() { let buffer = Buffer::from(vec![1, 2, 3, 4, 5]); let buffer2 = buffer.clone(); - assert_eq!([1, 2, 3, 4, 5], buffer.data()); + assert_eq!([1, 2, 3, 4, 5], buffer.as_slice()); let buffer_copy = thread::spawn(move || { // access buffer in another thread. diff --git a/rust/arrow/src/bytes.rs b/rust/arrow/src/bytes.rs index 0363d8735a5..331011687da 100644 --- a/rust/arrow/src/bytes.rs +++ b/rust/arrow/src/bytes.rs @@ -20,6 +20,7 @@ //! Note that this is a low-level functionality of this crate. use core::slice; +use std::ptr::NonNull; use std::sync::Arc; use std::{fmt::Debug, fmt::Formatter}; @@ -56,7 +57,7 @@ impl Debug for Deallocation { /// foreign deallocator to deallocate the region when it is no longer needed. pub struct Bytes { /// The raw pointer to be begining of the region - ptr: *const u8, + ptr: NonNull, /// The number of bytes visible to this region. This is always smaller than its capacity (when avaliable). len: usize, @@ -78,7 +79,11 @@ impl Bytes { /// /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed. - pub unsafe fn new(ptr: *const u8, len: usize, deallocation: Deallocation) -> Bytes { + pub unsafe fn new( + ptr: std::ptr::NonNull, + len: usize, + deallocation: Deallocation, + ) -> Bytes { Bytes { ptr, len, @@ -86,9 +91,8 @@ impl Bytes { } } - #[inline] - pub fn as_slice(&self) -> &[u8] { - unsafe { slice::from_raw_parts(self.ptr, self.len) } + fn as_slice(&self) -> &[u8] { + self } #[inline] @@ -102,15 +106,10 @@ impl Bytes { } #[inline] - pub fn raw_data(&self) -> *const u8 { + pub fn ptr(&self) -> NonNull { self.ptr } - #[inline] - pub fn raw_data_mut(&mut self) -> *mut u8 { - self.ptr as *mut u8 - } - pub fn capacity(&self) -> usize { match self.deallocation { Deallocation::Native(capacity) => capacity, @@ -126,9 +125,7 @@ impl Drop for Bytes { fn drop(&mut self) { match &self.deallocation { Deallocation::Native(capacity) => { - if !self.ptr.is_null() { - unsafe { memory::free_aligned(self.ptr as *mut u8, *capacity) }; - } + unsafe { memory::free_aligned(self.ptr, *capacity) }; } // foreign interface knows how to deallocate itself. 
Deallocation::Foreign(_) => (), @@ -136,6 +133,14 @@ impl Drop for Bytes { } } +impl std::ops::Deref for Bytes { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + unsafe { slice::from_raw_parts(self.ptr.as_ptr(), self.len) } + } +} + impl PartialEq for Bytes { fn eq(&self, other: &Bytes) -> bool { self.as_slice() == other.as_slice() @@ -146,7 +151,7 @@ impl Debug for Bytes { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { write!(f, "Bytes {{ ptr: {:?}, len: {}, data: ", self.ptr, self.len,)?; - f.debug_list().entries(self.as_slice().iter()).finish()?; + f.debug_list().entries(self.iter()).finish()?; write!(f, " }}") } diff --git a/rust/arrow/src/compute/kernels/arithmetic.rs b/rust/arrow/src/compute/kernels/arithmetic.rs index 59128d320b7..0616628e2d3 100644 --- a/rust/arrow/src/compute/kernels/arithmetic.rs +++ b/rust/arrow/src/compute/kernels/arithmetic.rs @@ -190,7 +190,7 @@ where if let Some(b) = &null_bit_buffer { // some value is null for i in 0..left.len() { - let is_valid = unsafe { bit_util::get_bit_raw(b.raw_data(), i) }; + let is_valid = unsafe { bit_util::get_bit_raw(b.as_ptr(), i) }; values.push(if is_valid { let right_value = right.value(i); if right_value.is_zero() { diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index bc5dd315ad2..012e4f2b198 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -51,7 +51,7 @@ macro_rules! compare_op { let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); let mut buffer = MutableBuffer::new(actual_capacity); buffer.resize(byte_capacity); - let data = buffer.raw_data_mut(); + let data = buffer.as_mut_ptr(); for i in 0..$left.len() { if $op($left.value(i), $right.value(i)) { @@ -84,7 +84,7 @@ macro_rules! compare_op_scalar { let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); let mut buffer = MutableBuffer::new(actual_capacity); buffer.resize(byte_capacity); - let data = buffer.raw_data_mut(); + let data = buffer.as_mut_ptr(); for i in 0..$left.len() { if $op($left.value(i), $right) { @@ -653,10 +653,10 @@ where Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; - let not_both_null_bitmap = not_both_null_bit_buffer.data(); + let not_both_null_bitmap = not_both_null_bit_buffer.as_slice(); let mut bool_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); - let bool_slice = bool_buf.data_mut(); + let bool_slice = bool_buf.as_slice_mut(); // if both array slots are valid, check if list contains primitive for i in 0..left_len { @@ -708,10 +708,10 @@ where Some(buff) => buff, None => new_all_set_buffer(num_bytes), }; - let not_both_null_bitmap = not_both_null_bit_buffer.data(); + let not_both_null_bitmap = not_both_null_bit_buffer.as_slice(); let mut bool_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); - let bool_slice = bool_buf.data_mut(); + let bool_slice = &mut bool_buf; for i in 0..left_len { // contains(null, null) = false diff --git a/rust/arrow/src/compute/kernels/substring.rs b/rust/arrow/src/compute/kernels/substring.rs index a329f45c46e..66298e67722 100644 --- a/rust/arrow/src/compute/kernels/substring.rs +++ b/rust/arrow/src/compute/kernels/substring.rs @@ -38,7 +38,7 @@ fn generic_substring( // compute values let values = &array.data_ref().buffers()[1]; - let data = values.data(); + let data = values.as_slice(); let mut new_values = Vec::new(); // we have no way to estimate how much this will be. 
let mut new_offsets: Vec = Vec::with_capacity(array.len() + 1); diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index c85578068e3..6184217e4ae 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -291,7 +291,7 @@ where let num_bytes = bit_util::ceil(data_len, 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for (i, elem) in data.iter_mut().enumerate() { let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { @@ -342,7 +342,7 @@ where let num_byte = bit_util::ceil(data_len, 8); let mut val_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false); - let val_slice = val_buf.data_mut(); + let val_slice = val_buf.as_slice_mut(); let null_count = values.null_count(); @@ -363,7 +363,7 @@ where nulls = indices.data_ref().null_buffer().cloned(); } else { let mut null_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, true); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); (0..data_len).try_for_each::<_, Result<()>>(|i| { let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { @@ -442,7 +442,7 @@ where let num_bytes = bit_util::ceil(data_len, 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for (i, offset) in offsets.iter_mut().skip(1).enumerate() { let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { @@ -480,7 +480,7 @@ where let num_bytes = bit_util::ceil(data_len, 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for (i, offset) in offsets.iter_mut().skip(1).enumerate() { let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { @@ -544,7 +544,7 @@ where let num_bytes = bit_util::ceil(indices.len(), 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); { - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); offsets[..].windows(2).enumerate().for_each( |(i, window): (usize, &[OffsetType::Native])| { if window[0] == window[1] { @@ -587,7 +587,7 @@ where // determine null count and null buffer, which are a function of `values` and `indices` let num_bytes = bit_util::ceil(indices.len(), 8); let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, true); - let null_slice = null_buf.data_mut(); + let null_slice = null_buf.as_slice_mut(); for i in 0..indices.len() { let index = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| { diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 36684d670fa..e4dae6fd8ee 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -297,7 +297,7 @@ pub(super) mod tests { values.append(&mut array); } else { list_null_count += 1; - bit_util::unset_bit(&mut list_bitmap.data_mut(), idx); + bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx); } offset.push(values.len() as i64); } @@ -386,7 +386,7 @@ pub(super) mod tests { values.extend(items.into_iter()); } else { list_null_count += 1; - bit_util::unset_bit(&mut list_bitmap.data_mut(), idx); + bit_util::unset_bit(&mut list_bitmap.as_slice_mut(), idx); values.extend(vec![None; length as usize].into_iter()); } } diff --git a/rust/arrow/src/ffi.rs 
b/rust/arrow/src/ffi.rs index 1d8d36da6d9..d494843aafa 100644 --- a/rust/arrow/src/ffi.rs +++ b/rust/arrow/src/ffi.rs @@ -76,7 +76,14 @@ To import an array, unsafely create an `ArrowArray` from two pointers using [Arr To export an array, create an `ArrowArray` using [ArrowArray::try_new]. */ -use std::{ffi::CStr, ffi::CString, iter, mem::size_of, ptr, sync::Arc}; +use std::{ + ffi::CStr, + ffi::CString, + iter, + mem::size_of, + ptr::{self, NonNull}, + sync::Arc, +}; use crate::buffer::Buffer; use crate::datatypes::DataType; @@ -329,7 +336,7 @@ impl FFI_ArrowArray { .iter() .map(|maybe_buffer| match maybe_buffer { // note that `raw_data` takes into account the buffer's offset - Some(b) => b.raw_data() as *const std::os::raw::c_void, + Some(b) => b.as_ptr() as *const std::os::raw::c_void, None => std::ptr::null(), }) .collect::>(); @@ -393,11 +400,7 @@ unsafe fn create_buffer( assert!(index < array.n_buffers as usize); let ptr = *buffers.add(index); - if ptr.is_null() { - None - } else { - Some(Buffer::from_unowned(ptr, len, array)) - } + NonNull::new(ptr as *mut u8).map(|ptr| Buffer::from_unowned(ptr, len, array)) } impl Drop for FFI_ArrowArray { diff --git a/rust/arrow/src/ipc/writer.rs b/rust/arrow/src/ipc/writer.rs index 5161d548ca6..c515f062852 100644 --- a/rust/arrow/src/ipc/writer.rs +++ b/rust/arrow/src/ipc/writer.rs @@ -718,7 +718,7 @@ fn write_buffer( let total_len: i64 = (len + pad_len) as i64; // assert_eq!(len % 8, 0, "Buffer width not a multiple of 8 bytes"); buffers.push(ipc::Buffer::new(offset, total_len)); - arrow_data.extend_from_slice(buffer.data()); + arrow_data.extend_from_slice(buffer.as_slice()); arrow_data.extend_from_slice(&vec![0u8; pad_len][..]); offset + total_len } diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs index d43b02cee98..39f35bbef74 100644 --- a/rust/arrow/src/json/reader.rs +++ b/rust/arrow/src/json/reader.rs @@ -872,7 +872,7 @@ impl Decoder { rows.iter().enumerate().for_each(|(i, v)| { if let Value::Array(a) = v { cur_offset = cur_offset + OffsetSize::from_usize(a.len()).unwrap(); - bit_util::set_bit(list_nulls.data_mut(), i); + bit_util::set_bit(list_nulls.as_slice_mut(), i); } else if let Value::Null = v { // value is null, not incremented } else { @@ -896,11 +896,17 @@ impl Decoder { if let Value::Bool(child) = value { // if valid boolean, append value if *child { - bit_util::set_bit(bool_values.data_mut(), curr_index); + bit_util::set_bit( + bool_values.as_slice_mut(), + curr_index, + ); } } else { // null slot - bit_util::unset_bit(bool_nulls.data_mut(), curr_index); + bit_util::unset_bit( + bool_nulls.as_slice_mut(), + curr_index, + ); } curr_index += 1; }); @@ -964,7 +970,10 @@ impl Decoder { .flat_map(|row| { if let Value::Array(values) = row { values.iter().for_each(|_| { - bit_util::set_bit(null_buffer.data_mut(), struct_index); + bit_util::set_bit( + null_buffer.as_slice_mut(), + struct_index, + ); struct_index += 1; }); values.clone() @@ -1178,7 +1187,7 @@ impl Decoder { .map(|(i, v)| match v { // we want the field as an object, if it's not, we treat as null Some(Value::Object(value)) => { - bit_util::set_bit(null_buffer.data_mut(), i); + bit_util::set_bit(null_buffer.as_slice_mut(), i); Value::Object(value.clone()) } _ => Value::Object(Default::default()), diff --git a/rust/arrow/src/memory.rs b/rust/arrow/src/memory.rs index 8bd334469ee..ad103b06280 100644 --- a/rust/arrow/src/memory.rs +++ b/rust/arrow/src/memory.rs @@ -20,7 +20,10 @@ use std::mem::align_of; use std::ptr::NonNull; -use std::{alloc::Layout, 
diff --git a/rust/arrow/src/memory.rs b/rust/arrow/src/memory.rs
index 8bd334469ee..ad103b06280 100644
--- a/rust/arrow/src/memory.rs
+++ b/rust/arrow/src/memory.rs
@@ -20,7 +20,10 @@
 use std::mem::align_of;
 use std::ptr::NonNull;
-use std::{alloc::Layout, sync::atomic::AtomicIsize};
+use std::{
+    alloc::{handle_alloc_error, Layout},
+    sync::atomic::AtomicIsize,
+};

 // NOTE: Below code is written for spatial/temporal prefetcher optimizations. Memory allocation
 // should align well with usage pattern of cache access and block sizes on layers of storage levels from
@@ -138,18 +141,19 @@ const BYPASS_PTR: NonNull<u8> = unsafe { NonNull::new_unchecked(ALIGNMENT as *mut u8) };
 // If this number is not zero after all objects have been `drop`, there is a memory leak
 pub static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0);

-pub fn allocate_aligned(size: usize) -> *mut u8 {
+pub fn allocate_aligned(size: usize) -> NonNull<u8> {
     unsafe {
         if size == 0 {
             // In a perfect world, there is no need to request zero size allocation.
             // Currently, passing zero sized layout to alloc is UB.
             // This will dodge allocator api for any type.
-            BYPASS_PTR.as_ptr()
+            BYPASS_PTR
         } else {
             ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst);
             let layout = Layout::from_size_align_unchecked(size, ALIGNMENT);
-            std::alloc::alloc_zeroed(layout)
+            let raw_ptr = std::alloc::alloc_zeroed(layout);
+            NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout))
         }
     }
 }
@@ -162,10 +166,13 @@ pub fn allocate_aligned(size: usize) -> *mut u8 {
 /// * ptr must denote a block of memory currently allocated via this allocator,
 ///
 /// * size must be the same size that was used to allocate that block of memory,
-pub unsafe fn free_aligned(ptr: *mut u8, size: usize) {
-    if ptr != BYPASS_PTR.as_ptr() {
+pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
+    if ptr != BYPASS_PTR {
         ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst);
-        std::alloc::dealloc(ptr, Layout::from_size_align_unchecked(size, ALIGNMENT));
+        std::alloc::dealloc(
+            ptr.as_ptr(),
+            Layout::from_size_align_unchecked(size, ALIGNMENT),
+        );
     }
 }
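With `allocate_aligned` returning `NonNull<u8>`, allocation failure can no longer leak out as a null pointer; it aborts through `handle_alloc_error` instead. A self-contained sketch of that contract using only std; the `allocate` helper and constant are illustrative, not the crate's API:

```rust
use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
use std::ptr::NonNull;

const ALIGNMENT: usize = 64; // cache-line alignment, as in `memory.rs`

// Either returns a valid, zeroed, 64-byte-aligned block or diverges via
// `handle_alloc_error`; callers never see a null pointer.
fn allocate(size: usize) -> NonNull<u8> {
    assert!(size > 0, "allocating a zero-sized layout is UB");
    let layout = Layout::from_size_align(size, ALIGNMENT).unwrap();
    let raw = unsafe { alloc_zeroed(layout) };
    NonNull::new(raw).unwrap_or_else(|| handle_alloc_error(layout))
}

fn main() {
    let p = allocate(1024);
    assert_eq!(p.as_ptr() as usize % ALIGNMENT, 0);
    unsafe {
        dealloc(p.as_ptr(), Layout::from_size_align(1024, ALIGNMENT).unwrap());
    }
}
```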
@@ -180,65 +187,69 @@ pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) {
 ///
 /// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e.,
 /// the rounded value must be less than usize::MAX).
-pub unsafe fn reallocate(ptr: *mut u8, old_size: usize, new_size: usize) -> *mut u8 {
-    if ptr == BYPASS_PTR.as_ptr() {
+pub unsafe fn reallocate(
+    ptr: NonNull<u8>,
+    old_size: usize,
+    new_size: usize,
+) -> NonNull<u8> {
+    if ptr == BYPASS_PTR {
         return allocate_aligned(new_size);
     }

     if new_size == 0 {
         free_aligned(ptr, old_size);
-        return BYPASS_PTR.as_ptr();
+        return BYPASS_PTR;
     }

     ALLOCATIONS.fetch_add(
         new_size as isize - old_size as isize,
         std::sync::atomic::Ordering::SeqCst,
     );
-    let new_ptr = std::alloc::realloc(
-        ptr,
+    let raw_ptr = std::alloc::realloc(
+        ptr.as_ptr(),
         Layout::from_size_align_unchecked(old_size, ALIGNMENT),
         new_size,
     );
-
-    if !new_ptr.is_null() && new_size > old_size {
-        new_ptr.add(old_size).write_bytes(0, new_size - old_size);
+    let ptr = NonNull::new(raw_ptr).unwrap_or_else(|| {
+        handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT))
+    });
+
+    if new_size > old_size {
+        ptr.as_ptr()
+            .add(old_size)
+            .write_bytes(0, new_size - old_size);
     }
-
-    new_ptr
+    ptr
 }

 /// # Safety
 ///
 /// Behavior is undefined if any of the following conditions are violated:
 ///
-/// * `src` must be valid for reads of `len * size_of::<u8>()` bytes.
+/// * `src` must be valid for reads of `count * size_of::<u8>()` bytes.
 ///
-/// * `dst` must be valid for writes of `len * size_of::<u8>()` bytes.
+/// * `dst` must be valid for writes of `count * size_of::<u8>()` bytes.
 ///
 /// * Both `src` and `dst` must be properly aligned.
 ///
 /// `memcpy` creates a bitwise copy of `T`, regardless of whether `T` is [`Copy`]. If `T` is not
 /// [`Copy`], using both the values in the region beginning at `*src` and the region beginning at
 /// `*dst` can [violate memory safety][read-ownership].
-pub unsafe fn memcpy(dst: *mut u8, src: *const u8, len: usize) {
-    if len != 0x00 && src != BYPASS_PTR.as_ptr() {
-        std::ptr::copy_nonoverlapping(src, dst, len)
+pub unsafe fn memcpy(dst: NonNull<u8>, src: NonNull<u8>, count: usize) {
+    if src != BYPASS_PTR {
+        std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), count)
     }
 }

-extern "C" {
-    pub fn memcmp(p1: *const u8, p2: *const u8, len: usize) -> i32;
-}
-
 /// Check if the pointer `p` is aligned to offset `a`.
-pub fn is_aligned<T>(p: *const T, a: usize) -> bool {
+pub fn is_aligned<T>(p: NonNull<T>, a: usize) -> bool {
     let a_minus_one = a.wrapping_sub(1);
-    let pmoda = p as usize & a_minus_one;
+    let pmoda = p.as_ptr() as usize & a_minus_one;
     pmoda == 0
 }

-pub fn is_ptr_aligned<T>(p: *const T) -> bool {
-    p.align_offset(align_of::<T>()) == 0
+pub fn is_ptr_aligned<T>(p: NonNull<T>) -> bool {
+    p.as_ptr().align_offset(align_of::<T>()) == 0
 }

 #[cfg(test)]
@@ -250,7 +261,7 @@ mod tests {
         for _ in 0..10 {
             let p = allocate_aligned(1024);
             // make sure this is 64-byte aligned
-            assert_eq!(0, (p as usize) % 64);
+            assert_eq!(0, (p.as_ptr() as usize) % 64);
             unsafe { free_aligned(p, 1024) };
         }
     }

@@ -258,16 +269,16 @@ mod tests {
     #[test]
     fn test_is_aligned() {
         // allocate memory aligned to 64-byte
-        let mut ptr = allocate_aligned(10);
+        let ptr = allocate_aligned(10);
         assert_eq!(true, is_aligned::<u8>(ptr, 1));
         assert_eq!(true, is_aligned::<u8>(ptr, 2));
         assert_eq!(true, is_aligned::<u8>(ptr, 4));

         // now make the memory aligned to 63-byte
-        ptr = unsafe { ptr.offset(1) };
+        let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(1)) };
         assert_eq!(true, is_aligned::<u8>(ptr, 1));
         assert_eq!(false, is_aligned::<u8>(ptr, 2));
         assert_eq!(false, is_aligned::<u8>(ptr, 4));
-        unsafe { free_aligned(ptr.offset(-1), 10) };
+        unsafe { free_aligned(NonNull::new_unchecked(ptr.as_ptr().offset(-1)), 10) };
     }
 }
diff --git a/rust/integration-testing/src/bin/arrow-json-integration-test.rs b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
index b1bec677cf1..05b30caed8c 100644
--- a/rust/integration-testing/src/bin/arrow-json-integration-test.rs
+++ b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
@@ -599,7 +599,7 @@ fn create_null_buf(json_col: &ArrowJsonColumn) -> Buffer {
         .iter()
         .enumerate()
         .for_each(|(i, v)| {
-            let null_slice = null_buf.data_mut();
+            let null_slice = null_buf.as_slice_mut();
             if *v != 0 {
                 bit_util::set_bit(null_slice, i);
             }
diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs
index 3f1b4a85e48..c0e05d82bbf 100644
--- a/rust/parquet/src/arrow/array_reader.rs
+++ b/rust/parquet/src/arrow/array_reader.rs
@@ -291,7 +291,7 @@ impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> {
         if T::get_physical_type() == PhysicalType::BOOLEAN {
             let mut boolean_buffer = BooleanBufferBuilder::new(record_data.len());

-            for e in record_data.data() {
+            for e in record_data.as_slice() {
                 boolean_buffer.append(*e > 0);
             }
             record_data = boolean_buffer.finish();
@@ -920,7 +920,7 @@ impl<OffsetSize: OffsetSizeTrait> ArrayReader for ListArrayReader<OffsetSize> {
         let num_bytes = bit_util::ceil(offsets.len(), 8);
         let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
-        let null_slice = null_buf.data_mut();
+        let null_slice = null_buf.as_slice_mut();
         let mut list_index = 0;
         for i in 0..rep_levels.len() {
             if rep_levels[i] == 0 && def_levels[i] != 0 {
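The `record_reader.rs` hunks below repeatedly reinterpret raw byte buffers as typed slices with `slice::align_to`, asserting that no misaligned prefix or suffix remains. A standalone sketch of that pattern, with a hypothetical `levels_from_bytes` helper:

```rust
// Reinterpret a byte buffer as `i16` levels, as the record reader does.
// The assert holds whenever the buffer is 2-byte aligned with even length,
// which arrow's 64-byte-aligned buffers guarantee.
fn levels_from_bytes(bytes: &[u8]) -> &[i16] {
    let (prefix, levels, suffix) = unsafe { bytes.align_to::<i16>() };
    assert!(prefix.is_empty() && suffix.is_empty());
    levels
}

fn main() {
    // Back the bytes with an i16 allocation so the 2-byte alignment holds.
    let backing: Vec<i16> = vec![1, 2];
    let bytes: &[u8] =
        unsafe { std::slice::from_raw_parts(backing.as_ptr() as *const u8, 4) };
    assert_eq!(levels_from_bytes(bytes), &[1i16, 2]);
}
```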
diff --git a/rust/parquet/src/arrow/record_reader.rs b/rust/parquet/src/arrow/record_reader.rs
index 0a7c17fdcfb..d2f77cd3f9a 100644
--- a/rust/parquet/src/arrow/record_reader.rs
+++ b/rust/parquet/src/arrow/record_reader.rs
@@ -163,8 +163,8 @@ impl<T: DataType> RecordReader<T> {
         new_buffer.resize(num_bytes);

-        let new_def_levels = new_buffer.data_mut();
-        let left_def_levels = &def_levels_buf.data_mut()[new_len..];
+        let new_def_levels = new_buffer.as_slice_mut();
+        let left_def_levels = &def_levels_buf.as_slice_mut()[new_len..];

         new_def_levels[0..num_bytes].copy_from_slice(&left_def_levels[0..num_bytes]);

@@ -190,8 +190,8 @@ impl<T: DataType> RecordReader<T> {
         new_buffer.resize(num_bytes);

-        let new_rep_levels = new_buffer.data_mut();
-        let left_rep_levels = &rep_levels_buf.data_mut()[new_len..];
+        let new_rep_levels = new_buffer.as_slice_mut();
+        let left_rep_levels = &rep_levels_buf.as_slice_mut()[new_len..];

         new_rep_levels[0..num_bytes].copy_from_slice(&left_rep_levels[0..num_bytes]);

@@ -217,8 +217,8 @@ impl<T: DataType> RecordReader<T> {
         new_buffer.resize(num_bytes);

-        let new_records = new_buffer.data_mut();
-        let left_records = &mut self.records.data_mut()[new_len..];
+        let new_records = new_buffer.as_slice_mut();
+        let left_records = &mut self.records.as_slice_mut()[new_len..];

         new_records[0..num_bytes].copy_from_slice(&left_records[0..num_bytes]);

@@ -291,20 +291,20 @@ impl<T: DataType> RecordReader<T> {
         // Convert mutable buffer spaces to mutable slices
         let (prefix, values, suffix) =
-            unsafe { self.records.data_mut().align_to_mut::<T::T>() };
+            unsafe { self.records.as_slice_mut().align_to_mut::<T::T>() };
         assert!(prefix.is_empty() && suffix.is_empty());
         let values = &mut values[values_written..];

         let def_levels = self.def_levels.as_mut().map(|buf| {
             let (prefix, def_levels, suffix) =
-                unsafe { buf.data_mut().align_to_mut::<i16>() };
+                unsafe { buf.as_slice_mut().align_to_mut::<i16>() };
             assert!(prefix.is_empty() && suffix.is_empty());
             &mut def_levels[values_written..]
         });

         let rep_levels = self.rep_levels.as_mut().map(|buf| {
             let (prefix, rep_levels, suffix) =
-                unsafe { buf.data_mut().align_to_mut::<i16>() };
+                unsafe { buf.as_slice_mut().align_to_mut::<i16>() };
             assert!(prefix.is_empty() && suffix.is_empty());
             &mut rep_levels[values_written..]
         });
@@ -317,7 +317,8 @@ impl<T: DataType> RecordReader<T> {
         // get new references for the def levels.
         let def_levels = self.def_levels.as_ref().map(|buf| {
-            let (prefix, def_levels, suffix) = unsafe { buf.data().align_to::<i16>() };
+            let (prefix, def_levels, suffix) =
+                unsafe { buf.as_slice().align_to::<i16>() };
             assert!(prefix.is_empty() && suffix.is_empty());
             &def_levels[values_written..]
         });
@@ -370,7 +371,8 @@ impl<T: DataType> RecordReader<T> {
     /// records read.
     fn split_records(&mut self, records_to_read: usize) -> Result<usize> {
         let rep_levels = self.rep_levels.as_ref().map(|buf| {
-            let (prefix, rep_levels, suffix) = unsafe { buf.data().align_to::<i16>() };
+            let (prefix, rep_levels, suffix) =
+                unsafe { buf.as_slice().align_to::<i16>() };
             assert!(prefix.is_empty() && suffix.is_empty());
             rep_levels
         });
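Taken together, the pointer changes in this diff push the non-null invariant into the type system: copy helpers receive `NonNull<u8>` rather than raw pointers, so no runtime null branches are needed. A std-only sketch in that style; the `copy` helper is illustrative, and the crate's `memcpy` additionally skips its `BYPASS_PTR` sentinel:

```rust
use std::ptr::NonNull;

// The caller proves non-nullness by constructing `NonNull`, so the copy
// itself carries no null checks.
unsafe fn copy(dst: NonNull<u8>, src: NonNull<u8>, count: usize) {
    std::ptr::copy_nonoverlapping(src.as_ptr(), dst.as_ptr(), count)
}

fn main() {
    let src = [1u8, 2, 3, 4];
    let mut dst = [0u8; 4];
    unsafe {
        copy(
            NonNull::new(dst.as_mut_ptr()).unwrap(),
            NonNull::new(src.as_ptr() as *mut u8).unwrap(),
            src.len(),
        );
    }
    assert_eq!(dst, src);
}
```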