diff --git a/rust/arrow-flight/src/arrow.flight.protocol.rs b/rust/arrow-flight/src/arrow.flight.protocol.rs index 8de767cb8a4..2a87d7b2683 100644 --- a/rust/arrow-flight/src/arrow.flight.protocol.rs +++ b/rust/arrow-flight/src/arrow.flight.protocol.rs @@ -499,9 +499,8 @@ pub mod flight_service_server { #[async_trait] pub trait FlightService: Send + Sync + 'static { #[doc = "Server streaming response type for the Handshake method."] - type HandshakeStream: Stream< - Item = Result, - > + Send + type HandshakeStream: Stream> + + Send + Sync + 'static; #[doc = ""] diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs index c59e540551a..ebbeb52d4f0 100644 --- a/rust/arrow/src/array/array_binary.rs +++ b/rust/arrow/src/array/array_binary.rs @@ -171,7 +171,9 @@ impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray { impl fmt::Debug for GenericBinaryArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}BinaryArray\n[\n", OffsetSize::prefix())?; + let prefix = if OffsetSize::is_large() { "Large" } else { "" }; + + write!(f, "{}BinaryArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { fmt::Debug::fmt(&array.value(index), f) })?; @@ -776,7 +778,9 @@ mod tests { .build(); let binary_array1 = BinaryArray::from(array_data1); - let array_data2 = ArrayData::builder(DataType::Binary) + let data_type = + DataType::List(Box::new(Field::new("item", DataType::UInt8, false))); + let array_data2 = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_child_data(values_data) @@ -818,7 +822,9 @@ mod tests { .build(); let binary_array1 = LargeBinaryArray::from(array_data1); - let array_data2 = ArrayData::builder(DataType::Binary) + let data_type = + DataType::LargeList(Box::new(Field::new("item", DataType::UInt8, false))); + let array_data2 = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_child_data(values_data) @@ -869,41 +875,21 @@ mod tests { #[test] #[should_panic( - expected = "BinaryArray can only be created from List arrays, mismatched \ - data types." - )] - fn test_binary_array_from_incorrect_list_array_type() { - let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; - let values_data = ArrayData::builder(DataType::UInt32) - .len(12) - .add_buffer(Buffer::from_slice_ref(&values)) - .build(); - let offsets: [i32; 4] = [0, 5, 5, 12]; - - let array_data = ArrayData::builder(DataType::Utf8) - .len(3) - .add_buffer(Buffer::from_slice_ref(&offsets)) - .add_child_data(values_data) - .build(); - let list_array = ListArray::from(array_data); - BinaryArray::from(list_array); - } - - #[test] - #[should_panic( - expected = "BinaryArray can only be created from list array of u8 values \ - (i.e. List>)." + expected = "assertion failed: `(left == right)`\n left: `UInt32`,\n \ + right: `UInt8`: BinaryArray can only be created from List arrays, \ + mismatched data types." )] fn test_binary_array_from_incorrect_list_array() { let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]; let values_data = ArrayData::builder(DataType::UInt32) .len(12) .add_buffer(Buffer::from_slice_ref(&values)) - .add_child_data(ArrayData::builder(DataType::Boolean).build()) .build(); let offsets: [i32; 4] = [0, 5, 5, 12]; - let array_data = ArrayData::builder(DataType::Utf8) + let data_type = + DataType::List(Box::new(Field::new("item", DataType::UInt32, false))); + let array_data = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(&offsets)) .add_child_data(values_data) diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs index aa399c9647b..09bbd75af51 100644 --- a/rust/arrow/src/array/array_list.rs +++ b/rust/arrow/src/array/array_list.rs @@ -16,7 +16,6 @@ // under the License. use std::any::Any; -use std::convert::From; use std::fmt; use std::mem; @@ -26,19 +25,20 @@ use super::{ array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayDataRef, ArrayRef, GenericListArrayIter, }; -use crate::datatypes::ArrowNativeType; use crate::datatypes::*; +use crate::{datatypes::ArrowNativeType, error::ArrowError}; /// trait declaring an offset size, relevant for i32 vs i64 array types. pub trait OffsetSizeTrait: ArrowNativeType + Num + Ord + std::ops::AddAssign { - fn prefix() -> &'static str; + fn is_large() -> bool; fn to_isize(&self) -> isize; } impl OffsetSizeTrait for i32 { - fn prefix() -> &'static str { - "" + #[inline] + fn is_large() -> bool { + false } fn to_isize(&self) -> isize { @@ -47,8 +47,9 @@ impl OffsetSizeTrait for i32 { } impl OffsetSizeTrait for i64 { - fn prefix() -> &'static str { - "Large" + #[inline] + fn is_large() -> bool { + true } fn to_isize(&self) -> isize { @@ -116,6 +117,21 @@ impl GenericListArray { pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> { GenericListArrayIter::<'a, OffsetSize>::new(&self) } + + #[inline] + fn get_type(data_type: &DataType) -> Option<&DataType> { + if OffsetSize::is_large() { + if let DataType::LargeList(child) = data_type { + Some(child.data_type()) + } else { + None + } + } else if let DataType::List(child) = data_type { + Some(child.data_type()) + } else { + None + } + } } impl<'a, S: OffsetSizeTrait> IntoIterator for &'a GenericListArray { @@ -129,31 +145,61 @@ impl<'a, S: OffsetSizeTrait> IntoIterator for &'a GenericListArray { impl From for GenericListArray { fn from(data: ArrayDataRef) -> Self { - assert_eq!( - data.buffers().len(), - 1, - "ListArray data should contain a single buffer only (value offsets)" - ); - assert_eq!( - data.child_data().len(), - 1, - "ListArray should contain a single child array (values array)" - ); - let values = make_array(data.child_data()[0].clone()); + Self::try_new_from_array_data(data).expect( + "Expected infallable creation of GenericListArray from ArrayDataRef failed", + ) + } +} + +impl GenericListArray { + fn try_new_from_array_data(data: ArrayDataRef) -> Result { + if data.buffers().len() != 1 { + return Err(ArrowError::InvalidArgumentError( + format!("ListArray data should contain a single buffer only (value offsets), had {}", + data.len()))); + } + + if data.child_data().len() != 1 { + return Err(ArrowError::InvalidArgumentError(format!( + "ListArray should contain a single child array (values array), had {}", + data.child_data().len() + ))); + } + + let values = data.child_data()[0].clone(); + + if let Some(child_data_type) = Self::get_type(data.data_type()) { + if values.data_type() != child_data_type { + return Err(ArrowError::InvalidArgumentError(format!( + "[Large]ListArray's child datatype {:?} does not \ + correspond to the List's datatype {:?}", + values.data_type(), + child_data_type + ))); + } + } else { + return Err(ArrowError::InvalidArgumentError(format!( + "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}", + data.data_type() + ))); + } + + let values = make_array(values); let value_offsets = data.buffers()[0].as_ptr(); let value_offsets = unsafe { RawPtrBox::::new(value_offsets) }; unsafe { - assert!( - (*value_offsets.as_ptr().offset(0)).is_zero(), - "offsets do not start at zero" - ); + if !(*value_offsets.as_ptr().offset(0)).is_zero() { + return Err(ArrowError::InvalidArgumentError(String::from( + "offsets do not start at zero", + ))); + } } - Self { + Ok(Self { data, values, value_offsets, - } + }) } } @@ -183,7 +229,9 @@ impl Array for GenericListArray fmt::Debug for GenericListArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}ListArray\n[\n", OffsetSize::prefix())?; + let prefix = if OffsetSize::is_large() { "Large" } else { "" }; + + write!(f, "{}ListArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { fmt::Debug::fmt(&array.value(index), f) })?; diff --git a/rust/arrow/src/array/array_string.rs b/rust/arrow/src/array/array_string.rs index fbce81ea908..d3491a6a130 100644 --- a/rust/arrow/src/array/array_string.rs +++ b/rust/arrow/src/array/array_string.rs @@ -255,7 +255,9 @@ impl<'a, T: StringOffsetSizeTrait> GenericStringArray { impl fmt::Debug for GenericStringArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}StringArray\n[\n", OffsetSize::prefix())?; + let prefix = if OffsetSize::is_large() { "Large" } else { "" }; + + write!(f, "{}StringArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { fmt::Debug::fmt(&array.value(index), f) })?; diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 61724da1d2a..f786e6b0896 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -754,7 +754,7 @@ where values_data.data_type().clone(), true, // TODO: find a consistent way of getting this )); - let data_type = if OffsetSize::prefix() == "Large" { + let data_type = if OffsetSize::is_large() { DataType::LargeList(field) } else { DataType::List(field) diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index c55422579cd..567409ef9bb 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -250,7 +250,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { let underlying_array = make_array(data.child_data()[0].clone()); let cast_array = cast(&underlying_array, to.data_type())?; let array_data = ArrayData::new( - to.data_type().clone(), + to_type.clone(), array.len(), Some(cast_array.null_count()), cast_array @@ -276,7 +276,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { let offsets: Vec = (0..=array.len() as i32).collect(); let value_offsets = Buffer::from_slice_ref(&offsets); let list_data = ArrayData::new( - to.data_type().clone(), + to_type.clone(), array.len(), Some(cast_array.null_count()), cast_array