Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions rust/arrow-flight/src/arrow.flight.protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,9 +499,8 @@ pub mod flight_service_server {
#[async_trait]
pub trait FlightService: Send + Sync + 'static {
#[doc = "Server streaming response type for the Handshake method."]
type HandshakeStream: Stream<
Item = Result<super::HandshakeResponse, tonic::Status>,
> + Send
type HandshakeStream: Stream<Item = Result<super::HandshakeResponse, tonic::Status>>
+ Send
+ Sync
+ 'static;
#[doc = ""]
Expand Down
44 changes: 15 additions & 29 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray<T> {

impl<OffsetSize: BinaryOffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}BinaryArray\n[\n", OffsetSize::prefix())?;
let prefix = if OffsetSize::is_large() { "Large" } else { "" };

write!(f, "{}BinaryArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
})?;
Expand Down Expand Up @@ -776,7 +778,9 @@ mod tests {
.build();
let binary_array1 = BinaryArray::from(array_data1);

let array_data2 = ArrayData::builder(DataType::Binary)
let data_type =
DataType::List(Box::new(Field::new("item", DataType::UInt8, false)));
let array_data2 = ArrayData::builder(data_type)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
Expand Down Expand Up @@ -818,7 +822,9 @@ mod tests {
.build();
let binary_array1 = LargeBinaryArray::from(array_data1);

let array_data2 = ArrayData::builder(DataType::Binary)
let data_type =
DataType::LargeList(Box::new(Field::new("item", DataType::UInt8, false)));
let array_data2 = ArrayData::builder(data_type)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
Expand Down Expand Up @@ -869,41 +875,21 @@ mod tests {

#[test]
#[should_panic(
expected = "BinaryArray can only be created from List<u8> arrays, mismatched \
data types."
)]
fn test_binary_array_from_incorrect_list_array_type() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from_slice_ref(&values))
.build();
let offsets: [i32; 4] = [0, 5, 5, 12];

let array_data = ArrayData::builder(DataType::Utf8)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
.build();
let list_array = ListArray::from(array_data);
BinaryArray::from(list_array);
}

#[test]
#[should_panic(
expected = "BinaryArray can only be created from list array of u8 values \
(i.e. List<PrimitiveArray<u8>>)."
expected = "assertion failed: `(left == right)`\n left: `UInt32`,\n \
right: `UInt8`: BinaryArray can only be created from List<u8> arrays, \
mismatched data types."
)]
fn test_binary_array_from_incorrect_list_array() {
let values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let values_data = ArrayData::builder(DataType::UInt32)
.len(12)
.add_buffer(Buffer::from_slice_ref(&values))
.add_child_data(ArrayData::builder(DataType::Boolean).build())
.build();
let offsets: [i32; 4] = [0, 5, 5, 12];

let array_data = ArrayData::builder(DataType::Utf8)
let data_type =
DataType::List(Box::new(Field::new("item", DataType::UInt32, false)));
let array_data = ArrayData::builder(data_type)
.len(3)
.add_buffer(Buffer::from_slice_ref(&offsets))
.add_child_data(values_data)
Expand Down
98 changes: 73 additions & 25 deletions rust/arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
// under the License.

use std::any::Any;
use std::convert::From;
use std::fmt;
use std::mem;

Expand All @@ -26,19 +25,20 @@ use super::{
array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayDataRef,
ArrayRef, GenericListArrayIter,
};
use crate::datatypes::ArrowNativeType;
use crate::datatypes::*;
use crate::{datatypes::ArrowNativeType, error::ArrowError};

/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + Num + Ord + std::ops::AddAssign {
fn prefix() -> &'static str;
fn is_large() -> bool;

fn to_isize(&self) -> isize;
}

impl OffsetSizeTrait for i32 {
fn prefix() -> &'static str {
""
#[inline]
fn is_large() -> bool {
false
}

fn to_isize(&self) -> isize {
Expand All @@ -47,8 +47,9 @@ impl OffsetSizeTrait for i32 {
}

impl OffsetSizeTrait for i64 {
fn prefix() -> &'static str {
"Large"
#[inline]
fn is_large() -> bool {
true
}

fn to_isize(&self) -> isize {
Expand Down Expand Up @@ -116,6 +117,21 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> {
GenericListArrayIter::<'a, OffsetSize>::new(&self)
}

#[inline]
fn get_type(data_type: &DataType) -> Option<&DataType> {
if OffsetSize::is_large() {
if let DataType::LargeList(child) = data_type {
Some(child.data_type())
} else {
None
}
} else if let DataType::List(child) = data_type {
Some(child.data_type())
} else {
None
}
}
}

impl<'a, S: OffsetSizeTrait> IntoIterator for &'a GenericListArray<S> {
Expand All @@ -129,31 +145,61 @@ impl<'a, S: OffsetSizeTrait> IntoIterator for &'a GenericListArray<S> {

impl<OffsetSize: OffsetSizeTrait> From<ArrayDataRef> for GenericListArray<OffsetSize> {
fn from(data: ArrayDataRef) -> Self {
assert_eq!(
data.buffers().len(),
1,
"ListArray data should contain a single buffer only (value offsets)"
);
assert_eq!(
data.child_data().len(),
1,
"ListArray should contain a single child array (values array)"
);
let values = make_array(data.child_data()[0].clone());
Self::try_new_from_array_data(data).expect(
"Expected infallable creation of GenericListArray from ArrayDataRef failed",
)
}
}

impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
fn try_new_from_array_data(data: ArrayDataRef) -> Result<Self, ArrowError> {
if data.buffers().len() != 1 {
return Err(ArrowError::InvalidArgumentError(
format!("ListArray data should contain a single buffer only (value offsets), had {}",
data.len())));
}

if data.child_data().len() != 1 {
return Err(ArrowError::InvalidArgumentError(format!(
"ListArray should contain a single child array (values array), had {}",
data.child_data().len()
)));
}

let values = data.child_data()[0].clone();

if let Some(child_data_type) = Self::get_type(data.data_type()) {
if values.data_type() != child_data_type {
return Err(ArrowError::InvalidArgumentError(format!(
"[Large]ListArray's child datatype {:?} does not \
correspond to the List's datatype {:?}",
values.data_type(),
child_data_type
)));
}
} else {
return Err(ArrowError::InvalidArgumentError(format!(
"[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}",
data.data_type()
)));
}

let values = make_array(values);
let value_offsets = data.buffers()[0].as_ptr();

let value_offsets = unsafe { RawPtrBox::<OffsetSize>::new(value_offsets) };
unsafe {
assert!(
(*value_offsets.as_ptr().offset(0)).is_zero(),
"offsets do not start at zero"
);
if !(*value_offsets.as_ptr().offset(0)).is_zero() {
return Err(ArrowError::InvalidArgumentError(String::from(
"offsets do not start at zero",
)));
}
}
Self {
Ok(Self {
data,
values,
value_offsets,
}
})
}
}

Expand Down Expand Up @@ -183,7 +229,9 @@ impl<OffsetSize: 'static + OffsetSizeTrait> Array for GenericListArray<OffsetSiz

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericListArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}ListArray\n[\n", OffsetSize::prefix())?;
let prefix = if OffsetSize::is_large() { "Large" } else { "" };

write!(f, "{}ListArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
})?;
Expand Down
4 changes: 3 additions & 1 deletion rust/arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ impl<'a, T: StringOffsetSizeTrait> GenericStringArray<T> {

impl<OffsetSize: StringOffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}StringArray\n[\n", OffsetSize::prefix())?;
let prefix = if OffsetSize::is_large() { "Large" } else { "" };

write!(f, "{}StringArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
fmt::Debug::fmt(&array.value(index), f)
})?;
Expand Down
2 changes: 1 addition & 1 deletion rust/arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ where
values_data.data_type().clone(),
true, // TODO: find a consistent way of getting this
));
let data_type = if OffsetSize::prefix() == "Large" {
let data_type = if OffsetSize::is_large() {
DataType::LargeList(field)
} else {
DataType::List(field)
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
let underlying_array = make_array(data.child_data()[0].clone());
let cast_array = cast(&underlying_array, to.data_type())?;
let array_data = ArrayData::new(
to.data_type().clone(),
to_type.clone(),
array.len(),
Some(cast_array.null_count()),
cast_array
Expand All @@ -276,7 +276,7 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
let offsets: Vec<i32> = (0..=array.len() as i32).collect();
let value_offsets = Buffer::from_slice_ref(&offsets);
let list_data = ArrayData::new(
to.data_type().clone(),
to_type.clone(),
array.len(),
Some(cast_array.null_count()),
cast_array
Expand Down