Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
349 changes: 339 additions & 10 deletions rust/arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,19 @@ use std::any::Any;
use std::convert::From;
use std::fmt;
use std::mem;
use std::sync::Arc;

use num::Num;

use super::{
array::print_long_array, make_array, raw_pointer::RawPtrBox, Array, ArrayDataRef,
ArrayRef,
ArrayRef, BinaryBuilder, BooleanBuilder, FixedSizeListBuilder, PrimitiveBuilder,
StringBuilder,
};
use crate::array::builder::GenericListBuilder;
use crate::datatypes::ArrowNativeType;
use crate::datatypes::DataType;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};

/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + Num + Ord + std::ops::AddAssign {
Expand Down Expand Up @@ -238,13 +242,16 @@ impl From<ArrayDataRef> for FixedSizeListArray {
let values = make_array(data.child_data()[0].clone());
let length = match data.data_type() {
DataType::FixedSizeList(_, len) => {
// check that child data is multiple of length
assert_eq!(
values.len() % *len as usize,
0,
"FixedSizeListArray child array length should be a multiple of {}",
len
);
if *len > 0 {
// check that child data is multiple of length
assert_eq!(
values.len() % *len as usize,
0,
"FixedSizeListArray child array length should be a multiple of {}",
len
);
}

*len
}
_ => {
Expand Down Expand Up @@ -295,10 +302,268 @@ impl fmt::Debug for FixedSizeListArray {
}
}

/// Expands to an expression that builds an empty list array whose items are
/// primitives of `$item_type`, with `$offset_type` as the list offset type.
///
/// No values are appended before `finish()` is called, and the finished
/// array is returned as `Ok(Arc::new(..))`.
macro_rules! build_empty_list_array_with_primitive_items {
    ($item_type:ident, $offset_type:ident) => {{
        // The child builder is created via `new(0)` and handed straight to
        // the list builder.
        let mut list_builder =
            GenericListBuilder::<$offset_type, PrimitiveBuilder<$item_type>>::new(
                PrimitiveBuilder::<$item_type>::new(0),
            );
        Ok(Arc::new(list_builder.finish()))
    }};
}

/// Expands to an expression that builds an empty list array whose items are
/// produced by the builder type `$type_builder` (constructed via
/// `$type_builder::new(0)`), with `$offset_type` as the list offset type.
///
/// No values are appended before `finish()` is called, and the finished
/// array is returned as `Ok(Arc::new(..))`.
macro_rules! build_empty_list_array_with_non_primitive_items {
($type_builder:ident, $offset_type:ident) => {{
let values_builder = $type_builder::new(0);
let mut builder =
GenericListBuilder::<$offset_type, $type_builder>::new(values_builder);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that for an empty list, we know that the offset buffer will be a single entry, 0, and the values buffer will be an empty buffer (len = 0). Therefore, this code could be simplified by just passing the buffers directly instead of using builders.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow API is a Complex, I still dont know how to do it pretty simple as you suggested with Buffer. I think it's not a big performance impact to use builder in this place.

let empty_list_array = builder.finish();
Ok(Arc::new(empty_list_array))
}};
}

pub fn build_empty_list_array<OffsetSize: OffsetSizeTrait>(
item_type: DataType,
) -> Result<ArrayRef> {
match item_type {
DataType::UInt8 => {
build_empty_list_array_with_primitive_items!(UInt8Type, OffsetSize)
}
DataType::UInt16 => {
build_empty_list_array_with_primitive_items!(UInt16Type, OffsetSize)
}
DataType::UInt32 => {
build_empty_list_array_with_primitive_items!(UInt32Type, OffsetSize)
}
DataType::UInt64 => {
build_empty_list_array_with_primitive_items!(UInt64Type, OffsetSize)
}
DataType::Int8 => {
build_empty_list_array_with_primitive_items!(Int8Type, OffsetSize)
}
DataType::Int16 => {
build_empty_list_array_with_primitive_items!(Int16Type, OffsetSize)
}
DataType::Int32 => {
build_empty_list_array_with_primitive_items!(Int32Type, OffsetSize)
}
DataType::Int64 => {
build_empty_list_array_with_primitive_items!(Int64Type, OffsetSize)
}
DataType::Float32 => {
build_empty_list_array_with_primitive_items!(Float32Type, OffsetSize)
}
DataType::Float64 => {
build_empty_list_array_with_primitive_items!(Float64Type, OffsetSize)
}
DataType::Boolean => {
build_empty_list_array_with_non_primitive_items!(BooleanBuilder, OffsetSize)
}
DataType::Date32(_) => {
build_empty_list_array_with_primitive_items!(Date32Type, OffsetSize)
}
DataType::Date64(_) => {
build_empty_list_array_with_primitive_items!(Date64Type, OffsetSize)
}
DataType::Time32(TimeUnit::Second) => {
build_empty_list_array_with_primitive_items!(Time32SecondType, OffsetSize)
}
DataType::Time32(TimeUnit::Millisecond) => {
build_empty_list_array_with_primitive_items!(
Time32MillisecondType,
OffsetSize
)
}
DataType::Time64(TimeUnit::Microsecond) => {
build_empty_list_array_with_primitive_items!(
Time64MicrosecondType,
OffsetSize
)
}
DataType::Time64(TimeUnit::Nanosecond) => {
build_empty_list_array_with_primitive_items!(Time64NanosecondType, OffsetSize)
}
DataType::Duration(TimeUnit::Second) => {
build_empty_list_array_with_primitive_items!(DurationSecondType, OffsetSize)
}
DataType::Duration(TimeUnit::Millisecond) => {
build_empty_list_array_with_primitive_items!(
DurationMillisecondType,
OffsetSize
)
}
DataType::Duration(TimeUnit::Microsecond) => {
build_empty_list_array_with_primitive_items!(
DurationMicrosecondType,
OffsetSize
)
}
DataType::Duration(TimeUnit::Nanosecond) => {
build_empty_list_array_with_primitive_items!(
DurationNanosecondType,
OffsetSize
)
}
DataType::Timestamp(TimeUnit::Second, _) => {
build_empty_list_array_with_primitive_items!(TimestampSecondType, OffsetSize)
}
DataType::Timestamp(TimeUnit::Millisecond, _) => {
build_empty_list_array_with_primitive_items!(
TimestampMillisecondType,
OffsetSize
)
}
DataType::Timestamp(TimeUnit::Microsecond, _) => {
build_empty_list_array_with_primitive_items!(
TimestampMicrosecondType,
OffsetSize
)
}
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
build_empty_list_array_with_primitive_items!(
TimestampNanosecondType,
OffsetSize
)
}
DataType::Utf8 => {
build_empty_list_array_with_non_primitive_items!(StringBuilder, OffsetSize)
}
DataType::Binary => {
build_empty_list_array_with_non_primitive_items!(BinaryBuilder, OffsetSize)
}
_ => Err(ArrowError::NotYetImplemented(format!(
"GenericListBuilder of type List({:?}) is not supported",
item_type
))),
}
}

/// Expands to an expression that builds an empty fixed-size list array whose
/// items are primitives of `$item_type`.
///
/// The `FixedSizeListBuilder` is created with `0` as its second argument and
/// finished without appending anything; the finished array is returned as
/// `Ok(Arc::new(..))`.
macro_rules! build_empty_fixed_size_list_array_with_primitive_items {
    ($item_type:ident) => {{
        let mut list_builder =
            FixedSizeListBuilder::new(PrimitiveBuilder::<$item_type>::new(0), 0);
        Ok(Arc::new(list_builder.finish()))
    }};
}

/// Expands to an expression that builds an empty fixed-size list array whose
/// items are produced by the builder type `$type_builder` (constructed via
/// `$type_builder::new(0)`).
///
/// The `FixedSizeListBuilder` is created with `0` as its second argument and
/// finished without appending anything; the finished array is returned as
/// `Ok(Arc::new(..))`.
macro_rules! build_empty_fixed_size_list_array_with_non_primitive_items {
    ($type_builder:ident) => {{
        let mut list_builder = FixedSizeListBuilder::new($type_builder::new(0), 0);
        Ok(Arc::new(list_builder.finish()))
    }};
}

pub fn build_empty_fixed_size_list_array(item_type: DataType) -> Result<ArrayRef> {
match item_type {
DataType::UInt8 => {
build_empty_fixed_size_list_array_with_primitive_items!(UInt8Type)
}
DataType::UInt16 => {
build_empty_fixed_size_list_array_with_primitive_items!(UInt16Type)
}
DataType::UInt32 => {
build_empty_fixed_size_list_array_with_primitive_items!(UInt32Type)
}
DataType::UInt64 => {
build_empty_fixed_size_list_array_with_primitive_items!(UInt64Type)
}
DataType::Int8 => {
build_empty_fixed_size_list_array_with_primitive_items!(Int8Type)
}
DataType::Int16 => {
build_empty_fixed_size_list_array_with_primitive_items!(Int16Type)
}
DataType::Int32 => {
build_empty_fixed_size_list_array_with_primitive_items!(Int32Type)
}
DataType::Int64 => {
build_empty_fixed_size_list_array_with_primitive_items!(Int64Type)
}
DataType::Float32 => {
build_empty_fixed_size_list_array_with_primitive_items!(Float32Type)
}
DataType::Float64 => {
build_empty_fixed_size_list_array_with_primitive_items!(Float64Type)
}
DataType::Boolean => {
build_empty_fixed_size_list_array_with_non_primitive_items!(BooleanBuilder)
}
DataType::Date32(_) => {
build_empty_fixed_size_list_array_with_primitive_items!(Date32Type)
}
DataType::Date64(_) => {
build_empty_fixed_size_list_array_with_primitive_items!(Date64Type)
}
DataType::Time32(TimeUnit::Second) => {
build_empty_fixed_size_list_array_with_primitive_items!(Time32SecondType)
}
DataType::Time32(TimeUnit::Millisecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(Time32MillisecondType)
}
DataType::Time64(TimeUnit::Microsecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(Time64MicrosecondType)
}
DataType::Time64(TimeUnit::Nanosecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(Time64NanosecondType)
}
DataType::Duration(TimeUnit::Second) => {
build_empty_fixed_size_list_array_with_primitive_items!(DurationSecondType)
}
DataType::Duration(TimeUnit::Millisecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(
DurationMillisecondType
)
}
DataType::Duration(TimeUnit::Microsecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(
DurationMicrosecondType
)
}
DataType::Duration(TimeUnit::Nanosecond) => {
build_empty_fixed_size_list_array_with_primitive_items!(
DurationNanosecondType
)
}
DataType::Timestamp(TimeUnit::Second, _) => {
build_empty_fixed_size_list_array_with_primitive_items!(TimestampSecondType)
}
DataType::Timestamp(TimeUnit::Millisecond, _) => {
build_empty_fixed_size_list_array_with_primitive_items!(
TimestampMillisecondType
)
}
DataType::Timestamp(TimeUnit::Microsecond, _) => {
build_empty_fixed_size_list_array_with_primitive_items!(
TimestampMicrosecondType
)
}
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
build_empty_fixed_size_list_array_with_primitive_items!(
TimestampNanosecondType
)
}
DataType::Utf8 => {
build_empty_fixed_size_list_array_with_non_primitive_items!(StringBuilder)
}
DataType::Binary => {
build_empty_fixed_size_list_array_with_non_primitive_items!(BinaryBuilder)
}
_ => Err(ArrowError::NotYetImplemented(format!(
"FixedSizeListBuilder of type FixedSizeList({:?}) is not supported",
item_type
))),
}
}

#[cfg(test)]
mod tests {
use crate::{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test just to verify?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, added.

array::ArrayData, array::Int32Array, buffer::Buffer, datatypes::Field,
array::ArrayData, array::Int32Array, buffer::Buffer, datatypes::Field, memory,
util::bit_util,
};

Expand Down Expand Up @@ -770,4 +1035,68 @@ mod tests {
.build();
ListArray::from(list_data);
}

/// Constructing an `Int32Array` from a buffer sliced at offset 1 is expected
/// to panic with "memory is not aligned".
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
    let raw = memory::allocate_aligned(8);
    // NOTE(review): assumes `from_raw_parts(ptr, 8, 8)` describes exactly the
    // 8 bytes allocated above - confirm against the `Buffer` API.
    let aligned = unsafe { Buffer::from_raw_parts(raw, 8, 8) };
    // Slicing at 1 is what should make the data start misaligned.
    let misaligned = aligned.slice(1);
    let data = ArrayData::builder(DataType::Int32)
        .add_buffer(misaligned)
        .build();
    Int32Array::from(data);
}

/// Constructing a `ListArray` whose first buffer is sliced at offset 1 is
/// expected to panic with "memory is not aligned".
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
    let raw = memory::allocate_aligned(8);
    // NOTE(review): assumes `from_raw_parts(ptr, 8, 8)` describes exactly the
    // 8 bytes allocated above - confirm against the `Buffer` API.
    let aligned = unsafe { Buffer::from_raw_parts(raw, 8, 8) };
    // Slicing at 1 is what should make the buffer start misaligned.
    let misaligned = aligned.slice(1);

    // Child data: eight zeroed i32 values.
    let zeros = [0_i32; 8];
    let child_data = ArrayData::builder(DataType::Int32)
        .add_buffer(Buffer::from(zeros.to_byte_slice()))
        .build();

    let item_field = Field::new("item", DataType::Int32, false);
    let list_data = ArrayData::builder(DataType::List(Box::new(item_field)))
        .add_buffer(misaligned)
        .add_child_data(child_data)
        .build();
    ListArray::from(list_data);
}

/// Calls `build_empty_list_array` with `$OFFSET` as the offset type for a
/// selection of supported item types and unwraps every result, so any `Err`
/// fails the enclosing test.
macro_rules! make_test_build_empty_list_array {
    ($OFFSET:ident) => {
        // Each item type is exercised once; `Boolean` used to be listed
        // twice, so the duplicate was swapped for `Int8`.
        build_empty_list_array::<$OFFSET>(DataType::Boolean).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Int8).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Int16).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Int32).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Int64).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Float32).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Float64).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Utf8).unwrap();
        build_empty_list_array::<$OFFSET>(DataType::Binary).unwrap();
    };
}

/// Empty list arrays can be built with both `i32` and `i64` offsets.
#[test]
fn test_build_empty_list_array() {
    make_test_build_empty_list_array!(i32);
    make_test_build_empty_list_array!(i64);
}

/// Empty fixed-size list arrays can be built for a selection of supported
/// item types; every result is unwrapped so any `Err` fails the test.
#[test]
fn test_build_empty_fixed_size_list_array() {
    // Each item type is exercised once; `Boolean` used to be listed twice,
    // so the duplicate was swapped for `Int8`.
    build_empty_fixed_size_list_array(DataType::Boolean).unwrap();
    build_empty_fixed_size_list_array(DataType::Int8).unwrap();
    build_empty_fixed_size_list_array(DataType::Int16).unwrap();
    build_empty_fixed_size_list_array(DataType::Int32).unwrap();
    build_empty_fixed_size_list_array(DataType::Int64).unwrap();
    build_empty_fixed_size_list_array(DataType::Float32).unwrap();
    build_empty_fixed_size_list_array(DataType::Float64).unwrap();
    build_empty_fixed_size_list_array(DataType::Utf8).unwrap();
    build_empty_fixed_size_list_array(DataType::Binary).unwrap();
}
}
2 changes: 2 additions & 0 deletions rust/arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,8 @@ pub use self::array_binary::FixedSizeBinaryArray;
pub use self::array_binary::LargeBinaryArray;
pub use self::array_boolean::BooleanArray;
pub use self::array_dictionary::DictionaryArray;
pub use self::array_list::build_empty_fixed_size_list_array;
pub use self::array_list::build_empty_list_array;
pub use self::array_list::FixedSizeListArray;
pub use self::array_list::LargeListArray;
pub use self::array_list::ListArray;
Expand Down
Loading