-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-11504: [Rust] Added checks to List DataType. #9425
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,12 +31,19 @@ use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType, Field}; | |
|
|
||
| /// trait declaring an offset size, relevant for i32 vs i64 array types. | ||
| pub trait OffsetSizeTrait: ArrowNativeType + Num + Ord + std::ops::AddAssign { | ||
| fn is_large() -> bool; | ||
|
|
||
| fn prefix() -> &'static str; | ||
|
|
||
| fn to_isize(&self) -> isize; | ||
| } | ||
|
|
||
| impl OffsetSizeTrait for i32 { | ||
| #[inline] | ||
| fn is_large() -> bool { | ||
| false | ||
| } | ||
|
|
||
| fn prefix() -> &'static str { | ||
| "" | ||
| } | ||
|
|
@@ -47,6 +54,11 @@ impl OffsetSizeTrait for i32 { | |
| } | ||
|
|
||
| impl OffsetSizeTrait for i64 { | ||
| #[inline] | ||
| fn is_large() -> bool { | ||
| true | ||
| } | ||
|
|
||
| fn prefix() -> &'static str { | ||
| "Large" | ||
| } | ||
|
|
@@ -117,6 +129,21 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> { | |
| GenericListArrayIter::<'a, OffsetSize>::new(&self) | ||
| } | ||
|
|
||
| #[inline] | ||
| fn get_type(data_type: &DataType) -> Option<&DataType> { | ||
| if OffsetSize::is_large() { | ||
| if let DataType::LargeList(child) = data_type { | ||
| Some(child.data_type()) | ||
| } else { | ||
| None | ||
| } | ||
| } else if let DataType::List(child) = data_type { | ||
| Some(child.data_type()) | ||
| } else { | ||
| None | ||
| } | ||
| } | ||
|
|
||
| /// Creates a [`GenericListArray`] from an iterator of primitive values | ||
| /// # Example | ||
| /// ``` | ||
|
|
@@ -193,7 +220,19 @@ impl<OffsetSize: OffsetSizeTrait> From<ArrayDataRef> for GenericListArray<Offset | |
| 1, | ||
| "ListArray should contain a single child array (values array)" | ||
| ); | ||
| let values = make_array(data.child_data()[0].clone()); | ||
|
|
||
| let values = data.child_data()[0].clone(); | ||
|
|
||
| if let Some(child) = Self::get_type(data.data_type()) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we have a few tests to cover this? checking error message and all. Also I'm not sure if
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with you. However, that requires a larger change as we would need to move from
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is a mid-way proposal: a31a35a Basically, to use normal rust handling, but the make the I actually think using asserts / panics directly (as in this PR) is also fine beacuse:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW I also tried using |
||
| assert_eq!(values.data_type(), child, "[Large]ListArray's child datatype does not correspond to the List's datatype"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: long line? I remember we enforce a limit of 100 characters. |
||
| } else { | ||
| panic!( | ||
| "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}", | ||
| data.data_type() | ||
| ); | ||
| } | ||
|
|
||
| let values = make_array(values); | ||
| let value_offsets = data.buffers()[0].as_ptr(); | ||
|
|
||
| let value_offsets = unsafe { RawPtrBox::<OffsetSize>::new(value_offsets) }; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -984,7 +984,6 @@ mod tests { | |
| "generated_dictionary", | ||
| // "generated_duplicate_fieldnames", | ||
| "generated_interval", | ||
| "generated_large_batch", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nevi-me I don't remember seeing this in the original PR -- was this change intended ?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. NM I see #9587 now |
||
| "generated_nested", | ||
| // "generated_nested_large_offsets", | ||
| "generated_null_trivial", | ||
|
|
@@ -1048,7 +1047,6 @@ mod tests { | |
| "generated_dictionary", | ||
| // "generated_duplicate_fieldnames", | ||
| "generated_interval", | ||
| "generated_large_batch", | ||
| "generated_nested", | ||
| // "generated_nested_large_offsets", | ||
| "generated_null_trivial", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe we won't need
prefixanymore with the newis_large.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We might still need it, we also use it for formatting in
DisplayThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that we can drop it, yes. We can merge StringOffset, BinaryOffset and OffsetTrait in a single Trait with this, but I wanted to leave it to another PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is one way to remove
prefixthat does not go as far as @jorgecarleitao suggests to collapse the traits... 8e68e05