Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 59 additions & 28 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1265,34 +1265,7 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {

/// Creates a [GenericBinaryArray] from a vector of Optional (null) byte slices
pub fn from_opt_vec(v: Vec<Option<&[u8]>>) -> Self {
let mut offsets = Vec::with_capacity(v.len() + 1);
let mut values = Vec::new();
let mut null_buf = make_null_buffer(v.len());
let mut length_so_far: OffsetSize = OffsetSize::zero();
offsets.push(length_so_far);

{
let null_slice = null_buf.data_mut();

for (i, s) in v.iter().enumerate() {
if let Some(s) = s {
bit_util::set_bit(null_slice, i);
length_so_far =
length_so_far + OffsetSize::from_usize(s.len()).unwrap();
values.extend_from_slice(s);
}
// always add an element in offsets
offsets.push(length_so_far);
}
}

let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
.len(v.len())
.add_buffer(Buffer::from(offsets.to_byte_slice()))
.add_buffer(Buffer::from(&values[..]))
.null_bit_buffer(null_buf.freeze())
.build();
GenericBinaryArray::<OffsetSize>::from(array_data)
v.into_iter().collect()
}

fn from_list(v: GenericListArray<OffsetSize>) -> Self {
Expand Down Expand Up @@ -1323,6 +1296,13 @@ impl<OffsetSize: BinaryOffsetSizeTrait> GenericBinaryArray<OffsetSize> {
}
}

impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryArray<T> {
    /// Returns an iterator over the slots of this array.
    ///
    /// The iterator borrows the array for `'a` and is built with
    /// [`GenericBinaryIter::new`].
    pub fn iter(&'a self) -> GenericBinaryIter<'a, T> {
        // `self` is already `&'a Self`; borrowing it again would only create a
        // `&&Self` that has to be auto-dereferenced.
        GenericBinaryIter::new(self)
    }
}

impl<OffsetSize: BinaryOffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}BinaryArray\n[\n", OffsetSize::prefix())?;
Expand Down Expand Up @@ -1391,12 +1371,63 @@ impl<OffsetSize: BinaryOffsetSizeTrait> From<ArrayDataRef>
}
}

/// Builds a [GenericBinaryArray] from an iterator of optional byte containers.
///
/// `Some(bytes)` items are appended to the values buffer and flagged as valid in the
/// validity bitmap; `None` items only repeat the previous offset.
///
/// # Panics
///
/// Panics if the iterator's `size_hint` has no upper bound (the validity bitmap is
/// allocated up front from that bound), or if the `unwrap` on
/// `OffsetSize::from_usize` fails for an item's length.
impl<Ptr, OffsetSize: BinaryOffsetSizeTrait> FromIterator<Option<Ptr>>
    for GenericBinaryArray<OffsetSize>
where
    Ptr: AsRef<[u8]>,
{
    fn from_iter<I: IntoIterator<Item = Option<Ptr>>>(iter: I) -> Self {
        let iter = iter.into_iter();
        // The upper bound is only a capacity estimate used to size the buffers; it is
        // NOT necessarily the number of items the iterator will yield.
        let (_, upper_bound) = iter.size_hint();
        let capacity = upper_bound.expect("Iterator must be sized"); // panic if no upper bound.

        let mut offsets = Vec::with_capacity(capacity + 1);
        let mut values = Vec::new();
        let mut null_buf = make_null_buffer(capacity);
        let mut length_so_far: OffsetSize = OffsetSize::zero();
        offsets.push(length_so_far);

        {
            let null_slice = null_buf.data_mut();

            for (i, s) in iter.enumerate() {
                if let Some(s) = s {
                    let s = s.as_ref();
                    bit_util::set_bit(null_slice, i);
                    length_so_far =
                        length_so_far + OffsetSize::from_usize(s.len()).unwrap();
                    values.extend_from_slice(s);
                }
                // always add an element in offsets
                offsets.push(length_so_far);
            }
        }

        // Use the number of items actually consumed: adaptors such as `filter` report
        // an upper bound larger than what they yield, and using that bound as the
        // array length would expose slots that have no offset entry.
        let data_len = offsets.len() - 1;

        let array_data = ArrayData::builder(OffsetSize::DATA_TYPE)
            .len(data_len)
            .add_buffer(Buffer::from(offsets.to_byte_slice()))
            .add_buffer(Buffer::from(&values[..]))
            .null_bit_buffer(null_buf.freeze())
            .build();
        Self::from(array_data)
    }
}

/// A binary array whose elements are variable-length byte sequences, using `i32` as the
/// offset type.
pub type BinaryArray = GenericBinaryArray<i32>;

/// A binary array whose elements are variable-length byte sequences, using `i64` as the
/// offset type.
pub type LargeBinaryArray = GenericBinaryArray<i64>;

/// Lets a borrowed binary array be used directly in `for` loops and iterator chains,
/// yielding `Option<&[u8]>` for each slot.
impl<'a, T: BinaryOffsetSizeTrait> IntoIterator for &'a GenericBinaryArray<T> {
    type Item = Option<&'a [u8]>;
    type IntoIter = GenericBinaryIter<'a, T>;

    fn into_iter(self) -> Self::IntoIter {
        let array: &'a GenericBinaryArray<T> = self;
        GenericBinaryIter::new(array)
    }
}

impl From<Vec<&[u8]>> for BinaryArray {
fn from(v: Vec<&[u8]>) -> Self {
BinaryArray::from_vec(v)
Expand Down
72 changes: 70 additions & 2 deletions rust/arrow/src/array/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

use crate::datatypes::ArrowPrimitiveType;

use super::{Array, GenericStringArray, PrimitiveArray, StringOffsetSizeTrait};
use super::{
array::BinaryOffsetSizeTrait, Array, GenericBinaryArray, GenericStringArray,
PrimitiveArray, StringOffsetSizeTrait,
};

/// an iterator that returns Some(T) or None, that can be used on any non-boolean PrimitiveArray
#[derive(Debug)]
Expand Down Expand Up @@ -111,11 +114,60 @@ impl<'a, T: StringOffsetSizeTrait> std::iter::ExactSizeIterator
{
}

/// An iterator over a [`GenericBinaryArray`] that yields `Some(&[u8])` for valid
/// slots and `None` for null slots.
#[derive(Debug)]
pub struct GenericBinaryIter<'a, T>
where
T: BinaryOffsetSizeTrait,
{
// The array being iterated; borrowed for the lifetime of the iterator.
array: &'a GenericBinaryArray<T>,
// Index of the next slot to yield.
i: usize,
// Number of slots in `array`, captured when the iterator is created.
len: usize,
}

impl<'a, T: BinaryOffsetSizeTrait> GenericBinaryIter<'a, T> {
    /// Builds an iterator positioned at the first slot of `array`.
    ///
    /// The array length is read once here and cached for the bounds check in `next`.
    pub fn new(array: &'a GenericBinaryArray<T>) -> Self {
        let len = array.len();
        Self { array, i: 0, len }
    }
}

impl<'a, T: BinaryOffsetSizeTrait> std::iter::Iterator for GenericBinaryIter<'a, T> {
    type Item = Option<&'a [u8]>;

    /// Yields the next slot: `Some(None)` for a null slot, `Some(Some(bytes))` for a
    /// valid one, and `None` once every slot has been visited.
    fn next(&mut self) -> Option<Self::Item> {
        let i = self.i;
        if i >= self.len {
            return None;
        }
        self.i += 1;
        if self.array.is_null(i) {
            Some(None)
        } else {
            Some(Some(self.array.value(i)))
        }
    }

    /// Reports the exact number of slots that have not been yielded yet.
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `i` only advances while `i < len`, so this subtraction cannot underflow.
        // Returning the remaining count (not the full length) keeps
        // `ExactSizeIterator::len` correct after the iterator is partially consumed.
        let remaining = self.len - self.i;
        (remaining, Some(remaining))
    }
}

/// All arrays have a known size, so the iterator is marked as exact-size; the
/// reported length is derived from the `size_hint` of the `Iterator` implementation.
impl<'a, T: BinaryOffsetSizeTrait> std::iter::ExactSizeIterator
for GenericBinaryIter<'a, T>
{
}

#[cfg(test)]
mod tests {
use std::sync::Arc;

use crate::array::{ArrayRef, Int32Array, StringArray};
use crate::array::{ArrayRef, BinaryArray, Int32Array, StringArray};

#[test]
fn test_primitive_array_iter_round_trip() {
Expand Down Expand Up @@ -156,4 +208,20 @@ mod tests {
StringArray::from(vec![Some("ab"), None, Some("aaab"), None, Some("aaaaab")]);
assert_eq!(result, expected);
}

#[test]
fn test_binary_array_iter_round_trip() {
    // A mix of valid and null slots with values of different lengths.
    let source = BinaryArray::from(vec![
        Some(b"a" as &[u8]),
        None,
        Some(b"aaa"),
        None,
        Some(b"aaaaa"),
    ]);

    // Iterating and collecting must reproduce the original array exactly.
    let rebuilt = source.iter().collect::<BinaryArray>();

    assert_eq!(rebuilt, source);
}
}