diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index d75b33f7e4b2..7967084aa7ab 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -436,11 +436,84 @@ impl<'a> BooleanArray { } } -impl>> FromIterator for BooleanArray { +/// An optional boolean value +/// +/// This struct is used as an adapter when creating `BooleanArray` from an iterator. +/// `FromIterator` for `BooleanArray` takes an iterator where the elements can be `into` +/// this struct. So once implementing `From` or `Into` trait for a type, an iterator of +/// the type can be collected to `BooleanArray`. +/// +/// See also [NativeAdapter](crate::array::NativeAdapter). +#[derive(Debug)] +struct BooleanAdapter { + /// Corresponding Rust native type if available + pub native: Option, +} + +impl From for BooleanAdapter { + fn from(value: bool) -> Self { + BooleanAdapter { + native: Some(value), + } + } +} + +impl From<&bool> for BooleanAdapter { + fn from(value: &bool) -> Self { + BooleanAdapter { + native: Some(*value), + } + } +} + +impl From> for BooleanAdapter { + fn from(value: Option) -> Self { + BooleanAdapter { native: value } + } +} + +impl From<&Option> for BooleanAdapter { + fn from(value: &Option) -> Self { + BooleanAdapter { native: *value } + } +} + +impl> FromIterator for BooleanArray { fn from_iter>(iter: I) -> Self { let iter = iter.into_iter(); - let (_, data_len) = iter.size_hint(); - let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. + let capacity = match iter.size_hint() { + (lower, Some(upper)) if lower == upper => lower, + _ => 0, + }; + let mut builder = BooleanBuilder::with_capacity(capacity); + builder.extend(iter.map(|item| item.into().native)); + builder.finish() + } +} + +impl BooleanArray { + /// Creates a [`BooleanArray`] from an iterator of trusted length. + /// + /// # Safety + /// + /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). + /// I.e. that `size_hint().1` correctly reports its length. Note that this is a stronger + /// guarantee that `ExactSizeIterator` provides which could still report a wrong length. + /// + /// # Panics + /// + /// Panics if the iterator does not report an upper bound on `size_hint()`. + #[inline] + #[allow( + private_bounds, + reason = "We will expose BooleanAdapter if there is a need" + )] + pub unsafe fn from_trusted_len_iter(iter: I) -> Self + where + P: Into, + I: ExactSizeIterator, + { + let data_len = iter.len(); let num_bytes = bit_util::ceil(data_len, 8); let mut null_builder = MutableBuffer::from_len_zeroed(num_bytes); @@ -450,10 +523,14 @@ impl>> FromIterator for BooleanArray let null_slice = null_builder.as_slice_mut(); iter.enumerate().for_each(|(i, item)| { - if let Some(a) = item.borrow() { - bit_util::set_bit(null_slice, i); - if *a { - bit_util::set_bit(data, i); + if let Some(a) = item.into().native { + unsafe { + // SAFETY: There will be enough space in the buffers due to the trusted len size + // hint + bit_util::set_bit_raw(null_slice.as_mut_ptr(), i); + if a { + bit_util::set_bit_raw(data.as_mut_ptr(), i); + } } } }); @@ -599,6 +676,20 @@ mod tests { } } + #[test] + fn test_boolean_array_from_non_nullable_iter() { + let v = vec![true, false, true]; + let arr = v.into_iter().collect::(); + assert_eq!(3, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + assert!(arr.nulls().is_none()); + + assert!(arr.value(0)); + assert!(!arr.value(1)); + assert!(arr.value(2)); + } + #[test] fn test_boolean_array_from_nullable_iter() { let v = vec![Some(true), None, Some(false), None]; @@ -617,6 +708,29 @@ mod tests { assert!(!arr.value(2)); } + #[test] + fn test_boolean_array_from_nullable_trusted_len_iter() { + // Should exhibit the same behavior as `from_iter`, which is tested above. + let v = vec![Some(true), None, Some(false), None]; + let expected = v.clone().into_iter().collect::(); + let actual = unsafe { + // SAFETY: `v` has trusted length + BooleanArray::from_trusted_len_iter(v.into_iter()) + }; + assert_eq!(expected, actual); + } + + #[test] + fn test_boolean_array_from_iter_with_larger_upper_bound() { + // See https://github.com/apache/arrow-rs/issues/8505 + // This returns an upper size hint of 4 + let iterator = vec![Some(true), None, Some(false), None] + .into_iter() + .filter(Option::is_some); + let arr = iterator.collect::(); + assert_eq!(2, arr.len()); + } + #[test] fn test_boolean_array_builder() { // Test building a boolean array with ArrayData builder and offset diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index a0bd5745d21d..275aa8c9e56a 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -234,9 +234,12 @@ impl ArrayBuilder for BooleanBuilder { impl Extend> for BooleanBuilder { #[inline] fn extend>>(&mut self, iter: T) { - for v in iter { - self.append_option(v) - } + let buffered = iter.into_iter().collect::>(); + let array = unsafe { + // SAFETY: std::vec::IntoIter implements TrustedLen + BooleanArray::from_trusted_len_iter(buffered.into_iter()) + }; + self.append_array(&array) } } diff --git a/arrow/benches/array_from.rs b/arrow/benches/array_from.rs index 3af605ef4ab0..575a8280f652 100644 --- a/arrow/benches/array_from.rs +++ b/arrow/benches/array_from.rs @@ -15,13 +15,12 @@ // specific language governing permissions and limitations // under the License. +extern crate arrow; #[macro_use] extern crate criterion; use criterion::Criterion; -extern crate arrow; - use arrow::array::*; use arrow_buffer::i256; use rand::Rng; @@ -236,6 +235,13 @@ fn from_iter_benchmark(c: &mut Criterion) { let values = gen_option_vector(true, ITER_LEN); b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter()))); }); + c.bench_function("BooleanArray::from_trusted_len_iter", |b| { + let values = gen_option_vector(true, ITER_LEN); + b.iter(|| unsafe { + // SAFETY: values.iter() is a TrustedLenIterator + hint::black_box(BooleanArray::from_trusted_len_iter(values.iter())) + }); + }); } criterion_group!(