diff --git a/rust/arrow/benches/csv_writer.rs b/rust/arrow/benches/csv_writer.rs index a12da670d31..59906bf6aaa 100644 --- a/rust/arrow/benches/csv_writer.rs +++ b/rust/arrow/benches/csv_writer.rs @@ -46,7 +46,7 @@ fn record_batches_to_csv() { Some(-556132.25), ]); let c3 = PrimitiveArray::::from(vec![3, 2, 1]); - let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); + let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); let b = RecordBatch::try_new( Arc::new(schema), diff --git a/rust/arrow/benches/take_kernels.rs b/rust/arrow/benches/take_kernels.rs index 48384944085..1df68ddf48e 100644 --- a/rust/arrow/benches/take_kernels.rs +++ b/rust/arrow/benches/take_kernels.rs @@ -47,6 +47,20 @@ where Arc::new(array) as ArrayRef } +// cast array from specified primitive array type to desired data type +fn create_boolean(size: usize) -> ArrayRef +where + Standard: Distribution, +{ + let array: BooleanArray = seedable_rng() + .sample_iter(&Standard) + .take(size) + .map(Some) + .collect(); + + Arc::new(array) as ArrayRef +} + fn create_strings(size: usize, null_density: f32) -> ArrayRef { let rng = &mut seedable_rng(); @@ -101,23 +115,23 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_take(&values, &indices)) }); - let values = create_primitive::(512); + let values = create_boolean(512); let indices = create_random_index(512, 0.0); c.bench_function("take bool 512", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_primitive::(1024); + let values = create_boolean(1024); let indices = create_random_index(1024, 0.0); c.bench_function("take bool 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_primitive::(512); + let values = create_boolean(512); let indices = create_random_index(512, 0.5); c.bench_function("take bool nulls 512", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_primitive::(1024); + let values = create_boolean(1024); let indices = create_random_index(1024, 0.5); c.bench_function("take bool nulls 1024", |b| { b.iter(|| bench_take(&values, &indices)) diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs new file mode 100644 index 00000000000..50c13d5de09 --- /dev/null +++ b/rust/arrow/src/array/array_boolean.rs @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::borrow::Borrow; +use std::iter::{FromIterator, IntoIterator}; +use std::mem; +use std::{any::Any, fmt}; +use std::{convert::From, sync::Arc}; + +use super::*; +use super::{array::print_long_array, raw_pointer::RawPtrBox}; +use crate::buffer::{Buffer, MutableBuffer}; +use crate::memory; +use crate::util::bit_util; + +/// Array of bools +pub struct BooleanArray { + data: ArrayDataRef, + /// Pointer to the value array. The lifetime of this must be <= to the value buffer + /// stored in `data`, so it's safe to store. + raw_values: RawPtrBox, +} + +impl fmt::Debug for BooleanArray { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "BooleanArray\n[\n")?; + print_long_array(self, f, |array, index, f| { + fmt::Debug::fmt(&array.value(index), f) + })?; + write!(f, "]") + } +} + +impl BooleanArray { + /// Returns the length of this array. + pub fn len(&self) -> usize { + self.data.len() + } + + /// Returns whether this array is empty. + pub fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Returns a raw pointer to the values of this array. + pub fn raw_values(&self) -> *const u8 { + unsafe { self.raw_values.get().add(self.data.offset()) } + } + + /// Returns a slice for the given offset and length + /// + /// Note this doesn't do any bound checking, for performance reason. + pub fn value_slice(&self, offset: usize, len: usize) -> &[u8] { + let raw = + unsafe { std::slice::from_raw_parts(self.raw_values().add(offset), len) }; + &raw[..] + } + + // Returns a new boolean array builder + pub fn builder(capacity: usize) -> BooleanBuilder { + BooleanBuilder::new(capacity) + } + + /// Returns a `Buffer` holding all the values of this array. + /// + /// Note this doesn't take the offset of this array into account. + pub fn values(&self) -> Buffer { + self.data.buffers()[0].clone() + } + + /// Returns the boolean value at index `i`. + /// + /// Note this doesn't do any bound checking, for performance reason. + pub fn value(&self, i: usize) -> bool { + let offset = i + self.offset(); + unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, offset) } + } +} + +impl Array for BooleanArray { + fn as_any(&self) -> &Any { + self + } + + fn data(&self) -> ArrayDataRef { + self.data.clone() + } + + fn data_ref(&self) -> &ArrayDataRef { + &self.data + } + + /// Returns the total number of bytes of memory occupied by the buffers owned by this [BooleanArray]. + fn get_buffer_memory_size(&self) -> usize { + self.data.get_buffer_memory_size() + } + + /// Returns the total number of bytes of memory occupied physically by this [BooleanArray]. + fn get_array_memory_size(&self) -> usize { + self.data.get_array_memory_size() + mem::size_of_val(self) + } +} + +impl From> for BooleanArray { + fn from(data: Vec) -> Self { + let mut mut_buf = MutableBuffer::new_null(data.len()); + { + let mut_slice = mut_buf.data_mut(); + for (i, b) in data.iter().enumerate() { + if *b { + bit_util::set_bit(mut_slice, i); + } + } + } + let array_data = ArrayData::builder(DataType::Boolean) + .len(data.len()) + .add_buffer(mut_buf.freeze()) + .build(); + BooleanArray::from(array_data) + } +} + +impl From>> for BooleanArray { + fn from(data: Vec>) -> Self { + BooleanArray::from_iter(data.iter()) + } +} + +impl From for BooleanArray { + fn from(data: ArrayDataRef) -> Self { + assert_eq!( + data.buffers().len(), + 1, + "BooleanArray data should contain a single buffer only (values buffer)" + ); + let raw_values = data.buffers()[0].raw_data(); + assert!( + memory::is_aligned::(raw_values, mem::align_of::()), + "memory is not aligned" + ); + Self { + data, + raw_values: RawPtrBox::new(raw_values as *const u8), + } + } +} + +impl<'a> IntoIterator for &'a BooleanArray { + type Item = Option; + type IntoIter = BooleanIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + BooleanIter::<'a>::new(self) + } +} + +impl<'a> BooleanArray { + /// constructs a new iterator + pub fn iter(&'a self) -> BooleanIter<'a> { + BooleanIter::<'a>::new(&self) + } +} + +impl>> FromIterator for BooleanArray { + fn from_iter>(iter: I) -> Self { + let iter = iter.into_iter(); + let (_, data_len) = iter.size_hint(); + let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. + + let num_bytes = bit_util::ceil(data_len, 8); + let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); + let mut val_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false); + + let data = unsafe { + std::slice::from_raw_parts_mut(val_buf.raw_data_mut(), val_buf.capacity()) + }; + + let null_slice = null_buf.data_mut(); + iter.enumerate().for_each(|(i, item)| { + if let Some(a) = item.borrow() { + bit_util::set_bit(null_slice, i); + if *a { + bit_util::set_bit(data, i); + } + } + }); + + let data = ArrayData::new( + DataType::Boolean, + data_len, + None, + Some(null_buf.freeze()), + 0, + vec![val_buf.freeze()], + vec![], + ); + BooleanArray::from(Arc::new(data)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::buffer::Buffer; + use crate::datatypes::DataType; + + #[test] + fn test_boolean_fmt_debug() { + let arr = BooleanArray::from(vec![true, false, false]); + assert_eq!( + "BooleanArray\n[\n true,\n false,\n false,\n]", + format!("{:?}", arr) + ); + } + + #[test] + fn test_boolean_with_null_fmt_debug() { + let mut builder = BooleanArray::builder(3); + builder.append_value(true).unwrap(); + builder.append_null().unwrap(); + builder.append_value(false).unwrap(); + let arr = builder.finish(); + assert_eq!( + "BooleanArray\n[\n true,\n null,\n false,\n]", + format!("{:?}", arr) + ); + } + + #[test] + fn test_boolean_array_from_vec() { + let buf = Buffer::from([10_u8]); + let arr = BooleanArray::from(vec![false, true, false, true]); + assert_eq!(buf, arr.values()); + assert_eq!(4, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..4 { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i) + } + } + + #[test] + fn test_boolean_array_from_vec_option() { + let buf = Buffer::from([10_u8]); + let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); + assert_eq!(buf, arr.values()); + assert_eq!(4, arr.len()); + assert_eq!(0, arr.offset()); + assert_eq!(1, arr.null_count()); + for i in 0..4 { + if i == 2 { + assert!(arr.is_null(i)); + assert!(!arr.is_valid(i)); + } else { + assert!(!arr.is_null(i)); + assert!(arr.is_valid(i)); + assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i) + } + } + } + + #[test] + fn test_boolean_array_builder() { + // Test building a boolean array with ArrayData builder and offset + // 000011011 + let buf = Buffer::from([27_u8]); + let buf2 = buf.clone(); + let data = ArrayData::builder(DataType::Boolean) + .len(5) + .offset(2) + .add_buffer(buf) + .build(); + let arr = BooleanArray::from(data); + assert_eq!(buf2, arr.values()); + assert_eq!(5, arr.len()); + assert_eq!(2, arr.offset()); + assert_eq!(0, arr.null_count()); + for i in 0..3 { + assert_eq!(i != 0, arr.value(i), "failed at {}", i); + } + } + + #[test] + #[should_panic(expected = "BooleanArray data should contain a single buffer only \ + (values buffer)")] + fn test_boolean_array_invalid_buffer_len() { + let data = ArrayData::builder(DataType::Boolean).len(5).build(); + BooleanArray::from(data); + } +} diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index c6f20b16d3c..1e5c808e8b1 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -94,7 +94,7 @@ impl PrimitiveArray { /// Note this doesn't do any bound checking, for performance reason. pub fn value(&self, i: usize) -> T::Native { let offset = i + self.offset(); - unsafe { T::index(self.raw_values.get(), offset) } + unsafe { *self.raw_values.get().add(offset) } } } @@ -422,56 +422,6 @@ impl PrimitiveArray { } } -/// Constructs a boolean array from a vector. Should only be used for testing. -impl From> for BooleanArray { - fn from(data: Vec) -> Self { - let mut mut_buf = MutableBuffer::new_null(data.len()); - { - let mut_slice = mut_buf.data_mut(); - for (i, b) in data.iter().enumerate() { - if *b { - bit_util::set_bit(mut_slice, i); - } - } - } - let array_data = ArrayData::builder(DataType::Boolean) - .len(data.len()) - .add_buffer(mut_buf.freeze()) - .build(); - BooleanArray::from(array_data) - } -} - -impl From>> for BooleanArray { - fn from(data: Vec>) -> Self { - let data_len = data.len(); - let num_byte = bit_util::ceil(data_len, 8); - let mut null_buf = MutableBuffer::new_null(data.len()); - let mut val_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false); - - { - let null_slice = null_buf.data_mut(); - let val_slice = val_buf.data_mut(); - - for (i, v) in data.iter().enumerate() { - if let Some(b) = v { - bit_util::set_bit(null_slice, i); - if *b { - bit_util::set_bit(val_slice, i); - } - } - } - } - - let array_data = ArrayData::builder(DataType::Boolean) - .len(data_len) - .add_buffer(val_buf.freeze()) - .null_bit_buffer(null_buf.freeze()) - .build(); - BooleanArray::from(array_data) - } -} - /// Constructs a `PrimitiveArray` from an array data reference. impl From for PrimitiveArray { fn from(data: ArrayDataRef) -> Self { @@ -834,28 +784,6 @@ mod tests { ); } - #[test] - fn test_boolean_fmt_debug() { - let arr = BooleanArray::from(vec![true, false, false]); - assert_eq!( - "PrimitiveArray\n[\n true,\n false,\n false,\n]", - format!("{:?}", arr) - ); - } - - #[test] - fn test_boolean_with_null_fmt_debug() { - let mut builder = BooleanArray::builder(3); - builder.append_value(true).unwrap(); - builder.append_null().unwrap(); - builder.append_value(false).unwrap(); - let arr = builder.finish(); - assert_eq!( - "PrimitiveArray\n[\n true,\n null,\n false,\n]", - format!("{:?}", arr) - ); - } - #[test] fn test_timestamp_fmt_debug() { let arr: PrimitiveArray = @@ -911,70 +839,6 @@ mod tests { Int32Array::from(data); } - #[test] - fn test_boolean_array_from_vec() { - let buf = Buffer::from([10_u8]); - let arr = BooleanArray::from(vec![false, true, false, true]); - assert_eq!(buf, arr.values()); - assert_eq!(4, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..4 { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i) - } - } - - #[test] - fn test_boolean_array_from_vec_option() { - let buf = Buffer::from([10_u8]); - let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]); - assert_eq!(buf, arr.values()); - assert_eq!(4, arr.len()); - assert_eq!(0, arr.offset()); - assert_eq!(1, arr.null_count()); - for i in 0..4 { - if i == 2 { - assert!(arr.is_null(i)); - assert!(!arr.is_valid(i)); - } else { - assert!(!arr.is_null(i)); - assert!(arr.is_valid(i)); - assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i) - } - } - } - - #[test] - fn test_boolean_array_builder() { - // Test building a boolean array with ArrayData builder and offset - // 000011011 - let buf = Buffer::from([27_u8]); - let buf2 = buf.clone(); - let data = ArrayData::builder(DataType::Boolean) - .len(5) - .offset(2) - .add_buffer(buf) - .build(); - let arr = BooleanArray::from(data); - assert_eq!(buf2, arr.values()); - assert_eq!(5, arr.len()); - assert_eq!(2, arr.offset()); - assert_eq!(0, arr.null_count()); - for i in 0..3 { - assert_eq!(i != 0, arr.value(i), "failed at {}", i); - } - } - - #[test] - #[should_panic(expected = "PrimitiveArray data should contain a single buffer only \ - (values buffer)")] - fn test_boolean_array_invalid_buffer_len() { - let data = ArrayData::builder(DataType::Boolean).len(5).build(); - BooleanArray::from(data); - } - #[test] fn test_access_array_concurrently() { let a = Int32Array::from(vec![5, 6, 7, 8, 9]); diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index a26404ff912..ea42843589f 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -430,15 +430,13 @@ mod tests { fn test_dense_mixed() { let mut builder = UnionBuilder::new_dense(7); builder.append::("a", 1).unwrap(); - builder.append::("b", false).unwrap(); builder.append::("c", 3).unwrap(); builder.append::("a", 4).unwrap(); builder.append::("c", 5).unwrap(); builder.append::("a", 6).unwrap(); - builder.append::("b", true).unwrap(); let union = builder.build().unwrap(); - assert_eq!(7, union.len()); + assert_eq!(5, union.len()); for i in 0..union.len() { let slot = union.value(i); assert_eq!(false, union.is_null(i)); @@ -450,41 +448,29 @@ mod tests { assert_eq!(1_i32, value); } 1 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(false, value); - } - 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(3_i64, value); } - 3 => { + 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(4_i32, value); } - 4 => { + 3 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(5_i64, value); } - 5 => { + 4 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(6_i32, value); } - 6 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(true, value); - } _ => unreachable!(), } } @@ -494,15 +480,13 @@ mod tests { fn test_dense_mixed_with_nulls() { let mut builder = UnionBuilder::new_dense(7); builder.append::("a", 1).unwrap(); - builder.append::("b", false).unwrap(); builder.append::("c", 3).unwrap(); builder.append::("a", 10).unwrap(); builder.append_null().unwrap(); builder.append::("a", 6).unwrap(); - builder.append::("b", true).unwrap(); let union = builder.build().unwrap(); - assert_eq!(7, union.len()); + assert_eq!(5, union.len()); for i in 0..union.len() { let slot = union.value(i); match i { @@ -514,41 +498,27 @@ mod tests { assert_eq!(1_i32, value); } 1 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(false, union.is_null(i)); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(false, value); - } - 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, union.is_null(i)); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(3_i64, value); } - 3 => { + 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, union.is_null(i)); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(10_i32, value); } - 4 => assert!(union.is_null(i)), - 5 => { + 3 => assert!(union.is_null(i)), + 4 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, union.is_null(i)); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(6_i32, value); } - 6 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(false, union.is_null(i)); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(true, value); - } _ => unreachable!(), } } @@ -558,15 +528,13 @@ mod tests { fn test_dense_mixed_with_nulls_and_offset() { let mut builder = UnionBuilder::new_dense(7); builder.append::("a", 1).unwrap(); - builder.append::("b", false).unwrap(); builder.append::("c", 3).unwrap(); builder.append::("a", 10).unwrap(); builder.append_null().unwrap(); builder.append::("a", 6).unwrap(); - builder.append::("b", true).unwrap(); let union = builder.build().unwrap(); - let slice = union.slice(3, 3); + let slice = union.slice(2, 3); let new_union = slice.as_any().downcast_ref::().unwrap(); assert_eq!(3, new_union.len()); @@ -739,17 +707,15 @@ mod tests { #[test] fn test_sparse_mixed() { - let mut builder = UnionBuilder::new_sparse(7); + let mut builder = UnionBuilder::new_sparse(5); builder.append::("a", 1).unwrap(); - builder.append::("b", true).unwrap(); builder.append::("c", 3.0).unwrap(); builder.append::("a", 4).unwrap(); builder.append::("c", 5.0).unwrap(); builder.append::("a", 6).unwrap(); - builder.append::("b", false).unwrap(); let union = builder.build().unwrap(); - let expected_type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1]; + let expected_type_ids = vec![0_i8, 1, 0, 1, 0]; // Check type ids assert_eq!( @@ -774,41 +740,29 @@ mod tests { assert_eq!(1_i32, value); } 1 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(true, value); - } - 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert!(value - 3_f64 < f64::EPSILON); } - 3 => { + 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(4_i32, value); } - 4 => { + 3 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert!(5_f64 - value < f64::EPSILON); } - 5 => { + 4 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(slot.len(), 1); let value = slot.value(0); assert_eq!(6_i32, value); } - 6 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(false, value); - } _ => unreachable!(), } } @@ -816,15 +770,14 @@ mod tests { #[test] fn test_sparse_mixed_with_nulls() { - let mut builder = UnionBuilder::new_sparse(7); + let mut builder = UnionBuilder::new_sparse(5); builder.append::("a", 1).unwrap(); - builder.append::("b", true).unwrap(); builder.append_null().unwrap(); builder.append::("c", 3.0).unwrap(); builder.append::("a", 4).unwrap(); let union = builder.build().unwrap(); - let expected_type_ids = vec![0_i8, 1, 0, 2, 0]; + let expected_type_ids = vec![0_i8, 0, 1, 0]; // Check type ids assert_eq!( @@ -848,22 +801,15 @@ mod tests { let value = slot.value(0); assert_eq!(1_i32, value); } - 1 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(false, union.is_null(i)); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(true, value); - } - 2 => assert!(union.is_null(i)), - 3 => { + 1 => assert!(union.is_null(i)), + 2 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, union.is_null(i)); assert_eq!(slot.len(), 1); let value = slot.value(0); assert!(value - 3_f64 < f64::EPSILON); } - 4 => { + 3 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, union.is_null(i)); assert_eq!(slot.len(), 1); @@ -877,39 +823,31 @@ mod tests { #[test] fn test_sparse_mixed_with_nulls_and_offset() { - let mut builder = UnionBuilder::new_sparse(7); + let mut builder = UnionBuilder::new_sparse(5); builder.append::("a", 1).unwrap(); - builder.append::("b", true).unwrap(); builder.append_null().unwrap(); builder.append::("c", 3.0).unwrap(); builder.append_null().unwrap(); builder.append::("a", 4).unwrap(); let union = builder.build().unwrap(); - let slice = union.slice(1, 5); + let slice = union.slice(1, 4); let new_union = slice.as_any().downcast_ref::().unwrap(); - assert_eq!(5, new_union.len()); + assert_eq!(4, new_union.len()); for i in 0..new_union.len() { let slot = new_union.value(i); match i { - 0 => { - let slot = slot.as_any().downcast_ref::().unwrap(); - assert_eq!(false, new_union.is_null(i)); - assert_eq!(slot.len(), 1); - let value = slot.value(0); - assert_eq!(true, value); - } - 1 => assert!(new_union.is_null(i)), - 2 => { + 0 => assert!(new_union.is_null(i)), + 1 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, new_union.is_null(i)); assert_eq!(slot.len(), 1); let value = slot.value(0); assert!(value - 3_f64 < f64::EPSILON); } - 3 => assert!(new_union.is_null(i)), - 4 => { + 2 => assert!(new_union.is_null(i)), + 3 => { let slot = slot.as_any().downcast_ref::().unwrap(); assert_eq!(false, new_union.is_null(i)); assert_eq!(slot.len(), 1); diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index 68a95faf79f..0dfb70ae4f3 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -254,15 +254,7 @@ pub trait BufferBuilderTrait { impl BufferBuilderTrait for BufferBuilder { #[inline] fn new(capacity: usize) -> Self { - let buffer = if matches!(T::DATA_TYPE, DataType::Boolean) { - let byte_capacity = bit_util::ceil(capacity, 8); - let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); - let mut buffer = MutableBuffer::new(actual_capacity); - buffer.set_null_bits(0, actual_capacity); - buffer - } else { - MutableBuffer::new(capacity * mem::size_of::()) - }; + let buffer = MutableBuffer::new(capacity * mem::size_of::()); Self { buffer, @@ -280,17 +272,13 @@ impl BufferBuilderTrait for BufferBuilder { } fn capacity(&self) -> usize { - let bit_capacity = self.buffer.capacity() * 8; - bit_capacity / T::get_bit_width() + let byte_capacity = self.buffer.capacity(); + byte_capacity / T::get_byte_width() } #[inline] fn advance(&mut self, i: usize) -> Result<()> { - let new_buffer_len = if matches!(T::DATA_TYPE, DataType::Boolean) { - bit_util::ceil(self.len + i, 8) - } else { - (self.len + i) * mem::size_of::() - }; + let new_buffer_len = (self.len + i) * mem::size_of::(); self.buffer.resize(new_buffer_len); self.len += i; Ok(()) @@ -299,54 +287,22 @@ impl BufferBuilderTrait for BufferBuilder { #[inline] fn reserve(&mut self, n: usize) { let new_capacity = self.len + n; - if matches!(T::DATA_TYPE, DataType::Boolean) { - if new_capacity > self.capacity() { - let new_byte_capacity = bit_util::ceil(new_capacity, 8); - let existing_capacity = self.buffer.capacity(); - let new_capacity = self.buffer.reserve(new_byte_capacity); - self.buffer - .set_null_bits(existing_capacity, new_capacity - existing_capacity); - } - } else { - let byte_capacity = mem::size_of::() * new_capacity; - self.buffer.reserve(byte_capacity); - } + let byte_capacity = mem::size_of::() * new_capacity; + self.buffer.reserve(byte_capacity); } #[inline] fn append(&mut self, v: T::Native) -> Result<()> { self.reserve(1); - if matches!(T::DATA_TYPE, DataType::Boolean) { - if v != T::default_value() { - unsafe { - bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); - } - } - self.len += 1; - } else { - self.write_bytes(v.to_byte_slice(), 1); - } + self.write_bytes(v.to_byte_slice(), 1); Ok(()) } #[inline] fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> { self.reserve(n); - if matches!(T::DATA_TYPE, DataType::Boolean) { - if n != 0 && v != T::default_value() { - let data = unsafe { - std::slice::from_raw_parts_mut( - self.buffer.raw_data_mut(), - self.buffer.capacity(), - ) - }; - (self.len..self.len + n).for_each(|i| bit_util::set_bit(data, i)) - } - self.len += n; - } else { - for _ in 0..n { - self.write_bytes(v.to_byte_slice(), 1); - } + for _ in 0..n { + self.write_bytes(v.to_byte_slice(), 1); } Ok(()) } @@ -356,40 +312,127 @@ impl BufferBuilderTrait for BufferBuilder { let array_slots = slice.len(); self.reserve(array_slots); - if matches!(T::DATA_TYPE, DataType::Boolean) { - for v in slice { - if *v != T::default_value() { - // For performance the `len` of the buffer is not - // updated on each append but is updated in the - // `freeze` method instead. - unsafe { - bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); - } + self.write_bytes(slice.to_byte_slice(), array_slots); + Ok(()) + } + + #[inline] + fn finish(&mut self) -> Buffer { + let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); + self.len = 0; + buf.freeze() + } +} + +#[derive(Debug)] +pub struct BooleanBufferBuilder { + buffer: MutableBuffer, + len: usize, +} + +impl BooleanBufferBuilder { + #[inline] + pub fn new(capacity: usize) -> Self { + let byte_capacity = bit_util::ceil(capacity, 8); + let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); + let mut buffer = MutableBuffer::new(actual_capacity); + buffer.set_null_bits(0, actual_capacity); + + Self { buffer, len: 0 } + } + + pub fn len(&self) -> usize { + self.len + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn capacity(&self) -> usize { + self.buffer.capacity() * 8 + } + + #[inline] + pub fn advance(&mut self, i: usize) -> Result<()> { + let new_buffer_len = bit_util::ceil(self.len + i, 8); + self.buffer.resize(new_buffer_len); + self.len += i; + Ok(()) + } + + #[inline] + pub fn reserve(&mut self, n: usize) { + let new_capacity = self.len + n; + if new_capacity > self.capacity() { + let new_byte_capacity = bit_util::ceil(new_capacity, 8); + let existing_capacity = self.buffer.capacity(); + let new_capacity = self.buffer.reserve(new_byte_capacity); + self.buffer + .set_null_bits(existing_capacity, new_capacity - existing_capacity); + } + } + + #[inline] + pub fn append(&mut self, v: bool) -> Result<()> { + self.reserve(1); + if v { + let data = unsafe { + std::slice::from_raw_parts_mut( + self.buffer.raw_data_mut(), + self.buffer.capacity(), + ) + }; + bit_util::set_bit(data, self.len); + } + self.len += 1; + Ok(()) + } + + #[inline] + pub fn append_n(&mut self, n: usize, v: bool) -> Result<()> { + self.reserve(n); + if n != 0 && v { + let data = unsafe { + std::slice::from_raw_parts_mut( + self.buffer.raw_data_mut(), + self.buffer.capacity(), + ) + }; + (self.len..self.len + n).for_each(|i| bit_util::set_bit(data, i)) + } + self.len += n; + Ok(()) + } + + #[inline] + pub fn append_slice(&mut self, slice: &[bool]) -> Result<()> { + let array_slots = slice.len(); + self.reserve(array_slots); + + for v in slice { + if *v { + // For performance the `len` of the buffer is not + // updated on each append but is updated in the + // `freeze` method instead. + unsafe { + bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); } - self.len += 1; } - Ok(()) - } else { - self.write_bytes(slice.to_byte_slice(), array_slots); - Ok(()) + self.len += 1; } + Ok(()) } #[inline] - fn finish(&mut self) -> Buffer { - if matches!(T::DATA_TYPE, DataType::Boolean) { - // `append` does not update the buffer's `len` so do it before `freeze` is called. - let new_buffer_len = bit_util::ceil(self.len, 8); - debug_assert!(new_buffer_len >= self.buffer.len()); - let mut buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); - self.len = 0; - buf.resize(new_buffer_len); - buf.freeze() - } else { - let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); - self.len = 0; - buf.freeze() - } + pub fn finish(&mut self) -> Buffer { + // `append` does not update the buffer's `len` so do it before `freeze` is called. + let new_buffer_len = bit_util::ceil(self.len, 8); + debug_assert!(new_buffer_len >= self.buffer.len()); + let mut buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); + self.len = 0; + buf.resize(new_buffer_len); + buf.freeze() } } @@ -442,6 +485,169 @@ pub trait ArrayBuilder: Any { fn into_box_any(self: Box) -> Box; } +/// Array builder for fixed-width primitive types +#[derive(Debug)] +pub struct BooleanBuilder { + values_builder: BooleanBufferBuilder, + bitmap_builder: BooleanBufferBuilder, +} + +impl BooleanBuilder { + /// Creates a new primitive array builder + pub fn new(capacity: usize) -> Self { + Self { + values_builder: BooleanBufferBuilder::new(capacity), + bitmap_builder: BooleanBufferBuilder::new(capacity), + } + } + + /// Returns the capacity of this builder measured in slots of type `T` + pub fn capacity(&self) -> usize { + self.values_builder.capacity() + } + + /// Appends a value of type `T` into the builder + pub fn append_value(&mut self, v: bool) -> Result<()> { + self.bitmap_builder.append(true)?; + self.values_builder.append(v)?; + Ok(()) + } + + /// Appends a null slot into the builder + pub fn append_null(&mut self) -> Result<()> { + self.bitmap_builder.append(false)?; + self.values_builder.advance(1)?; + Ok(()) + } + + /// Appends an `Option` into the builder + pub fn append_option(&mut self, v: Option) -> Result<()> { + match v { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Appends a slice of type `T` into the builder + pub fn append_slice(&mut self, v: &[bool]) -> Result<()> { + self.bitmap_builder.append_n(v.len(), true)?; + self.values_builder.append_slice(v)?; + Ok(()) + } + + /// Appends values from a slice of type `T` and a validity boolean slice + pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<()> { + if values.len() != is_valid.len() { + return Err(ArrowError::InvalidArgumentError( + "Value and validity lengths must be equal".to_string(), + )); + } + self.bitmap_builder.append_slice(is_valid)?; + self.values_builder.append_slice(values) + } + + /// Builds the [BooleanArray] and reset this builder. + pub fn finish(&mut self) -> BooleanArray { + let len = self.len(); + let null_bit_buffer = self.bitmap_builder.finish(); + let null_count = len - null_bit_buffer.count_set_bits(); + let mut builder = ArrayData::builder(DataType::Boolean) + .len(len) + .add_buffer(self.values_builder.finish()); + if null_count > 0 { + builder = builder + .null_count(null_count) + .null_bit_buffer(null_bit_buffer); + } + let data = builder.build(); + BooleanArray::from(data) + } +} + +impl ArrayBuilder for BooleanBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.values_builder.len + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.values_builder.is_empty() + } + + /// Appends data from other arrays into the builder + /// + /// This is most useful when concatenating arrays of the same type into a builder. + fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { + // validate arraydata and reserve memory + let mut total_len = 0; + for array in data { + if array.data_type() != &self.data_type() { + return Err(ArrowError::InvalidArgumentError( + "Cannot append data to builder if data types are different" + .to_string(), + )); + } + if array.buffers().len() != 1 { + return Err(ArrowError::InvalidArgumentError( + "Primitive arrays should have 1 buffer".to_string(), + )); + } + total_len += array.len(); + } + // reserve memory + self.values_builder.reserve(total_len); + self.bitmap_builder.reserve(total_len); + + for array in data { + let len = array.len(); + if len == 0 { + continue; + } + + // booleans are bit-packed, thus we iterate through the array + let array = BooleanArray::from(array.clone()); + for i in 0..len { + self.values_builder.append(array.value(i))?; + } + + for i in 0..len { + // account for offset as `ArrayData` does not + self.bitmap_builder.append(array.is_valid(i))?; + } + } + Ok(()) + } + + /// Returns the data type of the builder + /// + /// This is used for validating array data types in `append_data` + fn data_type(&self) -> DataType { + DataType::Boolean + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + /// Array builder for fixed-width primitive types #[derive(Debug)] pub struct PrimitiveBuilder { @@ -499,7 +705,7 @@ impl ArrayBuilder for PrimitiveBuilder { self.values_builder.reserve(total_len); self.bitmap_builder.reserve(total_len); - let mul = T::get_bit_width() / 8; + let mul = T::get_byte_width(); for array in data { let len = array.len(); if len == 0 { @@ -2403,7 +2609,6 @@ impl FieldData { fn append_null_dynamic(&mut self) -> Result<()> { match self.data_type { DataType::Null => unimplemented!(), - DataType::Boolean => self.append_null::()?, DataType::Int8 => self.append_null::()?, DataType::Int16 => self.append_null::()?, DataType::Int32 diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index 3aa30f415cc..f21f996ae29 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -20,9 +20,9 @@ //! depend on dynamic casting of `Array`. use super::{ - Array, ArrayData, BinaryOffsetSizeTrait, DecimalArray, FixedSizeBinaryArray, - GenericBinaryArray, GenericListArray, GenericStringArray, NullArray, OffsetSizeTrait, - PrimitiveArray, StringOffsetSizeTrait, StructArray, + Array, ArrayData, BinaryOffsetSizeTrait, BooleanArray, DecimalArray, + FixedSizeBinaryArray, GenericBinaryArray, GenericListArray, GenericStringArray, + NullArray, OffsetSizeTrait, PrimitiveArray, StringOffsetSizeTrait, StructArray, }; use crate::{ @@ -80,6 +80,12 @@ impl PartialEq for PrimitiveArray { } } +impl PartialEq for BooleanArray { + fn eq(&self, other: &BooleanArray) -> bool { + equal(self.data().as_ref(), other.data().as_ref()) + } +} + impl PartialEq for GenericStringArray { fn eq(&self, other: &Self) -> bool { equal(self.data().as_ref(), other.data().as_ref()) diff --git a/rust/arrow/src/array/equal_json.rs b/rust/arrow/src/array/equal_json.rs index 6fc37f17a8a..2a3b87d29f4 100644 --- a/rust/arrow/src/array/equal_json.rs +++ b/rust/arrow/src/array/equal_json.rs @@ -38,14 +38,28 @@ pub trait JsonEqual { /// Implement array equals for numeric type impl JsonEqual for PrimitiveArray { fn equals_json(&self, json: &[&Value]) -> bool { - if self.len() != json.len() { - return false; - } + self.len() == json.len() + && (0..self.len()).all(|i| match json[i] { + Value::Null => self.is_null(i), + v => { + self.is_valid(i) + && Some(v) == self.value(i).into_json_value().as_ref() + } + }) + } +} - (0..self.len()).all(|i| match json[i] { - Value::Null => self.is_null(i), - v => self.is_valid(i) && Some(v) == self.value(i).into_json_value().as_ref(), - }) +/// Implement array equals for numeric type +impl JsonEqual for BooleanArray { + fn equals_json(&self, json: &[&Value]) -> bool { + self.len() == json.len() + && (0..self.len()).all(|i| match json[i] { + Value::Null => self.is_null(i), + v => { + self.is_valid(i) + && Some(v) == self.value(i).into_json_value().as_ref() + } + }) } } diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs index 34dfe269d68..356ae545fa8 100644 --- a/rust/arrow/src/array/iterator.rs +++ b/rust/arrow/src/array/iterator.rs @@ -18,11 +18,11 @@ use crate::datatypes::ArrowPrimitiveType; use super::{ - Array, BinaryOffsetSizeTrait, GenericBinaryArray, GenericStringArray, PrimitiveArray, - StringOffsetSizeTrait, + Array, BinaryOffsetSizeTrait, BooleanArray, GenericBinaryArray, GenericStringArray, + PrimitiveArray, StringOffsetSizeTrait, }; -/// an iterator that returns Some(T) or None, that can be used on any non-boolean PrimitiveArray +/// an iterator that returns Some(T) or None, that can be used on any PrimitiveArray // Note: This implementation is based on std's [Vec]s' [IntoIter]. #[derive(Debug)] pub struct PrimitiveIter<'a, T: ArrowPrimitiveType> { @@ -81,6 +81,65 @@ impl<'a, T: ArrowPrimitiveType> std::iter::DoubleEndedIterator for PrimitiveIter /// all arrays have known size. impl<'a, T: ArrowPrimitiveType> std::iter::ExactSizeIterator for PrimitiveIter<'a, T> {} +/// an iterator that returns Some(bool) or None. +// Note: This implementation is based on std's [Vec]s' [IntoIter]. +#[derive(Debug)] +pub struct BooleanIter<'a> { + array: &'a BooleanArray, + current: usize, + current_end: usize, +} + +impl<'a> BooleanIter<'a> { + /// create a new iterator + pub fn new(array: &'a BooleanArray) -> Self { + BooleanIter { + array, + current: 0, + current_end: array.len(), + } + } +} + +impl<'a> std::iter::Iterator for BooleanIter<'a> { + type Item = Option; + + fn next(&mut self) -> Option { + if self.current == self.current_end { + None + } else if self.array.is_null(self.current) { + self.current += 1; + Some(None) + } else { + let old = self.current; + self.current += 1; + Some(Some(self.array.value(old))) + } + } + + fn size_hint(&self) -> (usize, Option) { + (self.array.len(), Some(self.array.len())) + } +} + +impl<'a> std::iter::DoubleEndedIterator for BooleanIter<'a> { + fn next_back(&mut self) -> Option { + if self.current_end == self.current { + None + } else { + self.current_end -= 1; + Some(if self.array.is_null(self.current_end) { + None + } else { + Some(self.array.value(self.current_end)) + }) + } + } +} + +/// all arrays have known size. +impl<'a> std::iter::ExactSizeIterator for BooleanIter<'a> {} + /// an iterator that returns `Some(&str)` or `None`, for string arrays #[derive(Debug)] pub struct GenericStringIter<'a, T> @@ -183,7 +242,7 @@ impl<'a, T: BinaryOffsetSizeTrait> std::iter::ExactSizeIterator mod tests { use std::sync::Arc; - use crate::array::{ArrayRef, BinaryArray, Int32Array, StringArray}; + use crate::array::{ArrayRef, BinaryArray, BooleanArray, Int32Array, StringArray}; #[test] fn test_primitive_array_iter_round_trip() { @@ -253,4 +312,14 @@ mod tests { assert_eq!(result, array); } + + #[test] + fn test_boolean_array_iter_round_trip() { + let array = BooleanArray::from(vec![Some(true), None, Some(false)]); + + // to and from iter + let result: BooleanArray = array.iter().collect(); + + assert_eq!(result, array); + } } diff --git a/rust/arrow/src/array/mod.rs b/rust/arrow/src/array/mod.rs index cb1c13e23e9..1838f83f2bb 100644 --- a/rust/arrow/src/array/mod.rs +++ b/rust/arrow/src/array/mod.rs @@ -84,6 +84,7 @@ #[allow(clippy::module_inception)] mod array; mod array_binary; +mod array_boolean; mod array_dictionary; mod array_list; mod array_primitive; @@ -116,6 +117,7 @@ pub use self::array_binary::BinaryArray; pub use self::array_binary::DecimalArray; pub use self::array_binary::FixedSizeBinaryArray; pub use self::array_binary::LargeBinaryArray; +pub use self::array_boolean::BooleanArray; pub use self::array_dictionary::DictionaryArray; pub use self::array_list::FixedSizeListArray; pub use self::array_list::LargeListArray; @@ -129,7 +131,6 @@ pub use self::null::NullArray; pub use self::array::make_array; -pub type BooleanArray = PrimitiveArray; pub type Int8Array = PrimitiveArray; pub type Int16Array = PrimitiveArray; pub type Int32Array = PrimitiveArray; @@ -176,10 +177,10 @@ pub use self::array_string::StringOffsetSizeTrait; // --------------------- Array Builder --------------------- +pub use self::builder::BooleanBufferBuilder; pub use self::builder::BufferBuilder; pub use self::builder::BufferBuilderTrait; -pub type BooleanBufferBuilder = BufferBuilder; pub type Int8BufferBuilder = BufferBuilder; pub type Int16BufferBuilder = BufferBuilder; pub type Int32BufferBuilder = BufferBuilder; @@ -210,6 +211,7 @@ pub type DurationNanosecondBufferBuilder = BufferBuilder pub use self::builder::ArrayBuilder; pub use self::builder::BinaryBuilder; +pub use self::builder::BooleanBuilder; pub use self::builder::DecimalBuilder; pub use self::builder::FixedSizeBinaryBuilder; pub use self::builder::FixedSizeListBuilder; @@ -224,7 +226,6 @@ pub use self::builder::StringDictionaryBuilder; pub use self::builder::StructBuilder; pub use self::builder::UnionBuilder; -pub type BooleanBuilder = PrimitiveBuilder; pub type Int8Builder = PrimitiveBuilder; pub type Int16Builder = PrimitiveBuilder; pub type Int32Builder = PrimitiveBuilder; diff --git a/rust/arrow/src/array/ord.rs b/rust/arrow/src/array/ord.rs index 22cfa52f980..358b5f1d18d 100644 --- a/rust/arrow/src/array/ord.rs +++ b/rust/arrow/src/array/ord.rs @@ -51,6 +51,12 @@ where Box::new(move |i, j| left.value(i).cmp(&right.value(j))) } +fn compare_boolean<'a>(left: &'a Array, right: &'a Array) -> DynComparator<'a> { + let left = left.as_any().downcast_ref::().unwrap(); + let right = right.as_any().downcast_ref::().unwrap(); + Box::new(move |i, j| left.value(i).cmp(&right.value(j))) +} + fn compare_float<'a, T: ArrowPrimitiveType>( left: &'a Array, right: &'a Array, @@ -129,7 +135,7 @@ pub fn build_compare<'a>(left: &'a Array, right: &'a Array) -> Result compare_primitives::(left, right), + (Boolean, Boolean) => compare_boolean(left, right), (UInt8, UInt8) => compare_primitives::(left, right), (UInt16, UInt16) => compare_primitives::(left, right), (UInt32, UInt32) => compare_primitives::(left, right), diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs index 07cf5288fcf..67c145ef0a9 100644 --- a/rust/arrow/src/compute/kernels/boolean.rs +++ b/rust/arrow/src/compute/kernels/boolean.rs @@ -297,12 +297,7 @@ where left_data .buffers() .iter() - .map(|buf| { - buf.bit_slice( - left.offset() * T::get_bit_width(), - left.len() * T::get_bit_width(), - ) - }) + .map(|buf| buf.slice(left.offset() * T::get_byte_width())) .collect::>() }; diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index c8b00cd7b95..7b0c6bc9a86 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -2979,7 +2979,7 @@ mod tests { fn make_union_array() -> UnionArray { let mut builder = UnionBuilder::new_dense(7); builder.append::("a", 1).unwrap(); - builder.append::("b", false).unwrap(); + builder.append::("b", 2).unwrap(); builder.build().unwrap() } diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index fd0bc7343f0..8d0f86574d9 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -29,7 +29,7 @@ use std::sync::Arc; use crate::array::*; use crate::buffer::{Buffer, MutableBuffer}; use crate::compute::util::combine_option_bitmap; -use crate::datatypes::{ArrowNumericType, BooleanType, DataType}; +use crate::datatypes::{ArrowNumericType, DataType}; use crate::error::{ArrowError, Result}; use crate::util::bit_util; @@ -61,7 +61,7 @@ macro_rules! compare_op { vec![result.finish()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) }}; } @@ -82,7 +82,7 @@ macro_rules! compare_op_scalar { vec![result.finish()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) }}; } @@ -152,7 +152,7 @@ pub fn like_utf8(left: &StringArray, right: &StringArray) -> Result::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } fn is_like_pattern(c: char) -> bool { @@ -203,7 +203,7 @@ pub fn like_utf8_scalar(left: &StringArray, right: &str) -> Result vec![result.finish()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } pub fn nlike_utf8(left: &StringArray, right: &StringArray) -> Result { @@ -248,7 +248,7 @@ pub fn nlike_utf8(left: &StringArray, right: &StringArray) -> Result::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } pub fn nlike_utf8_scalar(left: &StringArray, right: &str) -> Result { @@ -294,7 +294,7 @@ pub fn nlike_utf8_scalar(left: &StringArray, right: &str) -> Result::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } pub fn eq_utf8(left: &StringArray, right: &StringArray) -> Result { @@ -402,7 +402,7 @@ where vec![result.freeze()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } /// Helper function to perform boolean lambda function on values from an array and a scalar value using @@ -453,7 +453,7 @@ where vec![result.freeze()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } /// Perform `left == right` operation on two arrays. @@ -703,7 +703,7 @@ where vec![bool_buf.freeze()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } /// Checks if a `GenericListArray` contains a value in the `GenericStringArray` @@ -761,7 +761,7 @@ where vec![bool_buf.freeze()], vec![], ); - Ok(PrimitiveArray::::from(Arc::new(data))) + Ok(BooleanArray::from(Arc::new(data))) } // create a buffer and fill it with valid bits diff --git a/rust/arrow/src/compute/kernels/concat.rs b/rust/arrow/src/compute/kernels/concat.rs index 8c0965adbf8..e36ccfbc168 100644 --- a/rust/arrow/src/compute/kernels/concat.rs +++ b/rust/arrow/src/compute/kernels/concat.rs @@ -58,7 +58,7 @@ pub fn concat(array_list: &[ArrayRef]) -> Result { Ok(ArrayBuilder::finish(&mut builder)) } DataType::Boolean => { - let mut builder = PrimitiveArray::::builder(0); + let mut builder = BooleanArray::builder(0); builder.append_data(array_data_list)?; Ok(ArrayBuilder::finish(&mut builder)) } @@ -271,7 +271,7 @@ mod tests { #[test] fn test_concat_boolean_primitive_arrays() -> Result<()> { let arr = concat(&[ - Arc::new(PrimitiveArray::::from(vec![ + Arc::new(BooleanArray::from(vec![ Some(true), Some(true), Some(false), @@ -279,7 +279,7 @@ mod tests { None, Some(false), ])) as ArrayRef, - Arc::new(PrimitiveArray::::from(vec![ + Arc::new(BooleanArray::from(vec![ None, Some(false), Some(true), @@ -287,7 +287,7 @@ mod tests { ])) as ArrayRef, ])?; - let expected_output = Arc::new(PrimitiveArray::::from(vec![ + let expected_output = Arc::new(BooleanArray::from(vec![ Some(true), Some(true), Some(false), diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs index e90e4939b4e..31d3a1a18ae 100644 --- a/rust/arrow/src/compute/kernels/filter.rs +++ b/rust/arrow/src/compute/kernels/filter.rs @@ -230,6 +230,46 @@ macro_rules! filter_dictionary_array { }}; } +macro_rules! filter_boolean_item_list_array { + ($context:expr, $array:expr, $list_type:ident, $list_builder_type:ident) => {{ + let input_array = $array.as_any().downcast_ref::<$list_type>().unwrap(); + let values_builder = BooleanBuilder::new($context.filtered_count); + let mut builder = $list_builder_type::new(values_builder); + for i in 0..$context.filter_u64.len() { + // foreach u64 batch + let filter_batch = $context.filter_u64[i]; + if filter_batch == 0 { + // if batch == 0, all items are filtered out, so skip entire batch + continue; + } + for j in 0..64 { + // foreach bit in batch: + if (filter_batch & $context.filter_mask[j]) != 0 { + let data_index = (i * 64) + j; + if input_array.is_null(data_index) { + builder.append(false)?; + } else { + let this_inner_list = input_array.value(data_index); + let inner_list = this_inner_list + .as_any() + .downcast_ref::() + .unwrap(); + for k in 0..inner_list.len() { + if inner_list.is_null(k) { + builder.values().append_null()?; + } else { + builder.values().append_value(inner_list.value(k))?; + } + } + builder.append(true)?; + } + } + } + } + Ok(Arc::new(builder.finish())) + }}; +} + macro_rules! filter_primitive_item_list_array { ($context:expr, $array:expr, $item_type:ident, $list_type:ident, $list_builder_type:ident) => {{ let input_array = $array.as_any().downcast_ref::<$list_type>().unwrap(); @@ -522,7 +562,7 @@ impl FilterContext { filter_primitive_item_list_array!(self, array, Float64Type, ListArray, ListBuilder) } DataType::Boolean => { - filter_primitive_item_list_array!(self, array, BooleanType, ListArray, ListBuilder) + filter_boolean_item_list_array!(self, array, ListArray, ListBuilder) } DataType::Date32(_) => { filter_primitive_item_list_array!(self, array, Date32Type, ListArray, ListBuilder) @@ -635,7 +675,7 @@ impl FilterContext { filter_primitive_item_list_array!(self, array, Float64Type, LargeListArray, LargeListBuilder) } DataType::Boolean => { - filter_primitive_item_list_array!(self, array, BooleanType, LargeListArray, LargeListBuilder) + filter_boolean_item_list_array!(self, array, LargeListArray, LargeListBuilder) } DataType::Date32(_) => { filter_primitive_item_list_array!(self, array, Date32Type, LargeListArray, LargeListBuilder) diff --git a/rust/arrow/src/compute/kernels/sort.rs b/rust/arrow/src/compute/kernels/sort.rs index c824a22b94d..b0a7f68a08a 100644 --- a/rust/arrow/src/compute/kernels/sort.rs +++ b/rust/arrow/src/compute/kernels/sort.rs @@ -94,9 +94,7 @@ pub fn sort_to_indices( let (v, n) = partition_validity(values); match values.data_type() { - DataType::Boolean => { - sort_primitive::(values, v, n, vec![], &options) - } + DataType::Boolean => sort_boolean(values, v, n, &options), DataType::Int8 => sort_primitive::(values, v, n, vec![], &options), DataType::Int16 => sort_primitive::(values, v, n, vec![], &options), DataType::Int32 => sort_primitive::(values, v, n, vec![], &options), @@ -222,6 +220,68 @@ impl Default for SortOptions { } } +/// Sort primitive values +fn sort_boolean( + values: &ArrayRef, + value_indices: Vec, + null_indices: Vec, + options: &SortOptions, +) -> Result { + let values = values + .as_any() + .downcast_ref::() + .expect("Unable to downcast to boolean array"); + let descending = options.descending; + + // create tuples that are used for sorting + let mut valids = value_indices + .into_iter() + .map(|index| (index, values.value(index as usize))) + .collect::>(); + + let mut nulls = null_indices; + + let valids_len = valids.len(); + let nulls_len = nulls.len(); + + if !descending { + valids.sort_by(|a, b| a.1.cmp(&b.1)); + } else { + valids.sort_by(|a, b| a.1.cmp(&b.1).reverse()); + // reverse to keep a stable ordering + nulls.reverse(); + } + + // collect results directly into a buffer instead of a vec to avoid another aligned allocation + let mut result = MutableBuffer::new(values.len() * std::mem::size_of::()); + // sets len to capacity so we can access the whole buffer as a typed slice + result.resize(values.len() * std::mem::size_of::()); + let result_slice: &mut [u32] = result.typed_data_mut(); + + debug_assert_eq!(result_slice.len(), nulls_len + valids_len); + + if options.nulls_first { + result_slice[0..nulls_len].copy_from_slice(&nulls); + insert_valid_and_nan_values(result_slice, nulls_len, valids, vec![], descending); + } else { + // nulls last + insert_valid_and_nan_values(result_slice, 0, valids, vec![], descending); + result_slice[valids_len..].copy_from_slice(nulls.as_slice()) + } + + let result_data = Arc::new(ArrayData::new( + DataType::UInt32, + values.len(), + Some(0), + None, + 0, + vec![result.freeze()], + vec![], + )); + + Ok(UInt32Array::from(result_data)) +} + /// Sort primitive values fn sort_primitive( values: &ArrayRef, @@ -560,6 +620,17 @@ mod tests { use std::iter::FromIterator; use std::sync::Arc; + fn test_sort_to_indices_boolean_arrays( + data: Vec>, + options: Option, + expected_data: Vec, + ) { + let output = BooleanArray::from(data); + let expected = UInt32Array::from(expected_data); + let output = sort_to_indices(&(Arc::new(output) as ArrayRef), options).unwrap(); + assert_eq!(output, expected) + } + fn test_sort_to_indices_primitive_arrays( data: Vec>, options: Option, @@ -825,16 +896,19 @@ mod tests { }), vec![5, 0, 2, 1, 4, 3], ); + } + #[test] + fn test_sort_boolean() { // boolean - test_sort_to_indices_primitive_arrays::( + test_sort_to_indices_boolean_arrays( vec![None, Some(false), Some(true), Some(true), Some(false), None], None, vec![0, 5, 1, 4, 2, 3], ); // boolean, descending - test_sort_to_indices_primitive_arrays::( + test_sort_to_indices_boolean_arrays( vec![None, Some(false), Some(true), Some(true), Some(false), None], Some(SortOptions { descending: true, @@ -844,7 +918,7 @@ mod tests { ); // boolean, descending, nulls first - test_sort_to_indices_primitive_arrays::( + test_sort_to_indices_boolean_arrays( vec![None, Some(false), Some(true), Some(true), Some(false), None], Some(SortOptions { descending: true, diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs index 7d3c490ae77..bb2d8eb5305 100644 --- a/rust/arrow/src/compute/kernels/take.rs +++ b/rust/arrow/src/compute/kernels/take.rs @@ -564,6 +564,18 @@ where mod tests { use super::*; + fn test_take_boolean_arrays( + data: Vec>, + index: &UInt32Array, + options: Option, + expected_data: Vec>, + ) { + let output = BooleanArray::from(data); + let expected = Arc::new(BooleanArray::from(expected_data)) as ArrayRef; + let output = take(&(Arc::new(output) as ArrayRef), index, options).unwrap(); + assert_eq!(&output, &expected) + } + fn test_take_primitive_arrays( data: Vec>, index: &UInt32Array, @@ -825,7 +837,7 @@ mod tests { fn test_take_primitive_bool() { let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]); // boolean - test_take_primitive_arrays::( + test_take_boolean_arrays( vec![Some(false), None, Some(true), Some(false), None], &index, None, diff --git a/rust/arrow/src/csv/reader.rs b/rust/arrow/src/csv/reader.rs index d1e7ff93698..8e5b3ad6c99 100644 --- a/rust/arrow/src/csv/reader.rs +++ b/rust/arrow/src/csv/reader.rs @@ -51,7 +51,7 @@ use std::sync::Arc; use csv as csv_crate; -use crate::array::{ArrayRef, PrimitiveArray, StringBuilder}; +use crate::array::{ArrayRef, BooleanArray, PrimitiveArray, StringBuilder}; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use crate::record_batch::RecordBatch; @@ -405,9 +405,7 @@ fn parse( let i = *i; let field = &fields[i]; match field.data_type() { - &DataType::Boolean => { - build_primitive_array::(line_number, rows, i) - } + &DataType::Boolean => build_boolean_array(line_number, rows, i), &DataType::Int8 => { build_primitive_array::(line_number, rows, i) } @@ -471,18 +469,6 @@ trait Parser: ArrowPrimitiveType { } } -impl Parser for BooleanType { - fn parse(string: &str) -> Option { - if string.eq_ignore_ascii_case("false") { - Some(false) - } else if string.eq_ignore_ascii_case("true") { - Some(true) - } else { - None - } - } -} - impl Parser for Float32Type { fn parse(string: &str) -> Option { lexical_core::parse(string.as_bytes()).ok() @@ -514,6 +500,16 @@ fn parse_item(string: &str) -> Option { T::parse(string) } +fn parse_bool(string: &str) -> Option { + if string.eq_ignore_ascii_case("false") { + Some(false) + } else if string.eq_ignore_ascii_case("true") { + Some(true) + } else { + None + } +} + // parses a specific column (col_idx) into an Arrow Array. fn build_primitive_array( line_number: usize, @@ -548,6 +544,40 @@ fn build_primitive_array( .map(|e| Arc::new(e) as ArrayRef) } +// parses a specific column (col_idx) into an Arrow Array. +fn build_boolean_array( + line_number: usize, + rows: &[StringRecord], + col_idx: usize, +) -> Result { + rows.iter() + .enumerate() + .map(|(row_index, row)| { + match row.get(col_idx) { + Some(s) => { + if s.is_empty() { + return Ok(None); + } + + let parsed = parse_bool(s); + match parsed { + Some(e) => Ok(Some(e)), + None => Err(ArrowError::ParseError(format!( + // TODO: we should surface the underlying error here. + "Error while parsing value {} for column {} at line {}", + s, + col_idx, + line_number + row_index + ))), + } + } + None => Ok(None), + } + }) + .collect::>() + .map(|e| Arc::new(e) as ArrayRef) +} + /// CSV file reader builder #[derive(Debug)] pub struct ReaderBuilder { @@ -1059,21 +1089,21 @@ mod tests { #[test] fn test_parsing_bool() { // Encode the expected behavior of boolean parsing - assert_eq!(Some(true), parse_item::("true")); - assert_eq!(Some(true), parse_item::("tRUe")); - assert_eq!(Some(true), parse_item::("True")); - assert_eq!(Some(true), parse_item::("TRUE")); - assert_eq!(None, parse_item::("t")); - assert_eq!(None, parse_item::("T")); - assert_eq!(None, parse_item::("")); - - assert_eq!(Some(false), parse_item::("false")); - assert_eq!(Some(false), parse_item::("fALse")); - assert_eq!(Some(false), parse_item::("False")); - assert_eq!(Some(false), parse_item::("FALSE")); - assert_eq!(None, parse_item::("f")); - assert_eq!(None, parse_item::("F")); - assert_eq!(None, parse_item::("")); + assert_eq!(Some(true), parse_bool("true")); + assert_eq!(Some(true), parse_bool("tRUe")); + assert_eq!(Some(true), parse_bool("True")); + assert_eq!(Some(true), parse_bool("TRUE")); + assert_eq!(None, parse_bool("t")); + assert_eq!(None, parse_bool("T")); + assert_eq!(None, parse_bool("")); + + assert_eq!(Some(false), parse_bool("false")); + assert_eq!(Some(false), parse_bool("fALse")); + assert_eq!(Some(false), parse_bool("False")); + assert_eq!(Some(false), parse_bool("FALSE")); + assert_eq!(None, parse_bool("f")); + assert_eq!(None, parse_bool("F")); + assert_eq!(None, parse_bool("")); } #[test] diff --git a/rust/arrow/src/csv/writer.rs b/rust/arrow/src/csv/writer.rs index b331813a463..1eb0c4db6f6 100644 --- a/rust/arrow/src/csv/writer.rs +++ b/rust/arrow/src/csv/writer.rs @@ -48,7 +48,7 @@ //! Some(-556132.25), //! ]); //! let c3 = PrimitiveArray::::from(vec![3, 2, 1]); -//! let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); +//! let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); //! //! let batch = RecordBatch::try_new( //! Arc::new(schema), @@ -415,7 +415,7 @@ mod tests { Some(-556132.25), ]); let c3 = PrimitiveArray::::from(vec![3, 2, 1]); - let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); + let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); let c5 = TimestampMillisecondArray::from_opt_vec( vec![None, Some(1555584887378), Some(1555555555555)], None, @@ -482,7 +482,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03 Some(-556132.25), ]); let c3 = PrimitiveArray::::from(vec![3, 2, 1]); - let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); + let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]); let batch = RecordBatch::try_new( @@ -543,7 +543,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03 Some(-556132.25), ]); let c3 = PrimitiveArray::::from(vec![3, 2, 1]); - let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); + let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); let c5 = TimestampMillisecondArray::from_opt_vec( vec![None, Some(1555584887378), Some(1555555555555)], None, diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs index ed9fdd4e4ff..a34e1f7dbe7 100644 --- a/rust/arrow/src/datatypes.rs +++ b/rust/arrow/src/datatypes.rs @@ -41,7 +41,6 @@ use serde_json::{ }; use crate::error::{ArrowError, Result}; -use crate::util::bit_util; /// The set of datatypes that are supported by this implementation of Apache Arrow. /// @@ -220,9 +219,9 @@ pub trait ArrowPrimitiveType: 'static { /// the corresponding Arrow data type of this primitive type. const DATA_TYPE: DataType; - /// Returns the bit width of this primitive type. - fn get_bit_width() -> usize { - size_of::() * 8 + /// Returns the byte width of this primitive type. + fn get_byte_width() -> usize { + size_of::() } /// Returns a default value of this primitive type. @@ -231,15 +230,6 @@ pub trait ArrowPrimitiveType: 'static { fn default_value() -> Self::Native { Default::default() } - - /// Returns a value offset from the given pointer by the given index. The default - /// implementation (used for all non-boolean types) is simply equivalent to pointer-arithmetic. - /// # Safety - /// Just like array-access in C: the raw_ptr must be the start of a valid array, and the index - /// must be less than the size of the array. - unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native { - *(raw_ptr.add(i)) - } } impl ArrowNativeType for bool { @@ -377,20 +367,8 @@ impl ArrowNativeType for f64 { #[derive(Debug)] pub struct BooleanType {} -impl ArrowPrimitiveType for BooleanType { - type Native = bool; - const DATA_TYPE: DataType = DataType::Boolean; - - fn get_bit_width() -> usize { - 1 - } - - /// # Safety - /// The pointer must be part of a bit-packed boolean array, and the index must be less than the - /// size of the array. - unsafe fn index(raw_ptr: *const Self::Native, i: usize) -> Self::Native { - bit_util::get_bit_raw(raw_ptr as *const u8, i) - } +impl BooleanType { + pub const DATA_TYPE: DataType = DataType::Boolean; } macro_rules! make_type { diff --git a/rust/arrow/src/util/string_writer.rs b/rust/arrow/src/util/string_writer.rs index b9c0c3906a9..2a8175d1562 100644 --- a/rust/arrow/src/util/string_writer.rs +++ b/rust/arrow/src/util/string_writer.rs @@ -47,7 +47,7 @@ //! Some(-556132.25), //! ]); //! let c3 = PrimitiveArray::::from(vec![3, 2, 1]); -//! let c4 = PrimitiveArray::::from(vec![Some(true), Some(false), None]); +//! let c4 = BooleanArray::from(vec![Some(true), Some(false), None]); //! //! let batch = RecordBatch::try_new( //! Arc::new(schema), diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs index 231b46dea55..145f3156dca 100644 --- a/rust/parquet/src/arrow/array_reader.rs +++ b/rust/parquet/src/arrow/array_reader.rs @@ -25,10 +25,10 @@ use std::vec::Vec; use arrow::array::{ Array, ArrayData, ArrayDataBuilder, ArrayDataRef, ArrayRef, BinaryArray, - BinaryBuilder, BooleanBufferBuilder, BufferBuilderTrait, FixedSizeBinaryArray, - FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, ListBuilder, - OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, StringArray, StringBuilder, - StructArray, + BinaryBuilder, BooleanArray, BooleanBufferBuilder, BufferBuilderTrait, + FixedSizeBinaryArray, FixedSizeBinaryBuilder, GenericListArray, Int16BufferBuilder, + ListBuilder, OffsetSizeTrait, PrimitiveArray, PrimitiveBuilder, StringArray, + StringBuilder, StructArray, }; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::datatypes::{ @@ -305,8 +305,7 @@ impl ArrayReader for PrimitiveArrayReader { let array = match T::get_physical_type() { PhysicalType::BOOLEAN => { - Arc::new(PrimitiveArray::::from(array_data.build())) - as ArrayRef + Arc::new(BooleanArray::from(array_data.build())) as ArrayRef } PhysicalType::INT32 => { Arc::new(PrimitiveArray::::from(array_data.build())) @@ -627,7 +626,8 @@ fn build_empty_list_array(item_type: ArrowType) -> Result { build_empty_list_array_with_primitive_items!(ArrowFloat64Type) } ArrowType::Boolean => { - build_empty_list_array_with_primitive_items!(ArrowBooleanType) + //build_empty_list_array_with_primitive_items!(ArrowBooleanType) + todo!() } ArrowType::Date32(_) => { build_empty_list_array_with_primitive_items!(ArrowDate32Type) @@ -772,7 +772,8 @@ fn remove_indices( remove_primitive_array_indices!(arr, ArrowFloat64Type, indices) } ArrowType::Boolean => { - remove_primitive_array_indices!(arr, ArrowBooleanType, indices) + todo!() + //remove_primitive_array_indices!(arr, ArrowBooleanType, indices) } ArrowType::Date32(_) => { remove_primitive_array_indices!(arr, ArrowDate32Type, indices) diff --git a/rust/parquet/src/arrow/record_reader.rs b/rust/parquet/src/arrow/record_reader.rs index 7b9087319e0..16b084698b8 100644 --- a/rust/parquet/src/arrow/record_reader.rs +++ b/rust/parquet/src/arrow/record_reader.rs @@ -25,7 +25,7 @@ use crate::column::{page::PageReader, reader::ColumnReaderImpl}; use crate::data_type::DataType; use crate::errors::{ParquetError, Result}; use crate::schema::types::ColumnDescPtr; -use arrow::array::{BooleanBufferBuilder, BufferBuilderTrait}; +use arrow::array::BooleanBufferBuilder; use arrow::bitmap::Bitmap; use arrow::buffer::{Buffer, MutableBuffer}; use arrow::memory;