diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 71445768207..842df4e4ad0 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -116,3 +116,7 @@ harness = false [[bench]] name = "equal" harness = false + +[[bench]] +name = "array_slice" +harness = false diff --git a/rust/arrow/benches/array_slice.rs b/rust/arrow/benches/array_slice.rs new file mode 100644 index 00000000000..a535c80d217 --- /dev/null +++ b/rust/arrow/benches/array_slice.rs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#[macro_use] +extern crate criterion; +use criterion::Criterion; + +extern crate arrow; + +use arrow::array::*; +use std::sync::Arc; + +fn create_array_slice(array: &ArrayRef, length: usize) -> ArrayRef { + array.slice(0, length) +} + +fn create_array_with_nulls(size: usize) -> ArrayRef { + let array: Float64Array = (0..size) + .map(|i| if i % 2 == 0 { Some(1.0) } else { None }) + .collect(); + Arc::new(array) +} + +fn array_slice_benchmark(c: &mut Criterion) { + let array = create_array_with_nulls(4096); + c.bench_function("array_slice 128", |b| { + b.iter(|| create_array_slice(&array, 128)) + }); + c.bench_function("array_slice 512", |b| { + b.iter(|| create_array_slice(&array, 512)) + }); + c.bench_function("array_slice 2048", |b| { + b.iter(|| create_array_slice(&array, 2048)) + }); +} + +criterion_group!(benches, array_slice_benchmark); +criterion_main!(benches); diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index 3715a8b1501..9d1ee43e586 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -23,14 +23,11 @@ use std::{any::Any, sync::Arc}; use super::{make_array, Array, ArrayData, ArrayDataRef, ArrayRef}; use crate::datatypes::DataType; +use crate::error::{ArrowError, Result}; use crate::{ buffer::{buffer_bin_or, Buffer}, datatypes::Field, }; -use crate::{ - error::{ArrowError, Result}, - util::bit_util, -}; /// A nested array type where each child (called *field*) is represented by a separate /// array. 
@@ -133,10 +130,18 @@ impl TryFrom> for StructArray { )); if let Some(child_null_buffer) = child_datum.null_buffer() { + let child_datum_offset = child_datum.offset(); + null = Some(if let Some(null_buffer) = &null { - buffer_bin_or(null_buffer, 0, child_null_buffer, 0, child_datum_len) + buffer_bin_or( + null_buffer, + 0, + child_null_buffer, + child_datum_offset, + child_datum_len, + ) } else { - child_null_buffer.clone() + child_null_buffer.bit_slice(child_datum_offset, child_datum_len) }); } else if null.is_some() { // when one of the fields has no nulls, them there is no null in the array @@ -149,7 +154,7 @@ impl TryFrom> for StructArray { .len(len) .child_data(child_data); if let Some(null_buffer) = null { - let null_count = len - bit_util::count_set_bits(null_buffer.data()); + let null_count = len - null_buffer.count_set_bits(); builder = builder.null_count(null_count).null_bit_buffer(null_buffer); } diff --git a/rust/arrow/src/array/array_union.rs b/rust/arrow/src/array/array_union.rs index 8509228aa68..a26404ff912 100644 --- a/rust/arrow/src/array/array_union.rs +++ b/rust/arrow/src/array/array_union.rs @@ -78,7 +78,6 @@ use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; -use crate::util::bit_util; use core::fmt; use std::any::Any; use std::mem; @@ -145,7 +144,7 @@ impl UnionArray { bitmap: Option, ) -> Result { let bitmap_data = bitmap.map(|b| { - let null_count = type_ids.len() - bit_util::count_set_bits(b.data()); + let null_count = type_ids.len() - b.count_set_bits(); (b, null_count) }); @@ -231,8 +230,10 @@ impl UnionArray { pub fn value_offset(&self, index: usize) -> i32 { assert!(index - self.offset() < self.len()); if self.is_dense() { + // In format v4 unions had their own validity bitmap and offsets are compressed by omitting null values + // Starting with v5 unions don't have a validity bitmap and it's possible to directly index into the offsets buffer let valid_slots = match self.data.null_buffer() { - 
Some(b) => bit_util::count_set_bits_offset(b.data(), 0, index), + Some(b) => b.count_set_bits_offset(0, index), None => index, }; self.data().buffers()[1].data()[valid_slots * size_of::()] as i32 diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs index d3b2f9890a5..c2e6e83a3a6 100644 --- a/rust/arrow/src/array/builder.rs +++ b/rust/arrow/src/array/builder.rs @@ -602,7 +602,7 @@ impl PrimitiveBuilder { pub fn finish(&mut self) -> PrimitiveArray { let len = self.len(); let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = len - bit_util::count_set_bits(null_bit_buffer.data()); + let null_count = len - null_bit_buffer.count_set_bits(); let mut builder = ArrayData::builder(T::DATA_TYPE) .len(len) .add_buffer(self.values_builder.finish()); @@ -619,7 +619,7 @@ impl PrimitiveBuilder { pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray { let len = self.len(); let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = len - bit_util::count_set_bits(null_bit_buffer.data()); + let null_count = len - null_bit_buffer.count_set_bits(); let data_type = DataType::Dictionary( Box::new(T::DATA_TYPE), Box::new(values.data_type().clone()), @@ -831,7 +831,7 @@ where let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.bitmap_builder.finish(); - let nulls = bit_util::count_set_bits(null_bit_buffer.data()); + let nulls = null_bit_buffer.count_set_bits(); self.offsets_builder.append(0).unwrap(); let data = ArrayData::builder(DataType::List(Box::new(Field::new( "item", @@ -1043,7 +1043,7 @@ where let offset_buffer = self.offsets_builder.finish(); let null_bit_buffer = self.bitmap_builder.finish(); - let nulls = bit_util::count_set_bits(null_bit_buffer.data()); + let nulls = null_bit_buffer.count_set_bits(); self.offsets_builder.append(0).unwrap(); let data = ArrayData::builder(DataType::LargeList(Box::new(Field::new( "item", @@ -1234,7 +1234,7 @@ where } let null_bit_buffer = 
self.bitmap_builder.finish(); - let nulls = bit_util::count_set_bits(null_bit_buffer.data()); + let nulls = null_bit_buffer.count_set_bits(); let data = ArrayData::builder(DataType::FixedSizeList( Box::new(Field::new("item", values_data.data_type().clone(), true)), self.list_len, @@ -2134,7 +2134,7 @@ impl StructBuilder { } let null_bit_buffer = self.bitmap_builder.finish(); - let null_count = self.len - bit_util::count_set_bits(null_bit_buffer.data()); + let null_count = self.len - null_bit_buffer.count_set_bits(); let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) .len(self.len) .child_data(child_data); diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index 0aec4e614d9..3be2c5468ac 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -23,7 +23,6 @@ use std::sync::Arc; use crate::buffer::Buffer; use crate::datatypes::DataType; -use crate::util::bit_util; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; use super::equal::equal; @@ -31,7 +30,7 @@ use super::equal::equal; #[inline] fn count_nulls(null_bit_buffer: Option<&Buffer>, offset: usize, len: usize) -> usize { if let Some(ref buf) = null_bit_buffer { - len.checked_sub(bit_util::count_set_bits_offset(buf.data(), offset, len)) + len.checked_sub(buf.count_set_bits_offset(offset, len)) .unwrap() } else { 0 diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index d5b824e2992..15909a40fff 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -266,10 +266,30 @@ impl Buffer { bitwise_unary_op_helper(&self, offset, len, |a| a) } + /// Returns a `BitChunks` instance which can be used to iterate over this buffer's bits + /// in larger chunks and starting at arbitrary bit offsets. + /// Note that both `offset` and `len` are measured in bits. pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks { BitChunks::new(&self, offset, len) } + /// Returns the number of 1-bits in this buffer. 
+ pub fn count_set_bits(&self) -> usize { + let len_in_bits = self.len() * 8; + // self.offset is already taken into consideration by the bit_chunks implementation + self.count_set_bits_offset(0, len_in_bits) + } + + /// Returns the number of 1-bits in this buffer, starting from `offset` with `len` bits + /// inspected. Note that both `offset` and `len` are measured in bits. + pub fn count_set_bits_offset(&self, offset: usize, len: usize) -> usize { + let chunks = self.bit_chunks(offset, len); + let mut count = chunks.iter().map(|c| c.count_ones() as usize).sum(); + count += chunks.remainder_bits().count_ones() as usize; + + count + } + /// Returns an empty buffer. pub fn empty() -> Self { unsafe { Self::from_raw_parts(BUFFER_INIT.as_ptr() as _, 0, 0) } @@ -806,7 +826,6 @@ unsafe impl Send for MutableBuffer {} #[cfg(test)] mod tests { - use crate::util::bit_util; use std::ptr::null_mut; use std::thread; @@ -908,11 +927,11 @@ mod tests { fn test_with_bitset() { let mut_buf = MutableBuffer::new(64).with_bitset(64, false); let buf = mut_buf.freeze(); - assert_eq!(0, bit_util::count_set_bits(buf.data())); + assert_eq!(0, buf.count_set_bits()); let mut_buf = MutableBuffer::new(64).with_bitset(64, true); let buf = mut_buf.freeze(); - assert_eq!(512, bit_util::count_set_bits(buf.data())); + assert_eq!(512, buf.count_set_bits()); } #[test] @@ -920,12 +939,12 @@ mod tests { let mut mut_buf = MutableBuffer::new(64).with_bitset(64, true); mut_buf.set_null_bits(0, 64); let buf = mut_buf.freeze(); - assert_eq!(0, bit_util::count_set_bits(buf.data())); + assert_eq!(0, buf.count_set_bits()); let mut mut_buf = MutableBuffer::new(64).with_bitset(64, true); mut_buf.set_null_bits(32, 32); let buf = mut_buf.freeze(); - assert_eq!(256, bit_util::count_set_bits(buf.data())); + assert_eq!(256, buf.count_set_bits()); } #[test] @@ -1094,4 +1113,89 @@ mod tests { check_as_typed_data!(&[1f32, 3f32, 6f32], f32); check_as_typed_data!(&[1f64, 3f64, 6f64], f64); } + + #[test] + fn 
test_count_bits() { + assert_eq!(0, Buffer::from(&[0b00000000]).count_set_bits()); + assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits()); + assert_eq!(3, Buffer::from(&[0b00001101]).count_set_bits()); + assert_eq!(6, Buffer::from(&[0b01001001, 0b01010010]).count_set_bits()); + assert_eq!(16, Buffer::from(&[0b11111111, 0b11111111]).count_set_bits()); + } + + #[test] + fn test_count_bits_slice() { + assert_eq!( + 0, + Buffer::from(&[0b11111111, 0b00000000]) + .slice(1) + .count_set_bits() + ); + assert_eq!( + 8, + Buffer::from(&[0b11111111, 0b11111111]) + .slice(1) + .count_set_bits() + ); + assert_eq!( + 3, + Buffer::from(&[0b11111111, 0b11111111, 0b00001101]) + .slice(2) + .count_set_bits() + ); + assert_eq!( + 6, + Buffer::from(&[0b11111111, 0b01001001, 0b01010010]) + .slice(1) + .count_set_bits() + ); + assert_eq!( + 16, + Buffer::from(&[0b11111111, 0b11111111, 0b11111111, 0b11111111]) + .slice(2) + .count_set_bits() + ); + } + + #[test] + fn test_count_bits_offset_slice() { + assert_eq!(8, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 8)); + assert_eq!(3, Buffer::from(&[0b11111111]).count_set_bits_offset(0, 3)); + assert_eq!(5, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 5)); + assert_eq!(1, Buffer::from(&[0b11111111]).count_set_bits_offset(3, 1)); + assert_eq!(0, Buffer::from(&[0b11111111]).count_set_bits_offset(8, 0)); + assert_eq!(2, Buffer::from(&[0b01010101]).count_set_bits_offset(0, 3)); + assert_eq!( + 16, + Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 16) + ); + assert_eq!( + 10, + Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(0, 10) + ); + assert_eq!( + 10, + Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(3, 10) + ); + assert_eq!( + 8, + Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(8, 8) + ); + assert_eq!( + 5, + Buffer::from(&[0b11111111, 0b11111111]).count_set_bits_offset(11, 5) + ); + assert_eq!( + 0, + Buffer::from(&[0b11111111, 
0b11111111]).count_set_bits_offset(16, 0) + ); + assert_eq!( + 2, + Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 5) + ); + assert_eq!( + 4, + Buffer::from(&[0b01101101, 0b10101010]).count_set_bits_offset(7, 9) + ); + } } diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs index 55024ff2ae2..eb8d3397cfc 100644 --- a/rust/arrow/src/compute/kernels/filter.rs +++ b/rust/arrow/src/compute/kernels/filter.rs @@ -319,9 +319,10 @@ impl FilterContext { )); } let filter_mask: Vec = (0..64).map(|x| 1u64 << x).collect(); - let filter_bytes = filter_array.data_ref().buffers()[0].data(); - let filtered_count = - bit_util::count_set_bits_offset(filter_bytes, 0, filter_array.len()); + let filter_buffer = &filter_array.data_ref().buffers()[0]; + let filtered_count = filter_buffer.count_set_bits_offset(0, filter_array.len()); + + let filter_bytes = filter_buffer.data(); // transmute filter_bytes to &[u64] let mut u64_buffer = MutableBuffer::new(filter_bytes.len()); diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs index 269eceb8be2..4467c99f60b 100644 --- a/rust/arrow/src/util/bit_util.rs +++ b/rust/arrow/src/util/bit_util.rs @@ -22,19 +22,6 @@ use packed_simd::u8x64; const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; -const POPCOUNT_TABLE: [u8; 256] = [ - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, - 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, - 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, - 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, - 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 
4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, - 6, 7, 7, 8, -]; - /// Returns the nearest number that is `>=` than `num` and is a multiple of 64 #[inline] pub fn round_upto_multiple_of_64(num: usize) -> usize { @@ -99,45 +86,6 @@ pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) { *data.add(i >> 3) ^= BIT_MASK[i & 7]; } -/// Returns the number of 1-bits in `data` -#[inline] -pub fn count_set_bits(data: &[u8]) -> usize { - let mut count: usize = 0; - for u in data { - count += POPCOUNT_TABLE[*u as usize] as usize; - } - count -} - -/// Returns the number of 1-bits in `data`, starting from `offset` with `length` bits -/// inspected. Note that both `offset` and `length` are measured in bits. -#[inline] -pub fn count_set_bits_offset(data: &[u8], offset: usize, length: usize) -> usize { - let bit_end = offset + length; - assert!(bit_end <= (data.len() << 3)); - - let byte_start = std::cmp::min(round_upto_power_of_2(offset, 8), bit_end); - let num_bytes = (bit_end - byte_start) >> 3; - - let mut result = 0; - - for i in offset..byte_start { - if get_bit(data, i) { - result += 1; - } - } - for i in 0..num_bytes { - result += POPCOUNT_TABLE[data[(byte_start >> 3) + i] as usize] as usize; - } - for i in (byte_start + (num_bytes << 3))..bit_end { - if get_bit(data, i) { - result += 1; - } - } - - result -} - /// Returns the ceil of `value`/`divisor` #[inline] pub fn ceil(value: usize, divisor: usize) -> usize { @@ -322,32 +270,6 @@ mod tests { } } - #[test] - fn test_count_bits_slice() { - assert_eq!(0, count_set_bits(&[0b00000000])); - assert_eq!(8, count_set_bits(&[0b11111111])); - assert_eq!(3, count_set_bits(&[0b00001101])); - assert_eq!(6, count_set_bits(&[0b01001001, 0b01010010])); - } - - #[test] - fn test_count_bits_offset_slice() { - assert_eq!(8, count_set_bits_offset(&[0b11111111], 0, 8)); - assert_eq!(3, count_set_bits_offset(&[0b11111111], 0, 3)); - assert_eq!(5, 
count_set_bits_offset(&[0b11111111], 3, 5)); - assert_eq!(1, count_set_bits_offset(&[0b11111111], 3, 1)); - assert_eq!(0, count_set_bits_offset(&[0b11111111], 8, 0)); - assert_eq!(2, count_set_bits_offset(&[0b01010101], 0, 3)); - assert_eq!(16, count_set_bits_offset(&[0b11111111, 0b11111111], 0, 16)); - assert_eq!(10, count_set_bits_offset(&[0b11111111, 0b11111111], 0, 10)); - assert_eq!(10, count_set_bits_offset(&[0b11111111, 0b11111111], 3, 10)); - assert_eq!(8, count_set_bits_offset(&[0b11111111, 0b11111111], 8, 8)); - assert_eq!(5, count_set_bits_offset(&[0b11111111, 0b11111111], 11, 5)); - assert_eq!(0, count_set_bits_offset(&[0b11111111, 0b11111111], 16, 0)); - assert_eq!(2, count_set_bits_offset(&[0b01101101, 0b10101010], 7, 5)); - assert_eq!(4, count_set_bits_offset(&[0b01101101, 0b10101010], 7, 9)); - } - #[test] #[cfg(all(any(target_arch = "x86", target_arch = "x86_64")))] fn test_ceil() {