diff --git a/rust/arrow/benches/buffer_bit_ops.rs b/rust/arrow/benches/buffer_bit_ops.rs index e10e215a335..f905a0cf78c 100644 --- a/rust/arrow/benches/buffer_bit_ops.rs +++ b/rust/arrow/benches/buffer_bit_ops.rs @@ -22,13 +22,6 @@ use criterion::Criterion; extern crate arrow; use arrow::buffer::{Buffer, MutableBuffer}; -use arrow::error::ArrowError; -use arrow::error::Result; -#[cfg(feature = "simd")] -use arrow::util::bit_util; -use std::borrow::BorrowMut; -#[cfg(feature = "simd")] -use std::slice::{from_raw_parts, from_raw_parts_mut}; /// Helper function to create arrays fn create_buffer(size: usize) -> Buffer { @@ -41,146 +34,15 @@ fn create_buffer(size: usize) -> Buffer { result.freeze() } -fn bench_and_current_impl(left: &Buffer, right: &Buffer) { +fn bench_buffer_and(left: &Buffer, right: &Buffer) { criterion::black_box((left & right).unwrap()); } -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] -fn bench_and_packed_simd_chunked_exact(left: &Buffer, right: &Buffer) { - criterion::black_box( - bitwise_bin_op_simd_helper(&left, &right, |a, b| a & b).unwrap(), - ); -} - -fn bench_and_chunked_exact(left: &Buffer, right: &Buffer) { - criterion::black_box( - bitwise_bin_op_autovec_chunked_helper(&left, &right, |a, b| a & b).unwrap(), - ); -} - -fn bench_and_autovec(left: &Buffer, right: &Buffer) { - criterion::black_box( - bitwise_bin_op_autovec_helper(&left, &right, |a, b| a & b).unwrap(), - ); -} - -const AUTOVEC_LANES: usize = 64; - -fn bitwise_bin_op_autovec_chunked_helper( - left: &Buffer, - right: &Buffer, - op: F, -) -> Result -where - F: Fn(u8, u8) -> u8, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Buffers must be the same size to apply Bitwise AND.".to_string(), - )); - } - - let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false); - - let mut left_chunks = left.data().chunks_exact(AUTOVEC_LANES); - let mut right_chunks = right.data().chunks_exact(AUTOVEC_LANES); - 
let mut result_chunks = result.data_mut().chunks_exact_mut(AUTOVEC_LANES); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) - .for_each(|(res, (left, right))| { - for i in 0..AUTOVEC_LANES { - res[i] = op(left[i], right[i]); - } - }); - - result_chunks - .into_remainder() - .iter_mut() - .zip( - left_chunks - .remainder() - .iter() - .zip(right_chunks.remainder().iter()), - ) - .for_each(|(res, (left, right))| { - *res = op(*left, *right); - }); - - Ok(result.freeze()) -} - -fn bitwise_bin_op_autovec_helper( - left: &Buffer, - right: &Buffer, - op: F, -) -> Result -where - F: Fn(u8, u8) -> u8, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Buffers must be the same size to apply Bitwise AND.".to_string(), - )); - } - - let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false); - - result - .data_mut() - .iter_mut() - .zip(left.data().iter().zip(right.data().iter())) - .for_each(|(res, (left, right))| { - *res = op(*left, *right); - }); - - Ok(result.freeze()) -} - -/// Helper function for SIMD `BitAnd` and `BitOr` implementations -#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] -fn bitwise_bin_op_simd_helper(left: &Buffer, right: &Buffer, op: F) -> Result -where - F: Fn(packed_simd::u8x64, packed_simd::u8x64) -> packed_simd::u8x64, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Buffers must be the same size to apply Bitwise AND.".to_string(), - )); - } - - let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false); - let lanes = packed_simd::u8x64::lanes(); - for i in (0..left.len()).step_by(lanes) { - let left_data = unsafe { from_raw_parts(left.raw_data().add(i), lanes) }; - let right_data = unsafe { from_raw_parts(right.raw_data().add(i), lanes) }; - let result_slice: &mut [u8] = unsafe { - from_raw_parts_mut((result.data_mut().as_mut_ptr() as *mut u8).add(i), lanes) - }; - unsafe { 
- bit_util::bitwise_bin_op_simd(&left_data, &right_data, result_slice, &op) - }; - } - - Ok(result.freeze()) -} - fn bit_ops_benchmark(c: &mut Criterion) { let left = create_buffer(512); let right = create_buffer(512); - c.bench_function("buffer_bit_ops and current impl", |b| { - b.iter(|| bench_and_current_impl(&left, &right)) - }); - #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] - c.bench_function("buffer_bit_ops and packed simd", |b| { - b.iter(|| bench_and_packed_simd_chunked_exact(&left, &right)) - }); - c.bench_function("buffer_bit_ops and chunked autovec", |b| { - b.iter(|| bench_and_chunked_exact(&left, &right)) - }); - c.bench_function("buffer_bit_ops and autovec", |b| { - b.iter(|| bench_and_autovec(&left, &right)) + c.bench_function("buffer_bit_ops and", |b| { + b.iter(|| bench_buffer_and(&left, &right)) }); } diff --git a/rust/arrow/src/array/array.rs b/rust/arrow/src/array/array.rs index 2b53f30ba22..e4628454796 100644 --- a/rust/arrow/src/array/array.rs +++ b/rust/arrow/src/array/array.rs @@ -1907,15 +1907,16 @@ impl TryFrom> for StructArray { let mut null: Option = None; for (field_name, array) in values { let child_datum = array.data(); + let child_datum_len = child_datum.len(); if let Some(len) = len { - if len != child_datum.len() { + if len != child_datum_len { return Err(ArrowError::InvalidArgumentError( format!("Array of field \"{}\" has length {}, but previous elements have length {}. 
- All arrays in every entry in a struct array must have the same length.", field_name, child_datum.len(), len) + All arrays in every entry in a struct array must have the same length.", field_name, child_datum_len, len) )); } } else { - len = Some(child_datum.len()) + len = Some(child_datum_len) } child_data.push(child_datum.clone()); fields.push(Field::new( @@ -1926,7 +1927,7 @@ impl TryFrom> for StructArray { if let Some(child_null_buffer) = child_datum.null_buffer() { null = Some(if let Some(null_buffer) = &null { - buffer_bin_or(null_buffer, 0, child_null_buffer, 0, null_buffer.len()) + buffer_bin_or(null_buffer, 0, child_null_buffer, 0, child_datum_len) } else { child_null_buffer.clone() }); diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index aa1d7fe7f56..c2a11bea4c2 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -34,7 +34,9 @@ use std::sync::Arc; use crate::datatypes::ArrowNativeType; use crate::error::{ArrowError, Result}; use crate::memory; +use crate::util::bit_chunk_iterator::BitChunks; use crate::util::bit_util; +use crate::util::bit_util::ceil; #[cfg(feature = "simd")] use std::borrow::BorrowMut; @@ -254,6 +256,21 @@ impl Buffer { ) } + /// Returns a slice of this buffer starting at a certain bit offset. + /// If the offset is byte-aligned the returned buffer is a shallow clone, + /// otherwise a new buffer is allocated and filled with a copy of the bits in the range. + pub fn bit_slice(&self, offset: usize, len: usize) -> Self { + if offset % 8 == 0 && len % 8 == 0 { + return self.slice(offset / 8); + } + + bitwise_unary_op_helper(&self, offset, len, |a| a) + } + + pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks { + BitChunks::new(&self, offset, len) + } + /// Returns an empty buffer. 
pub fn empty() -> Self { unsafe { Self::from_raw_parts(BUFFER_INIT.as_ptr() as _, 0, 0) } @@ -280,12 +297,16 @@ impl> From for Buffer { let buffer = memory::allocate_aligned(capacity); unsafe { memory::memcpy(buffer, slice.as_ptr(), len); - Buffer::from_raw_parts(buffer, len, capacity) + Buffer::build_with_arguments(buffer, len, capacity, true) } } } -/// Helper function for binary SIMD operations like `BitAnd` and `BitOr`. +/// Apply a bitwise operation `simd_op` / `scalar_op` to two inputs using simd instructions and return the result as a Buffer. +/// The `simd_op` function gets applied on chunks of 64 bytes (512 bits) at a time +/// and the `scalar_op` gets applied to remaining bytes. +/// Contrary to the non-simd version `bitwise_bin_op_helper`, the offset and length are specified in bytes +/// and this version does not support operations starting at arbitrary bit offsets. #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn bitwise_bin_op_simd_helper( left: &Buffer, @@ -330,7 +351,11 @@ where result.freeze() } -/// Helper function for unary SIMD operations like `BitNot`. +/// Apply a bitwise operation `simd_op` / `scalar_op` to one input using simd instructions and return the result as a Buffer. +/// The `simd_op` function gets applied on chunks of 64 bytes (512 bits) at a time +/// and the `scalar_op` gets applied to remaining bytes. +/// Contrary to the non-simd version `bitwise_unary_op_helper`, the offset and length are specified in bytes +/// and this version does not support operations starting at arbitrary bit offsets. #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] fn bitwise_unary_op_simd_helper( left: &Buffer, @@ -369,72 +394,96 @@ where result.freeze() } +/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer. +/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
fn bitwise_bin_op_helper( left: &Buffer, - left_offset: usize, + left_offset_in_bits: usize, right: &Buffer, - right_offset: usize, - len: usize, + right_offset_in_bits: usize, + len_in_bits: usize, op: F, ) -> Buffer where - F: Fn(u8, u8) -> u8, + F: Fn(u64, u64) -> u64, { - let mut result = MutableBuffer::new(len).with_bitset(len, false); + // reserve capacity and set length so we can get a typed view of u64 chunks + let mut result = + MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); - result - .data_mut() - .iter_mut() - .zip( - left.data()[left_offset..] - .iter() - .zip(right.data()[right_offset..].iter()), - ) + let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits); + let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits); + let result_chunks = result.typed_data_mut::().iter_mut(); + + result_chunks + .zip(left_chunks.iter().zip(right_chunks.iter())) .for_each(|(res, (left, right))| { - *res = op(*left, *right); + *res = op(left, right); }); + let remainder_bytes = ceil(left_chunks.remainder_len(), 8); + let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); + let rem = &rem.to_le_bytes()[0..remainder_bytes]; + result + .write_all(rem) + .expect("not enough capacity in buffer"); + result.freeze() } +/// Apply a bitwise operation `op` to one input and return the result as a Buffer. +/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits. 
fn bitwise_unary_op_helper( left: &Buffer, - left_offset: usize, - len: usize, + offset_in_bits: usize, + len_in_bits: usize, op: F, ) -> Buffer where - F: Fn(u8) -> u8, + F: Fn(u64) -> u64, { - let mut result = MutableBuffer::new(len).with_bitset(len, false); + // reserve capacity and set length so we can get a typed view of u64 chunks + let mut result = + MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); - result - .data_mut() - .iter_mut() - .zip(left.data()[left_offset..].iter()) + let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits); + let result_chunks = result.typed_data_mut::().iter_mut(); + + result_chunks + .zip(left_chunks.iter()) .for_each(|(res, left)| { - *res = op(*left); + *res = op(left); }); + let remainder_bytes = ceil(left_chunks.remainder_len(), 8); + let rem = op(left_chunks.remainder_bits()); + let rem = &rem.to_le_bytes()[0..remainder_bytes]; + result + .write_all(rem) + .expect("not enough capacity in buffer"); + result.freeze() } pub(super) fn buffer_bin_and( left: &Buffer, - left_offset: usize, + left_offset_in_bits: usize, right: &Buffer, - right_offset: usize, - len: usize, + right_offset_in_bits: usize, + len_in_bits: usize, ) -> Buffer { - // SIMD implementation if available + // SIMD implementation if available and byte-aligned #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + if left_offset_in_bits % 8 == 0 + && right_offset_in_bits % 8 == 0 + && len_in_bits % 8 == 0 { return bitwise_bin_op_simd_helper( &left, - left_offset, + left_offset_in_bits / 8, &right, - right_offset, - len, + right_offset_in_bits / 8, + len_in_bits / 8, |a, b| a & b, |a, b| a & b, ); @@ -442,26 +491,36 @@ pub(super) fn buffer_bin_and( // Default implementation #[allow(unreachable_code)] { - bitwise_bin_op_helper(&left, left_offset, right, right_offset, len, |a, b| a & b) + bitwise_bin_op_helper( + &left, + left_offset_in_bits, + right, + right_offset_in_bits, + len_in_bits, + |a, b| a 
& b, + ) } } pub(super) fn buffer_bin_or( left: &Buffer, - left_offset: usize, + left_offset_in_bits: usize, right: &Buffer, - right_offset: usize, - len: usize, + right_offset_in_bits: usize, + len_in_bits: usize, ) -> Buffer { - // SIMD implementation if available + // SIMD implementation if available and byte-aligned #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] + if left_offset_in_bits % 8 == 0 + && right_offset_in_bits % 8 == 0 + && len_in_bits % 8 == 0 { return bitwise_bin_op_simd_helper( &left, - left_offset, + left_offset_in_bits / 8, &right, - right_offset, - len, + right_offset_in_bits / 8, + len_in_bits / 8, |a, b| a | b, |a, b| a | b, ); @@ -469,20 +528,37 @@ pub(super) fn buffer_bin_or( // Default implementation #[allow(unreachable_code)] { - bitwise_bin_op_helper(&left, left_offset, right, right_offset, len, |a, b| a | b) + bitwise_bin_op_helper( + &left, + left_offset_in_bits, + right, + right_offset_in_bits, + len_in_bits, + |a, b| a | b, + ) } } -pub(super) fn buffer_unary_not(left: &Buffer, left_offset: usize, len: usize) -> Buffer { - // SIMD implementation if available +pub(super) fn buffer_unary_not( + left: &Buffer, + offset_in_bits: usize, + len_in_bits: usize, +) -> Buffer { + // SIMD implementation if available and byte-aligned #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))] - { - return bitwise_unary_op_simd_helper(&left, left_offset, len, |a| !a, |a| !a); + if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 { + return bitwise_unary_op_simd_helper( + &left, + offset_in_bits / 8, + len_in_bits / 8, + |a| !a, + |a| !a, + ); } // Default implementation #[allow(unreachable_code)] { - bitwise_unary_op_helper(&left, left_offset, len, |a| !a) + bitwise_unary_op_helper(&left, offset_in_bits, len_in_bits, |a| !a) } } @@ -496,7 +572,8 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { )); } - Ok(buffer_bin_and(&self, 0, &rhs, 0, self.len())) + let len_in_bits = self.len() * 8; + 
Ok(buffer_bin_and(&self, 0, &rhs, 0, len_in_bits)) } } @@ -510,7 +587,9 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { )); } - Ok(buffer_bin_or(&self, 0, &rhs, 0, self.len())) + let len_in_bits = self.len() * 8; + + Ok(buffer_bin_or(&self, 0, &rhs, 0, len_in_bits)) } } @@ -518,7 +597,8 @@ impl Not for &Buffer { type Output = Buffer; fn not(self) -> Buffer { - buffer_unary_not(&self, 0, self.len()) + let len_in_bits = self.len() * 8; + buffer_unary_not(&self, 0, len_in_bits) } } diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs index af4706e3ab3..248e4c3e595 100644 --- a/rust/arrow/src/compute/kernels/boolean.rs +++ b/rust/arrow/src/compute/kernels/boolean.rs @@ -48,29 +48,22 @@ where )); } - if left.offset() % 8 != 0 || right.offset() % 8 != 0 { - return Err(ArrowError::ComputeError( - "Cannot perform bitwise operation when offsets are not byte-aligned." - .to_string(), - )); - } + let len = left.len(); let left_data = left.data_ref(); let right_data = right.data_ref(); - let null_bit_buffer = combine_option_bitmap(&left_data, &right_data, left.len())?; + let null_bit_buffer = combine_option_bitmap(&left_data, &right_data, len)?; let left_buffer = &left_data.buffers()[0]; let right_buffer = &right_data.buffers()[0]; - let left_offset = &left.offset() / 8; - let right_offset = &right.offset() / 8; - - let len = ceil(left.len(), 8); + let left_offset = left.offset(); + let right_offset = right.offset(); let values = op(&left_buffer, left_offset, &right_buffer, right_offset, len); let data = ArrayData::new( DataType::Boolean, - left.len(), + len, None, null_bit_buffer, 0, @@ -95,15 +88,8 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { /// Performs unary `NOT` operation on an arrays. If value is null then the result is also /// null. 
pub fn not(left: &BooleanArray) -> Result { - if left.offset() % 8 != 0 { - return Err(ArrowError::ComputeError( - "Cannot perform bitwise operation when offsets are not byte-aligned." - .to_string(), - )); - } - - let left_offset = left.offset() / 8; - let len = ceil(left.len(), 8); + let left_offset = left.offset(); + let len = left.len(); let data = left.data_ref(); let null_bit_buffer = data @@ -115,7 +101,7 @@ pub fn not(left: &BooleanArray) -> Result { let data = ArrayData::new( DataType::Boolean, - left.len(), + len, None, null_bit_buffer, 0, @@ -126,69 +112,39 @@ pub fn not(left: &BooleanArray) -> Result { } pub fn is_null(input: &ArrayRef) -> Result { - if input.offset() % 8 != 0 { - return Err(ArrowError::ComputeError( - "Cannot perform bitwise operation when offsets are not byte-aligned." - .to_string(), - )); - } - - let len_bytes = ceil(input.len(), 8); + let len = input.len(); let output = match input.data_ref().null_buffer() { - None => MutableBuffer::new(len_bytes) - .with_bitset(input.len(), false) - .freeze(), - Some(buffer) => { - let offset_bytes = input.offset() / 8; - - buffer_unary_not(buffer, offset_bytes, len_bytes) + None => { + let len_bytes = ceil(len, 8); + MutableBuffer::new(len_bytes) + .with_bitset(len_bytes, false) + .freeze() } + Some(buffer) => buffer_unary_not(buffer, input.offset(), len), }; - let data = ArrayData::new( - DataType::Boolean, - input.len(), - None, - None, - 0, - vec![output], - vec![], - ); + let data = + ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]); Ok(BooleanArray::from(Arc::new(data))) } pub fn is_not_null(input: &ArrayRef) -> Result { - if input.offset() % 8 != 0 { - return Err(ArrowError::ComputeError( - "Cannot perform bitwise operation when offsets are not byte-aligned." 
- .to_string(), - )); - } - - let len_bytes = ceil(input.len(), 8); + let len = input.len(); let output = match input.data_ref().null_buffer() { - None => MutableBuffer::new(len_bytes) - .with_bitset(input.len(), true) - .freeze(), - Some(buffer) => { - let offset_bytes = input.offset() / 8; - - buffer.slice(offset_bytes) + None => { + let len_bytes = ceil(len, 8); + MutableBuffer::new(len_bytes) + .with_bitset(len_bytes, true) + .freeze() } + Some(buffer) => buffer.bit_slice(input.offset(), len), }; - let data = ArrayData::new( - DataType::Boolean, - input.len(), - None, - None, - 0, - vec![output], - vec![], - ); + let data = + ArrayData::new(DataType::Boolean, len, None, None, 0, vec![output], vec![]); Ok(BooleanArray::from(Arc::new(data))) } diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs index 8ba35fbf8a7..e499dc3f558 100644 --- a/rust/arrow/src/compute/util.rs +++ b/rust/arrow/src/compute/util.rs @@ -23,8 +23,7 @@ use crate::bitmap::Bitmap; use crate::buffer::{buffer_bin_and, buffer_bin_or, Buffer}; #[cfg(feature = "simd")] use crate::datatypes::*; -use crate::error::{ArrowError, Result}; -use crate::util::bit_util::ceil; +use crate::error::Result; #[cfg(feature = "simd")] use num::One; #[cfg(feature = "simd")] @@ -44,29 +43,21 @@ pub(super) fn combine_option_bitmap( let left = left_data.null_buffer(); let right = right_data.null_buffer(); - if (left.is_some() && left_offset_in_bits % 8 != 0) - || (right.is_some() && right_offset_in_bits % 8 != 0) - { - return Err(ArrowError::ComputeError( - "Cannot combine option bitmaps that are not byte-aligned.".to_string(), - )); - } - - let left_offset = left_offset_in_bits / 8; - let right_offset = right_offset_in_bits / 8; - match left { None => match right { None => Ok(None), - Some(r) => Ok(Some(r.slice(right_offset))), + Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), }, Some(l) => match right { - None => Ok(Some(l.slice(left_offset))), + None => 
Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), - Some(r) => { - let len = ceil(len_in_bits, 8); - Ok(Some(buffer_bin_and(&l, left_offset, &r, right_offset, len))) - } + Some(r) => Ok(Some(buffer_bin_and( + &l, + left_offset_in_bits, + &r, + right_offset_in_bits, + len_in_bits, + ))), }, } } @@ -85,29 +76,21 @@ pub(super) fn compare_option_bitmap( let left = left_data.null_buffer(); let right = right_data.null_buffer(); - if (left.is_some() && left_offset_in_bits % 8 != 0) - || (right.is_some() && right_offset_in_bits % 8 != 0) - { - return Err(ArrowError::ComputeError( - "Cannot compare option bitmaps that are not byte-aligned.".to_string(), - )); - } - - let left_offset = left_offset_in_bits / 8; - let right_offset = right_offset_in_bits / 8; - match left { None => match right { None => Ok(None), - Some(r) => Ok(Some(r.slice(right_offset))), + Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))), }, Some(l) => match right { - None => Ok(Some(l.slice(left_offset))), + None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))), - Some(r) => { - let len = ceil(len_in_bits, 8); - Ok(Some(buffer_bin_or(&l, left_offset, &r, right_offset, len))) - } + Some(r) => Ok(Some(buffer_bin_or( + &l, + left_offset_in_bits, + &r, + right_offset_in_bits, + len_in_bits, + ))), }, } } diff --git a/rust/arrow/src/util/bit_chunk_iterator.rs b/rust/arrow/src/util/bit_chunk_iterator.rs new file mode 100644 index 00000000000..11f414ca626 --- /dev/null +++ b/rust/arrow/src/util/bit_chunk_iterator.rs @@ -0,0 +1,223 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::buffer::Buffer;
+use crate::util::bit_util::ceil;
+use std::fmt::Debug;
+
+#[derive(Debug)]
+pub struct BitChunks<'a> {
+    buffer: &'a Buffer,
+    raw_data: *const u8,
+    offset: usize,
+    chunk_len: usize,
+    remainder_len: usize,
+}
+
+impl<'a> BitChunks<'a> {
+    pub fn new(buffer: &'a Buffer, offset: usize, len: usize) -> Self {
+        // `ceil(.., 8)` is a byte count, so it must be compared to the length in bytes
+        assert!(ceil(offset + len, 8) <= buffer.len());
+        let byte_offset = offset / 8;
+        let offset = offset % 8;
+
+        let raw_data = unsafe { buffer.raw_data().add(byte_offset) };
+
+        let chunk_bits = 64;
+
+        let chunk_len = len / chunk_bits;
+        let remainder_len = len & (chunk_bits - 1);
+
+        BitChunks::<'a> {
+            buffer: &buffer,
+            raw_data,
+            offset,
+            chunk_len,
+            remainder_len,
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct BitChunkIterator<'a> {
+    buffer: &'a Buffer,
+    raw_data: *const u8,
+    offset: usize,
+    chunk_len: usize,
+    index: usize,
+}
+
+impl<'a> BitChunks<'a> {
+    #[inline]
+    pub fn remainder_len(&self) -> usize {
+        self.remainder_len
+    }
+
+    #[inline]
+    pub fn remainder_bits(&self) -> u64 {
+        let bit_len = self.remainder_len;
+        if bit_len == 0 {
+            0
+        } else {
+            // a remainder that starts mid-byte can spill into one extra byte, so the
+            // number of bytes to read has to account for the bit offset as well
+            let offset = self.offset;
+            let byte_len = ceil(bit_len + offset, 8);
+
+            // pointer to the first byte after all complete 64-bit chunks
+            let base = unsafe {
+                self.raw_data.add(self.chunk_len * std::mem::size_of::<u64>())
+            };
+
+            let mut bits = unsafe { std::ptr::read(base) } as u64 >> offset;
+            for i in 1..byte_len {
+                let byte = unsafe { std::ptr::read(base.add(i)) };
+                bits |= (byte as u64) << (i * 8 - offset);
+            }
+            bits & ((1 << bit_len) - 1)
+        }
+    }
+
+    #[inline]
+    pub fn iter(&self) -> BitChunkIterator<'a> {
+        BitChunkIterator::<'a> {
+            buffer: self.buffer,
+            raw_data: self.raw_data,
+            offset: self.offset,
+            chunk_len: self.chunk_len,
+            index: 0,
+        }
+    }
+}
+
+impl<'a> IntoIterator for BitChunks<'a> {
+    type Item = u64;
+    type IntoIter = BitChunkIterator<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+impl Iterator for BitChunkIterator<'_> {
+    type Item = u64;
+
+    #[inline]
+    fn next(&mut self) -> Option<u64> {
+        if self.index >= self.chunk_len {
+            return None;
+        }
+
+        // cast to *const u64 should be fine since we are using read_unaligned
+        #[allow(clippy::cast_ptr_alignment)]
+        let current = unsafe {
+            std::ptr::read_unaligned((self.raw_data as *const u64).add(self.index))
+        };
+
+        let combined = if self.offset == 0 {
+            current
+        } else {
+            // only the low `offset` bits of the byte following this chunk are needed;
+            // reading a whole u64 here could run past the end of the buffer
+            let next = unsafe {
+                std::ptr::read(
+                    self.raw_data
+                        .add((self.index + 1) * std::mem::size_of::<u64>()),
+                )
+            } as u64;
+            (current >> self.offset) | (next << (64 - self.offset))
+        };
+
+        self.index += 1;
+
+        Some(combined)
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (
+            self.chunk_len - self.index,
+            Some(self.chunk_len - self.index),
+        )
+    }
+}
+
+impl ExactSizeIterator for BitChunkIterator<'_> {
+    #[inline]
+    fn len(&self) -> usize {
+        self.chunk_len - self.index
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::buffer::Buffer;
+
+    #[test]
+    fn test_iter_aligned() {
+        let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(0, 64);
+        let result = bitchunks.into_iter().collect::<Vec<u64>>();
+
+        assert_eq!(vec![0x0706050403020100], result);
+    }
+
+    #[test]
+    fn test_iter_unaligned() {
+        let input: &[u8] = &[
+            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
+            0b00100000, 0b01000000, 0b11111111,
+        ];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bitchunks = buffer.bit_chunks(4, 64);
+
+        assert_eq!(0, bitchunks.remainder_len());
+
assert_eq!(0, bitchunks.remainder_bits()); + + let result = bitchunks.into_iter().collect::>(); + + //assert_eq!(vec![0b00010000, 0b00100000, 0b01000000, 0b10000000, 0b00000000, 0b00000001, 0b00000010, 0b11110100], result); + assert_eq!( + vec![0b1111010000000010000000010000000010000000010000000010000000010000], + result + ); + } + + #[test] + fn test_iter_unaligned_remainder_1_byte() { + let input: &[u8] = &[ + 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, + 0b00100000, 0b01000000, 0b11111111, + ]; + let buffer: Buffer = Buffer::from(input); + + let bitchunks = buffer.bit_chunks(4, 66); + + assert_eq!(2, bitchunks.remainder_len()); + assert_eq!(0b00000011, bitchunks.remainder_bits()); + + let result = bitchunks.into_iter().collect::>(); + + //assert_eq!(vec![0b00010000, 0b00100000, 0b01000000, 0b10000000, 0b00000000, 0b00000001, 0b00000010, 0b11110100], result); + assert_eq!( + vec![0b1111010000000010000000010000000010000000010000000010000000010000], + result + ); + } +} diff --git a/rust/arrow/src/util/bit_util.rs b/rust/arrow/src/util/bit_util.rs index d8ffa6f19c5..3a974339c83 100644 --- a/rust/arrow/src/util/bit_util.rs +++ b/rust/arrow/src/util/bit_util.rs @@ -43,7 +43,7 @@ pub fn round_upto_multiple_of_64(num: usize) -> usize { /// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must /// be a power of 2. -fn round_upto_power_of_2(num: usize, factor: usize) -> usize { +pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize { debug_assert!(factor > 0 && (factor & (factor - 1)) == 0); (num + (factor - 1)) & !(factor - 1) } diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs index f3bcc23a1d4..30a510fc323 100644 --- a/rust/arrow/src/util/mod.rs +++ b/rust/arrow/src/util/mod.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +pub mod bit_chunk_iterator; pub mod bit_util; pub mod integration_util; #[cfg(feature = "prettyprint")]