Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,107 @@ impl BooleanBuffer {
Some(BooleanBuffer::new(buffer, 0, len_in_bits))
}

/// Create a new [`BooleanBuffer`] by applying the bitwise operation `op` to
/// the relevant bits from two input buffers.
///
/// This function is faster than applying the operation bit by bit as
/// it processes input buffers in chunks of 64 bits (8 bytes) at a time.
///
/// The returned buffer always has a bit offset of `0` and a length of
/// `len_in_bits`. Bits of the underlying [`Buffer`] beyond `len_in_bits`
/// hold whatever `op` produced for them and should not be relied upon.
///
/// # Arguments
/// - `left` / `right`: the packed bitmaps to combine
/// - `left_offset_in_bits` / `right_offset_in_bits`: position of the first
///   bit to read from each input
/// - `len_in_bits`: number of bits to read from each input
/// - `op`: called with 64 bits from each input at a time
///
/// # Notes
/// See notes on [Self::from_bitwise_unary_op]
///
/// # See Also
/// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer.
/// - [`apply_bitwise_binary_op`](bit_util::apply_bitwise_binary_op) for in-place binary bitwise operations
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `AND` of two [`Buffer`]s
/// ```
/// # use arrow_buffer::{Buffer, BooleanBuffer};
/// let left = Buffer::from(vec![0b11001100u8, 0b10111010u8]); // 2 bytes = 16 bits
/// let right = Buffer::from(vec![0b10101010u8, 0b11011100u8, 0b11110000u8]); // 3 bytes = 24 bits
/// // AND of the first 12 bits
/// let result = BooleanBuffer::from_bitwise_binary_op(
/// &left, 0, &right, 0, 12, |a, b| a & b
/// );
/// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]);
/// ```
///
/// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices
/// ```
/// # use arrow_buffer::BooleanBuffer;
/// let left = [0b11001100u8, 0b10111010u8];
/// let right = [0b10101010u8, 0b11011100u8];
/// // OR of bits 4..16 from left and bits 0..12 from right
/// let result = BooleanBuffer::from_bitwise_binary_op(
/// &left, 4, &right, 0, 12, |a, b| a | b
/// );
/// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]);
/// ```
pub fn from_bitwise_binary_op<F>(
    left: impl AsRef<[u8]>,
    left_offset_in_bits: usize,
    right: impl AsRef<[u8]>,
    right_offset_in_bits: usize,
    len_in_bits: usize,
    mut op: F,
) -> Self
where
    F: FnMut(u64, u64) -> u64,
{
    let left = left.as_ref();
    let right = right.as_ref();

    // Fast path for aligned input: if both byte slices can be viewed as
    // `u64` slices we can apply the operation directly on whole words.
    if left_offset_in_bits == 0 && right_offset_in_bits == 0 {
        // SAFETY: every bit pattern is a valid `u64`, and `align_to` only
        // places correctly aligned elements in the middle slice.
        let (left_prefix, left_u64s, left_suffix) = unsafe { left.align_to::<u64>() };
        let (right_prefix, right_u64s, right_suffix) = unsafe { right.align_to::<u64>() };

        // Number of 64-bit words needed to cover the requested bits. Only
        // these are processed, so the work and the allocation scale with
        // `len_in_bits` rather than with the size of the inputs.
        let num_u64s = bit_util::ceil(len_in_bits, 64);

        // If there is no prefix or suffix, both buffers are aligned and we
        // can do the operation directly on u64s.
        //
        // Both inputs must also hold at least `num_u64s` words; otherwise the
        // result would claim more bits than its buffer contains. Such inputs
        // fall through to the generic path below instead.
        //
        // TODO also handle non empty suffixes by processing them separately
        if left_prefix.is_empty()
            && right_prefix.is_empty()
            && left_suffix.is_empty()
            && right_suffix.is_empty()
            && left_u64s.len() >= num_u64s
            && right_u64s.len() >= num_u64s
        {
            let result_u64s = left_u64s[..num_u64s]
                .iter()
                .zip(&right_u64s[..num_u64s])
                .map(|(l, r)| op(*l, *r))
                .collect::<Vec<u64>>();
            return BooleanBuffer {
                buffer: Buffer::from(result_u64s),
                bit_offset: 0,
                bit_len: len_in_bits,
            };
        }
    }

    // Generic path: read both inputs as a sequence of 64-bit chunks starting
    // at their respective bit offsets.
    let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
    let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);

    let chunks = left_chunks
        .iter()
        .zip(right_chunks.iter())
        .map(|(left, right)| op(left, right));
    // Soundness: the iterator returned by `BitChunks::iter` correctly
    // reports its upper bound
    let mut buffer = unsafe { MutableBuffer::from_trusted_len_iter(chunks) };

    // Bits left over after the last full 64-bit chunk
    let remainder_bytes = bit_util::ceil(left_chunks.remainder_len(), 8);
    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
    // Bits are counted starting from the least significant bit, so
    // `to_le_bytes` yields the bytes in the correct order
    let rem = &rem.to_le_bytes()[0..remainder_bytes];
    buffer.extend_from_slice(rem);

    BooleanBuffer {
        buffer: Buffer::from(buffer),
        bit_offset: 0,
        bit_len: len_in_bits,
    }
}

/// Returns the number of set bits in this buffer
pub fn count_set_bits(&self) -> usize {
self.buffer
Expand Down Expand Up @@ -655,4 +756,42 @@ mod tests {
assert_eq!(result, expected);
}
}

#[test]
fn test_from_bitwise_binary_op() {
    // Size, in bits, of each randomly generated input bitmap
    const TOTAL_BITS: usize = 1024;

    // Two independent random inputs: left is generated first, then right
    let random_bools = || {
        (0..TOTAL_BITS)
            .map(|_| rand::random::<bool>())
            .collect::<Vec<bool>>()
    };
    let left_bools = random_bools();
    let right_bools = random_bools();
    let left_buffer = BooleanBuffer::from(left_bools.as_slice());
    let right_buffer = BooleanBuffer::from(right_bools.as_slice());

    for left_offset in 0..200 {
        for right_offset in [0, 4, 5, 17, 33, 24, 45, 64, 65, 100, 200] {
            for shrink_by in [0, 1, 44, 100, 256, 300, 512] {
                // Shorten the range so that neither input is read past its end
                let len = TOTAL_BITS - shrink_by - left_offset.max(right_offset);

                // AND computed by the function under test
                let actual = BooleanBuffer::from_bitwise_binary_op(
                    left_buffer.values(),
                    left_offset,
                    right_buffer.values(),
                    right_offset,
                    len,
                    |a, b| a & b,
                );

                // Reference result computed one bool at a time
                let expected: BooleanBuffer = left_bools[left_offset..left_offset + len]
                    .iter()
                    .zip(&right_bools[right_offset..right_offset + len])
                    .map(|(&l, &r)| l & r)
                    .collect();

                assert_eq!(actual, expected);
            }
        }
    }
}
}
31 changes: 12 additions & 19 deletions arrow-buffer/src/buffer/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,35 +61,28 @@ where

/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
///
/// This delegates to [`BooleanBuffer::from_bitwise_binary_op`] and returns
/// the inner [`Buffer`] of the resulting [`BooleanBuffer`].
///
/// # Arguments
/// - `left` / `right`: the bitmaps to combine
/// - `left_offset_in_bits` / `right_offset_in_bits`: position of the first
///   bit to read from each input
/// - `len_in_bits`: number of bits to read from each input
/// - `op`: called with 64 bits from each input at a time
pub fn bitwise_bin_op_helper<F>(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
    op: F,
) -> Buffer
where
    F: FnMut(u64, u64) -> u64,
{
    BooleanBuffer::from_bitwise_binary_op(
        left,
        left_offset_in_bits,
        right,
        right_offset_in_bits,
        len_in_bits,
        op,
    )
    .into_inner()
}

/// Apply a bitwise operation `op` to one input and return the result as a Buffer.
Expand Down
Loading