Skip to content
Closed
144 changes: 3 additions & 141 deletions rust/arrow/benches/buffer_bit_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,6 @@ use criterion::Criterion;
extern crate arrow;

use arrow::buffer::{Buffer, MutableBuffer};
use arrow::error::ArrowError;
use arrow::error::Result;
#[cfg(feature = "simd")]
use arrow::util::bit_util;
use std::borrow::BorrowMut;
#[cfg(feature = "simd")]
use std::slice::{from_raw_parts, from_raw_parts_mut};

/// Helper function to create arrays
fn create_buffer(size: usize) -> Buffer {
Expand All @@ -41,146 +34,15 @@ fn create_buffer(size: usize) -> Buffer {
result.freeze()
}

fn bench_and_current_impl(left: &Buffer, right: &Buffer) {
fn bench_buffer_and(left: &Buffer, right: &Buffer) {
criterion::black_box((left & right).unwrap());
}

/// Benchmark body: bitwise AND of two buffers through the `packed_simd`
/// helper, one `u8x64` vector at a time.
///
/// Panics if the helper returns an error.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn bench_and_packed_simd_chunked_exact(left: &Buffer, right: &Buffer) {
    let anded = bitwise_bin_op_simd_helper(left, right, |lhs, rhs| lhs & rhs).unwrap();
    // Route the result through `black_box` so the work is not optimized away.
    criterion::black_box(anded);
}

/// Benchmark body: bitwise AND of two buffers through the fixed-width
/// chunked helper.
///
/// Panics if the helper returns an error.
fn bench_and_chunked_exact(left: &Buffer, right: &Buffer) {
    let anded =
        bitwise_bin_op_autovec_chunked_helper(left, right, |lhs, rhs| lhs & rhs).unwrap();
    // Route the result through `black_box` so the work is not optimized away.
    criterion::black_box(anded);
}

/// Benchmark body: bitwise AND of two buffers through the plain
/// byte-by-byte helper.
///
/// Panics if the helper returns an error.
fn bench_and_autovec(left: &Buffer, right: &Buffer) {
    let anded = bitwise_bin_op_autovec_helper(left, right, |lhs, rhs| lhs & rhs).unwrap();
    // Route the result through `black_box` so the work is not optimized away.
    criterion::black_box(anded);
}

/// Chunk width, in bytes, used by `bitwise_bin_op_autovec_chunked_helper`
/// when it splits the input and output byte slices with `chunks_exact`.
const AUTOVEC_LANES: usize = 64;

/// Applies the byte-wise binary operation `op` to `left` and `right`,
/// processing the data in fixed-size chunks of `AUTOVEC_LANES` bytes and then
/// handling whatever bytes are left over one at a time.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the two buffers differ in length.
/// Note that the message mentions "Bitwise AND" regardless of which `op` the
/// caller supplied.
fn bitwise_bin_op_autovec_chunked_helper<F>(
    left: &Buffer,
    right: &Buffer,
    op: F,
) -> Result<Buffer>
where
    F: Fn(u8, u8) -> u8,
{
    let byte_len = left.len();
    if byte_len != right.len() {
        return Err(ArrowError::ComputeError(
            "Buffers must be the same size to apply Bitwise AND.".to_string(),
        ));
    }

    let mut output = MutableBuffer::new(byte_len).with_bitset(byte_len, false);

    let mut lhs_chunks = left.data().chunks_exact(AUTOVEC_LANES);
    let mut rhs_chunks = right.data().chunks_exact(AUTOVEC_LANES);
    let mut out_chunks = output.data_mut().chunks_exact_mut(AUTOVEC_LANES);

    // Full chunks: the iterators are only borrowed here so that their
    // remainders are still reachable afterwards.
    for (out, (lhs, rhs)) in out_chunks
        .borrow_mut()
        .zip(lhs_chunks.borrow_mut().zip(rhs_chunks.borrow_mut()))
    {
        // Fixed trip count over exactly-sized chunks.
        for lane in 0..AUTOVEC_LANES {
            out[lane] = op(lhs[lane], rhs[lane]);
        }
    }

    // Trailing bytes that did not fill a whole chunk.
    let tail_operands = lhs_chunks
        .remainder()
        .iter()
        .zip(rhs_chunks.remainder().iter());
    for (out, (lhs, rhs)) in out_chunks.into_remainder().iter_mut().zip(tail_operands) {
        *out = op(*lhs, *rhs);
    }

    Ok(output.freeze())
}

/// Applies the byte-wise binary operation `op` to `left` and `right` one byte
/// at a time, writing each result into a freshly created buffer.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the two buffers differ in length.
/// Note that the message mentions "Bitwise AND" regardless of which `op` the
/// caller supplied.
fn bitwise_bin_op_autovec_helper<F>(
    left: &Buffer,
    right: &Buffer,
    op: F,
) -> Result<Buffer>
where
    F: Fn(u8, u8) -> u8,
{
    let byte_len = left.len();
    if byte_len != right.len() {
        return Err(ArrowError::ComputeError(
            "Buffers must be the same size to apply Bitwise AND.".to_string(),
        ));
    }

    let mut output = MutableBuffer::new(byte_len).with_bitset(byte_len, false);

    // Pair up the input bytes, then walk them in lockstep with the output.
    let operands = left.data().iter().zip(right.data().iter());
    for (out, (lhs, rhs)) in output.data_mut().iter_mut().zip(operands) {
        *out = op(*lhs, *rhs);
    }

    Ok(output.freeze())
}

/// Helper function for SIMD `BitAnd` and `BitOr` implementations.
///
/// Applies `op` to `left` and `right` one `packed_simd::u8x64` vector at a
/// time, writing into a new buffer sized from `left.len()`, and returns that
/// buffer after freezing it.
///
/// # Errors
///
/// Returns `ArrowError::ComputeError` when the two buffers differ in length.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn bitwise_bin_op_simd_helper<F>(left: &Buffer, right: &Buffer, op: F) -> Result<Buffer>
where
F: Fn(packed_simd::u8x64, packed_simd::u8x64) -> packed_simd::u8x64,
{
if left.len() != right.len() {
// NOTE(review): the message names "Bitwise AND" although `op` is supplied
// by the caller and may be a different operation.
return Err(ArrowError::ComputeError(
"Buffers must be the same size to apply Bitwise AND.".to_string(),
));
}

let mut result = MutableBuffer::new(left.len()).with_bitset(left.len(), false);
// Number of bytes handled per SIMD operation.
let lanes = packed_simd::u8x64::lanes();
// Step through the inputs one vector width at a time; `i` is the byte offset
// of the current vector.
// NOTE(review): every slice below is exactly `lanes` bytes long, so when
// `left.len()` is not a multiple of `lanes` the last iteration's slices extend
// past `len()`. Presumably this relies on the underlying allocations being
// padded to at least a multiple of `lanes` — confirm before reusing this code.
for i in (0..left.len()).step_by(lanes) {
// SAFETY (unverified): requires `lanes` readable bytes starting at offset `i`
// of `left`; see the review note above.
let left_data = unsafe { from_raw_parts(left.raw_data().add(i), lanes) };
// SAFETY (unverified): same requirement for `right`, whose length was checked
// to equal `left`'s.
let right_data = unsafe { from_raw_parts(right.raw_data().add(i), lanes) };
// SAFETY (unverified): requires `lanes` writable bytes starting at offset `i`
// of `result`'s storage.
let result_slice: &mut [u8] = unsafe {
from_raw_parts_mut((result.data_mut().as_mut_ptr() as *mut u8).add(i), lanes)
};
// Combine one vector's worth of bytes from both inputs into `result_slice`.
// The callee is an `unsafe fn`; its exact preconditions are not visible here.
unsafe {
bit_util::bitwise_bin_op_simd(&left_data, &right_data, result_slice, &op)
};
}

Ok(result.freeze())
}

fn bit_ops_benchmark(c: &mut Criterion) {
let left = create_buffer(512);
let right = create_buffer(512);
c.bench_function("buffer_bit_ops and current impl", |b| {
b.iter(|| bench_and_current_impl(&left, &right))
});
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
c.bench_function("buffer_bit_ops and packed simd", |b| {
b.iter(|| bench_and_packed_simd_chunked_exact(&left, &right))
});
c.bench_function("buffer_bit_ops and chunked autovec", |b| {
b.iter(|| bench_and_chunked_exact(&left, &right))
});
c.bench_function("buffer_bit_ops and autovec", |b| {
b.iter(|| bench_and_autovec(&left, &right))
c.bench_function("buffer_bit_ops and", |b| {
b.iter(|| bench_buffer_and(&left, &right))
});
}

Expand Down
9 changes: 5 additions & 4 deletions rust/arrow/src/array/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1907,15 +1907,16 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
let mut null: Option<Buffer> = None;
for (field_name, array) in values {
let child_datum = array.data();
let child_datum_len = child_datum.len();
if let Some(len) = len {
if len != child_datum.len() {
if len != child_datum_len {
return Err(ArrowError::InvalidArgumentError(
format!("Array of field \"{}\" has length {}, but previous elements have length {}.
All arrays in every entry in a struct array must have the same length.", field_name, child_datum.len(), len)
All arrays in every entry in a struct array must have the same length.", field_name, child_datum_len, len)
));
}
} else {
len = Some(child_datum.len())
len = Some(child_datum_len)
}
child_data.push(child_datum.clone());
fields.push(Field::new(
Expand All @@ -1926,7 +1927,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {

if let Some(child_null_buffer) = child_datum.null_buffer() {
null = Some(if let Some(null_buffer) = &null {
buffer_bin_or(null_buffer, 0, child_null_buffer, 0, null_buffer.len())
buffer_bin_or(null_buffer, 0, child_null_buffer, 0, child_datum_len)
} else {
child_null_buffer.clone()
});
Expand Down
Loading