diff --git a/benches/comparison_kernels.rs b/benches/comparison_kernels.rs index f42da77276d..b302ba0425d 100644 --- a/benches/comparison_kernels.rs +++ b/benches/comparison_kernels.rs @@ -23,38 +23,42 @@ use arrow2::array::*; use arrow2::util::bench_util::*; use arrow2::{compute::comparison::*, datatypes::DataType, types::NativeType}; -fn bench_eq(arr_a: &PrimitiveArray, arr_b: &PrimitiveArray) +fn bench_op(arr_a: &PrimitiveArray, arr_b: &PrimitiveArray, op: Operator) where T: NativeType, { - compare( - criterion::black_box(arr_a), - criterion::black_box(arr_b), - Operator::Eq, - ) - .unwrap(); + compare(criterion::black_box(arr_a), criterion::black_box(arr_b), op).unwrap(); } -fn bench_eq_scalar(arr_a: &PrimitiveArray, value_b: T) +fn bench_op_scalar(arr_a: &PrimitiveArray, value_b: T, op: Operator) where T: NativeType + std::cmp::PartialOrd, { primtive_compare_scalar( criterion::black_box(arr_a), criterion::black_box(value_b), - Operator::Eq, + op, ) .unwrap(); } fn add_benchmark(c: &mut Criterion) { let size = 65536; - let arr_a = create_primitive_array::(size, DataType::Float32, 0.0); - let arr_b = create_primitive_array::(size, DataType::Float32, 0.0); + let arr_a = create_primitive_array_with_seed::(size, DataType::Float32, 0.0, 42); + let arr_b = create_primitive_array_with_seed::(size, DataType::Float32, 0.0, 43); - c.bench_function("eq Float32", |b| b.iter(|| bench_eq(&arr_a, &arr_b))); + c.bench_function("eq Float32", |b| { + b.iter(|| bench_op(&arr_a, &arr_b, Operator::Eq)) + }); c.bench_function("eq scalar Float32", |b| { - b.iter(|| bench_eq_scalar(&arr_a, 1.0)) + b.iter(|| bench_op_scalar(&arr_a, 0.5, Operator::Eq)) + }); + + c.bench_function("lt Float32", |b| { + b.iter(|| bench_op(&arr_a, &arr_b, Operator::Lt)) + }); + c.bench_function("lt scalar Float32", |b| { + b.iter(|| bench_op_scalar(&arr_a, 0.5, Operator::Lt)) }); } diff --git a/src/compute/comparison/primitive.rs b/src/compute/comparison/primitive.rs index 471445f0df9..6561f463a4a 100644 --- a/src/compute/comparison/primitive.rs +++ b/src/compute/comparison/primitive.rs @@ -17,7 +17,6 @@ use crate::{array::*, types::NativeType}; use crate::{ - bits, buffer::MutableBuffer, error::{ArrowError, Result}, }; @@ -53,11 +52,12 @@ where .zip(lhs_chunks_iter) .zip(rhs_chunks_iter) .for_each(|((byte, lhs), rhs)| { - (0..8).for_each(|i| { - if op(lhs[i], rhs[i]) { - *byte = bits::set(*byte, i) - } - }); + lhs.iter() + .zip(rhs.iter()) + .enumerate() + .for_each(|(i, (&lhs, &rhs))| { + *byte |= if op(lhs, rhs) { 1 << i } else { 0 }; + }); }); if !lhs_remainder.is_empty() { @@ -66,10 +66,8 @@ where .iter() .zip(rhs_remainder.iter()) .enumerate() - .for_each(|(i, (lhs, rhs))| { - if op(*lhs, *rhs) { - *last = bits::set(*last, i) - } + .for_each(|(i, (&lhs, &rhs))| { + *last |= if op(lhs, rhs) { 1 << i } else { 0 }; }); }; @@ -97,20 +95,16 @@ where values[..chunks] .iter_mut() .zip(lhs_chunks_iter) - .for_each(|(byte, lhs)| { - (0..8).for_each(|i| { - if op(lhs[i], rhs) { - *byte = bits::set(*byte, i) - } + .for_each(|(byte, chunk)| { + chunk.iter().enumerate().for_each(|(i, &c_i)| { + *byte |= if op(c_i, rhs) { 1 << i } else { 0 }; }); }); if !lhs_remainder.is_empty() { let last = &mut values[chunks]; - lhs_remainder.iter().enumerate().for_each(|(i, lhs)| { - if op(*lhs, rhs) { - *last = bits::set(*last, i) - } + lhs_remainder.iter().enumerate().for_each(|(i, &lhs)| { + *last |= if op(lhs, rhs) { 1 << i } else { 0 }; }); }; diff --git a/src/util/bench_util.rs b/src/util/bench_util.rs index cfa4d71c6ce..d84da3e862d 100644 --- a/src/util/bench_util.rs +++ b/src/util/bench_util.rs @@ -51,6 +51,30 @@ where .to(data_type) } +pub fn create_primitive_array_with_seed( + size: usize, + data_type: DataType, + null_density: f32, + seed: u64, +) -> PrimitiveArray +where + T: NativeType, + Standard: Distribution, +{ + let mut rng = StdRng::seed_from_u64(seed); + + (0..size) + .map(|_| { + if rng.gen::() < null_density { + None + } else { + Some(rng.gen()) + } + }) + .collect::>() + .to(data_type) +} + /// Creates an random (but fixed-seeded) array of a given size and null density pub fn create_boolean_array(size: usize, null_density: f32, true_density: f32) -> BooleanArray where