From 32344eff6ba6fe8106ad460f695e1fc5652aeec8 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 5 Dec 2020 13:28:07 +0100 Subject: [PATCH 1/3] Comparison kernel speedup --- rust/arrow/src/compute/kernels/comparison.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index fd0bc7343f0..e8e529e9774 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -68,9 +68,14 @@ macro_rules! compare_op { macro_rules! compare_op_scalar { ($left: expr, $right:expr, $op:expr) => {{ let null_bit_buffer = $left.data().null_buffer().cloned(); - let mut result = BooleanBufferBuilder::new($left.len()); + let mut result = MutableBuffer::new($left.len()); + let data = result.raw_data_mut(); for i in 0..$left.len() { - result.append($op($left.value(i), $right))?; + if $op($left.value(i), $right) { + unsafe { + bit_util::set_bit_raw(data, i); + } + } } let data = ArrayData::new( @@ -79,7 +84,7 @@ macro_rules! compare_op_scalar { None, null_bit_buffer, 0, - vec![result.finish()], + vec![result.freeze()], vec![], ); Ok(PrimitiveArray::<BooleanType>::from(Arc::new(data))) From 55f382099749abf92c0767bd755b39c8f31f21e5 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 5 Dec 2020 14:08:44 +0100 Subject: [PATCH 2/3] Make it have same sizing as the bufferbuilder --- rust/arrow/src/compute/kernels/comparison.rs | 27 +++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index e8e529e9774..f449e4ec700 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -47,9 +47,19 @@ macro_rules! 
compare_op { let null_bit_buffer = combine_option_bitmap($left.data_ref(), $right.data_ref(), $left.len())?; - let mut result = BooleanBufferBuilder::new($left.len()); + let byte_capacity = bit_util::ceil($left.len(), 8); + let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); + let mut buffer = MutableBuffer::new(actual_capacity); + buffer.resize(byte_capacity); + buffer.set_null_bits(0, actual_capacity); + + let data = buffer.raw_data_mut(); for i in 0..$left.len() { - result.append($op($left.value(i), $right.value(i)))?; + if $op($left.value(i), $right.value(i)) { + unsafe { + bit_util::set_bit_raw(data, i); + } + } } let data = ArrayData::new( @@ -58,7 +68,7 @@ macro_rules! compare_op { None, null_bit_buffer, 0, - vec![result.finish()], + vec![buffer.freeze()], vec![], ); Ok(PrimitiveArray::<BooleanType>::from(Arc::new(data))) @@ -68,8 +78,13 @@ macro_rules! compare_op { macro_rules! compare_op_scalar { ($left: expr, $right:expr, $op:expr) => {{ let null_bit_buffer = $left.data().null_buffer().cloned(); - let mut result = MutableBuffer::new($left.len()); - let data = result.raw_data_mut(); + let byte_capacity = bit_util::ceil($left.len(), 8); + let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); + let mut buffer = MutableBuffer::new(actual_capacity); + buffer.resize(byte_capacity); + + buffer.set_null_bits(0, actual_capacity); + let data = buffer.raw_data_mut(); for i in 0..$left.len() { if $op($left.value(i), $right) { unsafe { @@ -84,7 +99,7 @@ macro_rules! 
compare_op_scalar { None, null_bit_buffer, 0, - vec![result.freeze()], + vec![buffer.freeze()], vec![], ); Ok(PrimitiveArray::<BooleanType>::from(Arc::new(data))) From 62e71a2f1d9188160750056633a35c01a9c3579b Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Sat, 5 Dec 2020 14:20:45 +0100 Subject: [PATCH 3/3] Simplify --- rust/arrow/src/compute/kernels/comparison.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs index f449e4ec700..e708e12af35 100644 --- a/rust/arrow/src/compute/kernels/comparison.rs +++ b/rust/arrow/src/compute/kernels/comparison.rs @@ -51,7 +51,6 @@ macro_rules! compare_op { let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); let mut buffer = MutableBuffer::new(actual_capacity); buffer.resize(byte_capacity); - buffer.set_null_bits(0, actual_capacity); let data = buffer.raw_data_mut(); for i in 0..$left.len() { @@ -83,7 +82,6 @@ macro_rules! compare_op_scalar { let mut buffer = MutableBuffer::new(actual_capacity); buffer.resize(byte_capacity); - buffer.set_null_bits(0, actual_capacity); let data = buffer.raw_data_mut(); for i in 0..$left.len() { if $op($left.value(i), $right) {