diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml index 6e2d4839bba..04e8ac05c35 100644 --- a/rust/arrow/Cargo.toml +++ b/rust/arrow/Cargo.toml @@ -60,5 +60,5 @@ name = "builder" harness = false [[bench]] -name = "bitwise_ops" +name = "boolean_kernels" harness = false diff --git a/rust/arrow/benches/bitwise_ops.rs b/rust/arrow/benches/bitwise_ops.rs deleted file mode 100644 index 434ff4d9508..00000000000 --- a/rust/arrow/benches/bitwise_ops.rs +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#[macro_use] -extern crate criterion; -use criterion::Criterion; - -extern crate arrow; - -use arrow::buffer::Buffer; -use arrow::builder::{BufferBuilderTrait, UInt8BufferBuilder}; - -fn create_buffer(size: usize) -> Buffer { - let mut builder = UInt8BufferBuilder::new(size); - for _i in 0..size { - builder.append(1_u8).unwrap(); - } - builder.finish() -} - -fn bitwise_default(size: usize, op: F) -where - F: Fn(&u8, &u8) -> u8, -{ - let buffer_a = create_buffer(size); - let buffer_b = create_buffer(size); - - criterion::black_box({ - let mut builder = UInt8BufferBuilder::new(buffer_a.len()); - for i in 0..buffer_a.len() { - unsafe { - builder - .append(op( - buffer_a.data().get_unchecked(i), - buffer_b.data().get_unchecked(i), - )) - .unwrap(); - } - } - builder.finish() - }); -} - -fn bitwise_simd(size: usize, op: F) -where - F: Fn(&Buffer, &Buffer) -> Buffer, -{ - let buffer_a = create_buffer(size); - let buffer_b = create_buffer(size); - criterion::black_box(op(&buffer_a, &buffer_b)); -} - -fn add_benchmark(c: &mut Criterion) { - c.bench_function("add", |b| b.iter(|| bitwise_default(512, |a, b| a & b))); - c.bench_function("add simd", |b| b.iter(|| bitwise_simd(512, |a, b| a & b))); - c.bench_function("or", |b| b.iter(|| bitwise_default(512, |a, b| a | b))); - c.bench_function("or simd", |b| b.iter(|| bitwise_simd(512, |a, b| a | b))); -} - -criterion_group!(benches, add_benchmark); -criterion_main!(benches); diff --git a/rust/arrow/benches/boolean_kernels.rs b/rust/arrow/benches/boolean_kernels.rs new file mode 100644 index 00000000000..237b1e9315a --- /dev/null +++ b/rust/arrow/benches/boolean_kernels.rs @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[macro_use] +extern crate criterion; +use criterion::Criterion; + +extern crate arrow; + +use arrow::array::*; +use arrow::builder::*; +use arrow::compute::boolean_kernels; +use arrow::error::{ArrowError, Result}; + +/// Helper function to create arrays +fn create_boolean_array(size: usize) -> BooleanArray { + let mut builder = BooleanBuilder::new(size); + for i in 0..size { + if i % 2 == 0 { + builder.append_value(true).unwrap(); + } else { + builder.append_value(false).unwrap(); + } + } + builder.finish() +} + +/// Helper function to implement `AND` and `OR` without SIMD +pub fn bin_op_no_simd( + left: &BooleanArray, + right: &BooleanArray, + op: F, +) -> Result +where + F: Fn(bool, bool) -> bool, +{ + if left.len() != right.len() { + return Err(ArrowError::ComputeError( + "Cannot perform boolean operation on arrays of different length".to_string(), + )); + } + let mut b = BooleanArray::builder(left.len()); + for i in 0..left.len() { + if left.is_null(i) || right.is_null(i) { + b.append_null()?; + } else { + b.append_value(op(left.value(i), right.value(i)))?; + } + } + Ok(b.finish()) +} + +/// Benchmark for `AND` and `OR` with no SIMD +fn bench_bin_op_no_simd(size: usize, op: F) +where + F: Fn(bool, bool) -> bool, +{ + let array_a = create_boolean_array(size); + let array_b = create_boolean_array(size); + + criterion::black_box(bin_op_no_simd(&array_a, &array_b, &op).unwrap()); +} + +/// Benchmark for `NOT` with no SIMD +fn bench_not_no_simd(size: usize) { + let array = create_boolean_array(size); + + criterion::black_box({ + let mut b = BooleanArray::builder(array.len()); + for i in 0..array.len() { + if array.is_null(i) { + b.append_null().unwrap(); + } else { + b.append_value(!array.value(i)).unwrap(); + } + } + b.finish() + }); +} + +/// Benchmark for `AND` with SIMD +fn bench_and_simd(size: usize) { + let buffer_a = create_boolean_array(size); + let buffer_b = create_boolean_array(size); + criterion::black_box(boolean_kernels::and(&buffer_a, &buffer_b).unwrap()); +} + +/// Benchmark for `OR` with SIMD +fn bench_or_simd(size: usize) { + let buffer_a = create_boolean_array(size); + let buffer_b = create_boolean_array(size); + criterion::black_box(boolean_kernels::or(&buffer_a, &buffer_b).unwrap()); +} + +/// Benchmark for `NOT` with SIMD +fn bench_not_simd(size: usize) { + let buffer = create_boolean_array(size); + criterion::black_box(boolean_kernels::not(&buffer).unwrap()); +} + +fn add_benchmark(c: &mut Criterion) { + c.bench_function("and", |b| { + b.iter(|| bench_bin_op_no_simd(512, |a, b| a && b)) + }); + c.bench_function("and simd", |b| b.iter(|| bench_and_simd(512))); + c.bench_function("or", |b| { + b.iter(|| bench_bin_op_no_simd(512, |a, b| a || b)) + }); + c.bench_function("or simd", |b| b.iter(|| bench_or_simd(512))); + c.bench_function("not", |b| b.iter(|| bench_not_no_simd(512))); + c.bench_function("not simd", |b| b.iter(|| bench_not_simd(512))); +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/rust/arrow/src/bitmap.rs b/rust/arrow/src/bitmap.rs index 17dc548956d..e42e5608525 100644 --- a/rust/arrow/src/bitmap.rs +++ b/rust/arrow/src/bitmap.rs @@ -18,8 +18,10 @@ //! Defines a bitmap, which is used to track which values in an Arrow array are null. //! This is called a "validity bitmap" in the Arrow documentation. -use super::buffer::Buffer; +use crate::buffer::Buffer; +use crate::error::Result; use crate::util::bit_util; + use std::ops::{BitAnd, BitOr}; #[derive(PartialEq, Debug)] @@ -56,18 +58,18 @@ impl Bitmap { } impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap { - type Output = Bitmap; + type Output = Result; - fn bitand(self, rhs: &'b Bitmap) -> Bitmap { - Bitmap::from(&self.bits & &rhs.bits) + fn bitand(self, rhs: &'b Bitmap) -> Result { + Ok(Bitmap::from((&self.bits & &rhs.bits)?)) } } impl<'a, 'b> BitOr<&'b Bitmap> for &'a Bitmap { - type Output = Bitmap; + type Output = Result; - fn bitor(self, rhs: &'b Bitmap) -> Bitmap { - Bitmap::from(&self.bits | &rhs.bits) + fn bitor(self, rhs: &'b Bitmap) -> Result { + Ok(Bitmap::from((&self.bits | &rhs.bits)?)) } } @@ -94,7 +96,7 @@ mod tests { let bitmap2 = Bitmap::from(Buffer::from([0b01001110])); assert_eq!( Bitmap::from(Buffer::from([0b01001010])), - &bitmap1 & &bitmap2 + (&bitmap1 & &bitmap2).unwrap() ); } @@ -104,7 +106,7 @@ mod tests { let bitmap2 = Bitmap::from(Buffer::from([0b01001110])); assert_eq!( Bitmap::from(Buffer::from([0b01101110])), - &bitmap1 | &bitmap2 + (&bitmap1 | &bitmap2).unwrap() ); } diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs index 447a5753d2b..ff4cae8a0bd 100644 --- a/rust/arrow/src/buffer.rs +++ b/rust/arrow/src/buffer.rs @@ -24,12 +24,12 @@ use packed_simd::u8x64; use std::cmp; use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write}; use std::mem; -use std::ops::{BitAnd, BitOr}; +use std::ops::{BitAnd, BitOr, Not}; use std::slice::{from_raw_parts, from_raw_parts_mut}; use std::sync::Arc; use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder}; -use crate::error::Result; +use crate::error::{ArrowError, Result}; use crate::memory; use crate::util::bit_util; @@ -173,19 +173,19 @@ where } impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { - type Output = Buffer; + type Output = Result; - fn bitand(self, rhs: &'b Buffer) -> Buffer { - assert_eq!( - self.len(), - rhs.len(), - "Buffers must be the same size to apply Bitwise AND." - ); + fn bitand(self, rhs: &'b Buffer) -> Result { + if self.len() != rhs.len() { + return Err(ArrowError::ComputeError( + "Buffers must be the same size to apply Bitwise AND.".to_string(), + )); + } // SIMD implementation if available #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b); + return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a & b)); } // Default implementation @@ -201,25 +201,25 @@ impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer { .unwrap(); } } - builder.finish() + Ok(builder.finish()) } } } impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { - type Output = Buffer; + type Output = Result; - fn bitor(self, rhs: &'b Buffer) -> Buffer { - assert_eq!( - self.len(), - rhs.len(), - "Buffers must be the same size to apply Bitwise OR." - ); + fn bitor(self, rhs: &'b Buffer) -> Result { + if self.len() != rhs.len() { + return Err(ArrowError::ComputeError( + "Buffers must be the same size to apply Bitwise OR.".to_string(), + )); + } // SIMD implementation if available #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - return bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b); + return Ok(bitwise_bin_op_simd_helper(&self, &rhs, |a, b| a | b)); } // Default implementation @@ -235,6 +235,45 @@ impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer { .unwrap(); } } + Ok(builder.finish()) + } + } +} + +impl Not for &Buffer { + type Output = Buffer; + + fn not(self) -> Buffer { + // SIMD implementation if available + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + let mut result = + MutableBuffer::new(self.len()).with_bitset(self.len(), false); + let lanes = u8x64::lanes(); + for i in (0..self.len()).step_by(lanes) { + unsafe { + let data = from_raw_parts(self.raw_data().offset(i as isize), lanes); + let data_simd = u8x64::from_slice_unaligned_unchecked(data); + let simd_result = !data_simd; + let result_slice: &mut [u8] = from_raw_parts_mut( + (result.data_mut().as_mut_ptr() as *mut u8).offset(i as isize), + lanes, + ); + simd_result.write_to_slice_unaligned_unchecked(result_slice); + } + } + return result.freeze(); + } + + // Default implementation + #[allow(unreachable_code)] + { + let mut builder = UInt8BufferBuilder::new(self.len()); + for i in 0..self.len() { + unsafe { + builder.append(!self.data().get_unchecked(i)).unwrap(); + } + } builder.finish() } } @@ -537,14 +576,20 @@ mod tests { fn test_bitwise_and() { let buf1 = Buffer::from([0b01101010]); let buf2 = Buffer::from([0b01001110]); - assert_eq!(Buffer::from([0b01001010]), &buf1 & &buf2); + assert_eq!(Buffer::from([0b01001010]), (&buf1 & &buf2).unwrap()); } #[test] fn test_bitwise_or() { let buf1 = Buffer::from([0b01101010]); let buf2 = Buffer::from([0b01001110]); - assert_eq!(Buffer::from([0b01101110]), &buf1 | &buf2); + assert_eq!(Buffer::from([0b01101110]), (&buf1 | &buf2).unwrap()); + } + + #[test] + fn test_bitwise_not() { + let buf = Buffer::from([0b01101010]); + assert_eq!(Buffer::from([0b10010101]), !&buf); } #[test] @@ -552,7 +597,7 @@ mod tests { fn test_buffer_bitand_different_sizes() { let buf1 = Buffer::from([1_u8, 1_u8]); let buf2 = Buffer::from([0b01001110]); - let _buf3 = &buf1 | &buf2; + let _buf3 = (&buf1 | &buf2).unwrap(); } #[test] diff --git a/rust/arrow/src/array_ops.rs b/rust/arrow/src/compute/array_ops.rs similarity index 82% rename from rust/arrow/src/array_ops.rs rename to rust/arrow/src/compute/array_ops.rs index 6e847c8b378..1a2d5fa749b 100644 --- a/rust/arrow/src/array_ops.rs +++ b/rust/arrow/src/compute/array_ops.rs @@ -312,58 +312,6 @@ where Ok(b.finish()) } -/// Perform `AND` operation on two arrays. If either left or right value is null then the -/// result is also null. -pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform boolean operation on arrays of different length".to_string(), - )); - } - let mut b = BooleanArray::builder(left.len()); - for i in 0..left.len() { - if left.is_null(i) || right.is_null(i) { - b.append_null()?; - } else { - b.append_value(left.value(i) && right.value(i))?; - } - } - Ok(b.finish()) -} - -/// Perform `OR` operation on two arrays. If either left or right value is null then the -/// result is also null. -pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform boolean operation on arrays of different length".to_string(), - )); - } - let mut b = BooleanArray::builder(left.len()); - for i in 0..left.len() { - if left.is_null(i) || right.is_null(i) { - b.append_null()?; - } else { - b.append_value(left.value(i) || right.value(i))?; - } - } - Ok(b.finish()) -} - -/// Perform unary `NOT` operation on an arrays. If value is null then the result is also -/// null. -pub fn not(left: &BooleanArray) -> Result { - let mut b = BooleanArray::builder(left.len()); - for i in 0..left.len() { - if left.is_null(i) { - b.append_null()?; - } else { - b.append_value(!left.value(i))?; - } - } - Ok(b.finish()) -} - #[cfg(test)] mod tests { use super::*; @@ -611,58 +559,4 @@ mod tests { assert_eq!(5, min(&a).unwrap()); assert_eq!(9, max(&a).unwrap()); } - - #[test] - fn test_bool_array_and() { - let a = BooleanArray::from(vec![false, false, true, true]); - let b = BooleanArray::from(vec![false, true, false, true]); - let c = and(&a, &b).unwrap(); - assert_eq!(false, c.value(0)); - assert_eq!(false, c.value(1)); - assert_eq!(false, c.value(2)); - assert_eq!(true, c.value(3)); - } - - #[test] - fn test_bool_array_or() { - let a = BooleanArray::from(vec![false, false, true, true]); - let b = BooleanArray::from(vec![false, true, false, true]); - let c = or(&a, &b).unwrap(); - assert_eq!(false, c.value(0)); - assert_eq!(true, c.value(1)); - assert_eq!(true, c.value(2)); - assert_eq!(true, c.value(3)); - } - - #[test] - fn test_bool_array_or_nulls() { - let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]); - let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]); - let c = or(&a, &b).unwrap(); - assert_eq!(true, c.is_null(0)); - assert_eq!(true, c.is_null(1)); - assert_eq!(true, c.is_null(2)); - assert_eq!(false, c.is_null(3)); - } - - #[test] - fn test_bool_array_not() { - let a = BooleanArray::from(vec![false, false, true, true]); - let c = not(&a).unwrap(); - assert_eq!(true, c.value(0)); - assert_eq!(true, c.value(1)); - assert_eq!(false, c.value(2)); - assert_eq!(false, c.value(3)); - } - - #[test] - fn test_bool_array_and_nulls() { - let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]); - let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]); - let c = and(&a, &b).unwrap(); - assert_eq!(true, c.is_null(0)); - assert_eq!(true, c.is_null(1)); - assert_eq!(true, c.is_null(2)); - assert_eq!(false, c.is_null(3)); - } } diff --git a/rust/arrow/src/compute/boolean_kernels.rs b/rust/arrow/src/compute/boolean_kernels.rs new file mode 100644 index 00000000000..ededea9d4c4 --- /dev/null +++ b/rust/arrow/src/compute/boolean_kernels.rs @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines boolean kernels on Arrow `BooleanArray`'s, e.g. `AND`, `OR` and `NOT`. +//! +//! Kernels support SIMD using [static CPU feature detection](https://doc.rust-lang.org/stable/std/arch/#static-cpu-feature-detection) +//! . + +use std::sync::Arc; + +use crate::array::{Array, BooleanArray}; +use crate::array_data::ArrayData; +use crate::buffer::Buffer; +use crate::compute::util::apply_bin_op_to_option_bitmap; +use crate::datatypes::DataType; +use crate::error::{ArrowError, Result}; + +/// Helper function to implement binary kernels +fn binary_boolean_kernel( + left: &BooleanArray, + right: &BooleanArray, + op: F, +) -> Result +where + F: Fn(&Buffer, &Buffer) -> Result, +{ + if left.offset() != right.offset() { + return Err(ArrowError::ComputeError( + "Cannot apply Bitwise binary op when arrays have different offsets." + .to_string(), + )); + } + + let left_data = left.data(); + let right_data = right.data(); + let null_bit_buffer = apply_bin_op_to_option_bitmap( + left_data.null_bitmap(), + right_data.null_bitmap(), + |a, b| a & b, + )?; + let values = op(&left_data.buffers()[0], &right_data.buffers()[0])?; + let data = ArrayData::new( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + left.offset(), + vec![values], + vec![], + ); + Ok(BooleanArray::from(Arc::new(data))) +} + +/// Performs `AND` operation on two arrays. If either left or right value is null then the +/// result is also null. +pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { + binary_boolean_kernel(&left, &right, |a, b| a & b) +} + +/// Performs `OR` operation on two arrays. If either left or right value is null then the +/// result is also null. +pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { + binary_boolean_kernel(&left, &right, |a, b| a | b) +} + +/// Performs unary `NOT` operation on an arrays. If value is null then the result is also +/// null. +pub fn not(left: &BooleanArray) -> Result { + let data = left.data(); + let null_bit_buffer = match *data.null_bitmap() { + None => None, + Some(ref b) => Some(b.bits.clone()), + }; + + let values = !&data.buffers()[0]; + let data = ArrayData::new( + DataType::Boolean, + left.len(), + None, + null_bit_buffer, + left.offset(), + vec![values], + vec![], + ); + Ok(BooleanArray::from(Arc::new(data))) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bool_array_and() { + let a = BooleanArray::from(vec![false, false, true, true]); + let b = BooleanArray::from(vec![false, true, false, true]); + let c = and(&a, &b).unwrap(); + assert_eq!(false, c.value(0)); + assert_eq!(false, c.value(1)); + assert_eq!(false, c.value(2)); + assert_eq!(true, c.value(3)); + } + + #[test] + fn test_bool_array_or() { + let a = BooleanArray::from(vec![false, false, true, true]); + let b = BooleanArray::from(vec![false, true, false, true]); + let c = or(&a, &b).unwrap(); + assert_eq!(false, c.value(0)); + assert_eq!(true, c.value(1)); + assert_eq!(true, c.value(2)); + assert_eq!(true, c.value(3)); + } + + #[test] + fn test_bool_array_or_nulls() { + let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]); + let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]); + let c = or(&a, &b).unwrap(); + assert_eq!(true, c.is_null(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(true, c.is_null(2)); + assert_eq!(false, c.is_null(3)); + } + + #[test] + fn test_bool_array_not() { + let a = BooleanArray::from(vec![false, false, true, true]); + let c = not(&a).unwrap(); + assert_eq!(true, c.value(0)); + assert_eq!(true, c.value(1)); + assert_eq!(false, c.value(2)); + assert_eq!(false, c.value(3)); + } + + #[test] + fn test_bool_array_and_nulls() { + let a = BooleanArray::from(vec![None, Some(false), None, Some(false)]); + let b = BooleanArray::from(vec![None, None, Some(false), Some(false)]); + let c = and(&a, &b).unwrap(); + assert_eq!(true, c.is_null(0)); + assert_eq!(true, c.is_null(1)); + assert_eq!(true, c.is_null(2)); + assert_eq!(false, c.is_null(3)); + } +} diff --git a/rust/arrow/src/compute/mod.rs b/rust/arrow/src/compute/mod.rs new file mode 100644 index 00000000000..5aee2b3ee0d --- /dev/null +++ b/rust/arrow/src/compute/mod.rs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Computation kernels on Arrow Arrays + +pub mod array_ops; +pub mod boolean_kernels; + +mod util; + +pub use self::array_ops::*; +pub use self::boolean_kernels::*; diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs new file mode 100644 index 00000000000..55726b85eda --- /dev/null +++ b/rust/arrow/src/compute/util.rs @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Common utilities for computation kernels. + +use crate::bitmap::Bitmap; +use crate::buffer::Buffer; +use crate::error::Result; + +/// Applies a given binary operation, `op`, to two references to `Option`'s. +/// +/// This function is useful when implementing operations on higher level arrays. +pub(crate) fn apply_bin_op_to_option_bitmap( + left: &Option, + right: &Option, + op: F, +) -> Result> +where + F: Fn(&Buffer, &Buffer) -> Result, +{ + match *left { + None => match *right { + None => Ok(None), + Some(ref r) => Ok(Some(r.bits.clone())), + }, + Some(ref l) => match *right { + None => Ok(Some(l.bits.clone())), + Some(ref r) => Ok(Some(op(&l.bits, &r.bits)?)), + }, + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_apply_bin_op_to_option_bitmap() { + assert_eq!( + Ok(None), + apply_bin_op_to_option_bitmap(&None, &None, |a, b| a & b) + ); + assert_eq!( + Ok(Some(Buffer::from([0b01101010]))), + apply_bin_op_to_option_bitmap( + &Some(Bitmap::from(Buffer::from([0b01101010]))), + &None, + |a, b| a & b + ) + ); + assert_eq!( + Ok(Some(Buffer::from([0b01001110]))), + apply_bin_op_to_option_bitmap( + &None, + &Some(Bitmap::from(Buffer::from([0b01001110]))), + |a, b| a & b + ) + ); + assert_eq!( + Ok(Some(Buffer::from([0b01001010]))), + apply_bin_op_to_option_bitmap( + &Some(Bitmap::from(Buffer::from([0b01101010]))), + &Some(Bitmap::from(Buffer::from([0b01001110]))), + |a, b| a & b + ) + ); + } + +} diff --git a/rust/arrow/src/lib.rs b/rust/arrow/src/lib.rs index dbac4db1151..ca06fc1e612 100644 --- a/rust/arrow/src/lib.rs +++ b/rust/arrow/src/lib.rs @@ -30,10 +30,10 @@ pub mod array; pub mod array_data; -pub mod array_ops; pub mod bitmap; pub mod buffer; pub mod builder; +pub mod compute; pub mod csv; pub mod datatypes; pub mod error; diff --git a/rust/datafusion/src/execution/aggregate.rs b/rust/datafusion/src/execution/aggregate.rs index 5acf5fb63a1..d8cbfdfbb5f 100644 --- a/rust/datafusion/src/execution/aggregate.rs +++ b/rust/datafusion/src/execution/aggregate.rs @@ -24,8 +24,8 @@ use std::str; use std::sync::Arc; use arrow::array::*; -use arrow::array_ops; use arrow::builder::*; +use arrow::compute; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; @@ -346,61 +346,61 @@ fn create_accumulators(aggr_expr: &Vec) -> Result { fn array_min(array: ArrayRef, dt: &DataType) -> Result> { match dt { DataType::UInt8 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt8(n))), None => Ok(None), } } DataType::UInt16 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt16(n))), None => Ok(None), } } DataType::UInt32 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt32(n))), None => Ok(None), } } DataType::UInt64 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt64(n))), None => Ok(None), } } DataType::Int8 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int8(n))), None => Ok(None), } } DataType::Int16 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int16(n))), None => Ok(None), } } DataType::Int32 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int32(n))), None => Ok(None), } } DataType::Int64 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int64(n))), None => Ok(None), } } DataType::Float32 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float32(n))), None => Ok(None), } } DataType::Float64 => { - match array_ops::min(array.as_any().downcast_ref::().unwrap()) { + match compute::min(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float64(n))), None => Ok(None), } @@ -414,61 +414,61 @@ fn array_min(array: ArrayRef, dt: &DataType) -> Result> { fn array_max(array: ArrayRef, dt: &DataType) -> Result> { match dt { DataType::UInt8 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt8(n))), None => Ok(None), } } DataType::UInt16 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt16(n))), None => Ok(None), } } DataType::UInt32 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt32(n))), None => Ok(None), } } DataType::UInt64 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt64(n))), None => Ok(None), } } DataType::Int8 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int8(n))), None => Ok(None), } } DataType::Int16 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int16(n))), None => Ok(None), } } DataType::Int32 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int32(n))), None => Ok(None), } } DataType::Int64 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int64(n))), None => Ok(None), } } DataType::Float32 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float32(n))), None => Ok(None), } } DataType::Float64 => { - match array_ops::max(array.as_any().downcast_ref::().unwrap()) { + match compute::max(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float64(n))), None => Ok(None), } @@ -482,61 +482,61 @@ fn array_max(array: ArrayRef, dt: &DataType) -> Result> { fn array_sum(array: ArrayRef, dt: &DataType) -> Result> { match dt { DataType::UInt8 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt8(n))), None => Ok(None), } } DataType::UInt16 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt16(n))), None => Ok(None), } } DataType::UInt32 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt32(n))), None => Ok(None), } } DataType::UInt64 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::UInt64(n))), None => Ok(None), } } DataType::Int8 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int8(n))), None => Ok(None), } } DataType::Int16 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int16(n))), None => Ok(None), } } DataType::Int32 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int32(n))), None => Ok(None), } } DataType::Int64 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Int64(n))), None => Ok(None), } } DataType::Float32 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float32(n))), None => Ok(None), } } DataType::Float64 => { - match array_ops::sum(array.as_any().downcast_ref::().unwrap()) { + match compute::sum(array.as_any().downcast_ref::().unwrap()) { Some(n) => Ok(Some(ScalarValue::Float64(n))), None => Ok(None), } diff --git a/rust/datafusion/src/execution/expression.rs b/rust/datafusion/src/execution/expression.rs index 108a8558b9a..fa6201a84d8 100644 --- a/rust/datafusion/src/execution/expression.rs +++ b/rust/datafusion/src/execution/expression.rs @@ -19,7 +19,7 @@ use std::rc::Rc; use std::sync::Arc; use arrow::array::*; -use arrow::array_ops; +use arrow::compute; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; @@ -127,7 +127,7 @@ macro_rules! binary_op { ($LEFT:expr, $RIGHT:expr, $OP:ident, $DT:ident) => {{ let ll = $LEFT.as_any().downcast_ref::<$DT>().unwrap(); let rr = $RIGHT.as_any().downcast_ref::<$DT>().unwrap(); - Ok(Arc::new(array_ops::$OP(&ll, &rr)?)) + Ok(Arc::new(compute::$OP(&ll, &rr)?)) }}; } @@ -216,7 +216,7 @@ macro_rules! boolean_ops { ($LEFT:expr, $RIGHT:expr, $BATCH:expr, $OP:ident) => {{ let left_values = $LEFT.get_func()($BATCH)?; let right_values = $RIGHT.get_func()($BATCH)?; - Ok(Arc::new(array_ops::$OP( + Ok(Arc::new(compute::$OP( left_values.as_any().downcast_ref::().unwrap(), right_values .as_any()