Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ csv = "1.0.0"
num = "0.2"
regex = "1.1"
lazy_static = "1.2"
packed_simd = "0.3.1"

[dev-dependencies]
criterion = "0.2"
Expand All @@ -56,4 +57,8 @@ harness = false

[[bench]]
name = "builder"
harness = false
harness = false

[[bench]]
name = "bitwise_ops"
harness = false
75 changes: 75 additions & 0 deletions rust/arrow/benches/bitwise_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#[macro_use]
extern crate criterion;
use criterion::Criterion;

extern crate arrow;

use arrow::buffer::Buffer;
use arrow::builder::{BufferBuilderTrait, UInt8BufferBuilder};

/// Builds a `Buffer` holding `size` bytes, each set to `1`.
fn create_buffer(size: usize) -> Buffer {
    let mut bytes = UInt8BufferBuilder::new(size);
    (0..size).for_each(|_| bytes.append(1_u8).unwrap());
    bytes.finish()
}

/// Benchmark body for the scalar path: creates two `size`-byte buffers and
/// combines them one byte at a time with `op`, appending every result to a
/// fresh `UInt8BufferBuilder`.
fn bitwise_default<F>(size: usize, op: F)
where
    F: Fn(&u8, &u8) -> u8,
{
    let lhs = create_buffer(size);
    let rhs = create_buffer(size);
    let len = lhs.len();

    let mut out = UInt8BufferBuilder::new(len);
    for idx in 0..len {
        // SAFETY: `idx < len` and both buffers were created with the same
        // `size`; assumes `data()` exposes at least `len()` bytes.
        let byte = unsafe {
            op(
                lhs.data().get_unchecked(idx),
                rhs.data().get_unchecked(idx),
            )
        };
        out.append(byte).unwrap();
    }

    // Keep the optimizer from discarding the computed buffer.
    criterion::black_box(out.finish());
}

/// Benchmark body for the whole-buffer path: creates two `size`-byte buffers
/// and hands them to `op`, which is expected to combine them in one call.
fn bitwise_simd<F>(size: usize, op: F)
where
    F: Fn(&Buffer, &Buffer) -> Buffer,
{
    let lhs = create_buffer(size);
    let rhs = create_buffer(size);
    let combined = op(&lhs, &rhs);
    // Keep the optimizer from discarding the computed buffer.
    criterion::black_box(combined);
}

/// Registers the bitwise AND / OR benchmarks, each in a byte-by-byte variant
/// (`bitwise_default`) and a whole-buffer variant (`bitwise_simd`), on
/// 512-byte inputs.
///
/// The AND benchmarks are labelled "and" / "and simd": the measured operation
/// is `&`, not an addition.
fn add_benchmark(c: &mut Criterion) {
    c.bench_function("and", |bencher| {
        bencher.iter(|| bitwise_default(512, |l, r| l & r))
    });
    c.bench_function("and simd", |bencher| {
        bencher.iter(|| bitwise_simd(512, |l, r| l & r))
    });
    c.bench_function("or", |bencher| {
        bencher.iter(|| bitwise_default(512, |l, r| l | r))
    });
    c.bench_function("or simd", |bencher| {
        bencher.iter(|| bitwise_simd(512, |l, r| l | r))
    });
}

// Collect `add_benchmark` into the `benches` group and let criterion generate
// the entry point that runs it.
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
121 changes: 121 additions & 0 deletions rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,17 @@
//! The main type in the module is `Buffer`, a contiguous immutable memory region of
//! fixed size aligned at a 64-byte boundary. `MutableBuffer` is like `Buffer`, but it can
//! be mutated and grown.
//!
use packed_simd::u8x64;

use std::cmp;
use std::io::{Error as IoError, ErrorKind, Result as IoResult, Write};
use std::mem;
use std::ops::{BitAnd, BitOr};
use std::slice::{from_raw_parts, from_raw_parts_mut};
use std::sync::Arc;

use crate::builder::{BufferBuilderTrait, UInt8BufferBuilder};
use crate::error::Result;
use crate::memory;
use crate::util::bit_util;
Expand Down Expand Up @@ -141,6 +146,100 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
}
}

/// Helper function for SIMD `BitAnd` and `BitOr` implementations.
///
/// Applies `op` to `left` and `right` in 64-byte blocks and returns a new
/// buffer of `left.len()` bytes holding the result.
///
/// Only the first `left.len()` bytes of either input are read and only
/// `left.len()` bytes of the output are written: a trailing partial block is
/// staged through zero-padded stack arrays instead of reading or writing past
/// the logical end of the buffers.
///
/// # Panics
///
/// Panics if `right` exposes fewer than `left.len()` bytes. The operator
/// implementations assert equal lengths before calling this helper.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bitwise_bin_op_simd_helper<F>(left: &Buffer, right: &Buffer, op: F) -> Buffer
where
    F: Fn(u8x64, u8x64) -> u8x64,
{
    const LANES: usize = u8x64::lanes();

    let len = left.len();
    let mut result = MutableBuffer::new(len).with_bitset(len, false);
    {
        // Split every slice into a part made of whole SIMD blocks and a tail
        // shorter than one block.
        let whole = len - len % LANES;
        let (left_body, left_tail) = left.data()[..len].split_at(whole);
        let (right_body, right_tail) = right.data()[..len].split_at(whole);
        let (out_body, out_tail) = result.data_mut()[..len].split_at_mut(whole);

        let blocks = left_body
            .chunks_exact(LANES)
            .zip(right_body.chunks_exact(LANES))
            .zip(out_body.chunks_exact_mut(LANES));
        for ((l, r), out) in blocks {
            // SAFETY: `chunks_exact` / `chunks_exact_mut` only yield slices of
            // exactly `LANES` bytes, which is what the SIMD routine requires.
            unsafe { bit_util::bitwise_bin_op_simd(l, r, out, &op) };
        }

        if !left_tail.is_empty() {
            let mut l = [0_u8; LANES];
            let mut r = [0_u8; LANES];
            let mut out = [0_u8; LANES];
            l[..left_tail.len()].copy_from_slice(left_tail);
            r[..right_tail.len()].copy_from_slice(right_tail);
            // SAFETY: all three arrays are exactly `LANES` bytes long.
            unsafe { bit_util::bitwise_bin_op_simd(&l, &r, &mut out, &op) };
            // Only the bytes that belong to the buffer are copied back; the
            // padding lanes are discarded.
            out_tail.copy_from_slice(&out[..out_tail.len()]);
        }
    }
    result.freeze()
}

impl<'a, 'b> BitAnd<&'b Buffer> for &'a Buffer {
    type Output = Buffer;

    /// Returns a new buffer holding the byte-wise AND of `self` and `rhs`.
    ///
    /// Panics if the two buffers differ in length.
    fn bitand(self, rhs: &'b Buffer) -> Buffer {
        assert_eq!(
            self.len(),
            rhs.len(),
            "Buffers must be the same size to apply Bitwise AND."
        );

        // On x86 / x86_64 the SIMD helper handles the whole operation.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            return bitwise_bin_op_simd_helper(self, rhs, |l, r| l & r);
        }

        // Scalar fallback for every other target; unreachable when the SIMD
        // block above is compiled in, hence the lint allowance.
        #[allow(unreachable_code)]
        {
            let len = self.len();
            let mut out = UInt8BufferBuilder::new(len);
            // `take(len)` bounds the walk to the same `0..len` range the
            // indexed form of this loop would cover.
            for (l, r) in self.data().iter().zip(rhs.data()).take(len) {
                out.append(l & r).unwrap();
            }
            out.finish()
        }
    }
}

impl<'a, 'b> BitOr<&'b Buffer> for &'a Buffer {
    type Output = Buffer;

    /// Returns a new buffer holding the byte-wise OR of `self` and `rhs`.
    ///
    /// Panics if the two buffers differ in length.
    fn bitor(self, rhs: &'b Buffer) -> Buffer {
        assert_eq!(
            self.len(),
            rhs.len(),
            "Buffers must be the same size to apply Bitwise OR."
        );

        // On x86 / x86_64 the SIMD helper handles the whole operation.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            return bitwise_bin_op_simd_helper(self, rhs, |l, r| l | r);
        }

        // Scalar fallback for every other target; unreachable when the SIMD
        // block above is compiled in, hence the lint allowance.
        #[allow(unreachable_code)]
        {
            let len = self.len();
            let mut out = UInt8BufferBuilder::new(len);
            // `take(len)` bounds the walk to the same `0..len` range the
            // indexed form of this loop would cover.
            for (l, r) in self.data().iter().zip(rhs.data()).take(len) {
                out.append(l | r).unwrap();
            }
            out.finish()
        }
    }
}

// Declares that `Buffer` may be shared between and moved across threads.
// NOTE(review): presumably sound because a `Buffer` is an immutable memory
// region once built — confirm against the `Buffer` definition, which is not
// visible here.
unsafe impl Sync for Buffer {}
unsafe impl Send for Buffer {}

Expand Down Expand Up @@ -434,6 +533,28 @@ mod tests {
assert_eq!(256, bit_util::count_set_bits(buf.data()));
}

#[test]
fn test_bitwise_and() {
    // 0b01101010 & 0b01001110 == 0b01001010
    let lhs = Buffer::from([0b01101010]);
    let rhs = Buffer::from([0b01001110]);
    let expected = Buffer::from([0b01001010]);
    assert_eq!(expected, &lhs & &rhs);
}

#[test]
fn test_bitwise_or() {
    // 0b01101010 | 0b01001110 == 0b01101110
    let lhs = Buffer::from([0b01101010]);
    let rhs = Buffer::from([0b01001110]);
    let expected = Buffer::from([0b01101110]);
    assert_eq!(expected, &lhs | &rhs);
}

// AND of buffers with different lengths must hit the length assertion.
#[test]
#[should_panic(expected = "Buffers must be the same size to apply Bitwise AND.")]
fn test_buffer_bitand_different_sizes() {
    let buf1 = Buffer::from([1_u8, 1_u8]);
    let buf2 = Buffer::from([0b01001110]);
    let _buf3 = &buf1 & &buf2;
}

// OR of buffers with different lengths must hit the length assertion.
#[test]
#[should_panic(expected = "Buffers must be the same size to apply Bitwise OR.")]
fn test_buffer_bitor_different_sizes() {
    let buf1 = Buffer::from([1_u8, 1_u8]);
    let buf2 = Buffer::from([0b01001110]);
    let _buf3 = &buf1 | &buf2;
}

#[test]
fn test_mutable_new() {
let buf = MutableBuffer::new(63);
Expand Down
42 changes: 42 additions & 0 deletions rust/arrow/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

//! Utils for working with bits

use packed_simd::u8x64;

static BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128];

static POPCOUNT_TABLE: [u8; 256] = [
Expand Down Expand Up @@ -117,6 +119,22 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
result
}

/// Performs a SIMD bitwise binary operation on a single 64-byte block.
///
/// The first 64 bytes of `left` and `right` are loaded into `u8x64` vectors,
/// combined with `op`, and the 64-byte result is stored at the start of
/// `result`. Any bytes beyond the first 64 of each slice are ignored.
///
/// # Safety
///
/// `left`, `right` and `result` must each be at least 64 bytes long. The
/// `_unchecked` load/store routines used here do not promise a panic for
/// shorter slices, so violating this must be treated as undefined behaviour.
/// Debug builds assert the lengths to catch misuse early.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub unsafe fn bitwise_bin_op_simd<F>(left: &[u8], right: &[u8], result: &mut [u8], op: F)
where
    F: Fn(u8x64, u8x64) -> u8x64,
{
    debug_assert!(
        left.len() >= u8x64::lanes(),
        "left slice is shorter than one SIMD block"
    );
    debug_assert!(
        right.len() >= u8x64::lanes(),
        "right slice is shorter than one SIMD block"
    );
    debug_assert!(
        result.len() >= u8x64::lanes(),
        "result slice is shorter than one SIMD block"
    );

    let left_simd = u8x64::from_slice_unaligned_unchecked(left);
    let right_simd = u8x64::from_slice_unaligned_unchecked(right);
    let simd_result = op(left_simd, right_simd);
    simd_result.write_to_slice_unaligned_unchecked(result);
}

#[cfg(test)]
mod tests {
use rand::{thread_rng, Rng};
Expand Down Expand Up @@ -270,4 +288,28 @@ mod tests {
assert_eq!(ceil(10, 10000000000), 1);
assert_eq!(ceil(10000000000, 1000000000), 10);
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[test]
fn test_bitwise_and_simd() {
    let lhs = [0b00110011u8; 64];
    let rhs = [0b11110000u8; 64];
    let mut out = [0b00000000; 64];
    // SAFETY: all three arrays are exactly 64 bytes long.
    unsafe { bitwise_bin_op_simd(&lhs, &rhs, &mut out, |a, b| a & b) };
    // Every lane must hold 0b00110011 & 0b11110000.
    let expected = [0b00110000u8; 64];
    assert_eq!(&expected[..], &out[..]);
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[test]
fn test_bitwise_or_simd() {
    let lhs = [0b00110011u8; 64];
    let rhs = [0b11110000u8; 64];
    let mut out = [0b00000000; 64];
    // SAFETY: all three arrays are exactly 64 bytes long.
    unsafe { bitwise_bin_op_simd(&lhs, &rhs, &mut out, |a, b| a | b) };
    // Every lane must hold 0b00110011 | 0b11110000.
    let expected = [0b11110011u8; 64];
    assert_eq!(&expected[..], &out[..]);
}
}