Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ prettytable-rs = { version = "0.8.0", optional = true }

[features]
default = []
avx512 = []
simd = ["packed_simd"]
prettyprint = ["prettytable-rs"]

Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/benches/buffer_bit_ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ fn bench_buffer_and(left: &Buffer, right: &Buffer) {
}

fn bit_ops_benchmark(c: &mut Criterion) {
let left = create_buffer(512);
let right = create_buffer(512);
let left = create_buffer(512 * 10);
let right = create_buffer(512 * 10);
c.bench_function("buffer_bit_ops and", |b| {
b.iter(|| bench_buffer_and(&left, &right))
});
Expand Down
107 changes: 98 additions & 9 deletions rust/arrow/src/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::memory;
use crate::util::bit_chunk_iterator::BitChunks;
use crate::util::bit_util;
use crate::util::bit_util::ceil;
#[cfg(feature = "simd")]
#[cfg(any(feature = "simd", feature = "avx512"))]
use std::borrow::BorrowMut;

/// Buffer is a contiguous memory region of fixed size and is aligned at a 64-byte
Expand Down Expand Up @@ -453,32 +453,100 @@ where
result.freeze()
}

#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
const AVX512_U8X64_LANES: usize = 64;

#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
#[target_feature(enable = "avx512f")]
pub(super) unsafe fn avx512_bin_and(left: &[u8], right: &[u8], res: &mut [u8]) {
use core::arch::x86_64::{__m512i, _mm512_and_si512, _mm512_loadu_ps};

let l: __m512i = std::mem::transmute(_mm512_loadu_ps(left.as_ptr() as *const _));
let r: __m512i = std::mem::transmute(_mm512_loadu_ps(right.as_ptr() as *const _));
let f = _mm512_and_si512(l, r);
let s = &f as *const __m512i as *const u8;
let d = res.get_unchecked_mut(0) as *mut _ as *mut u8;
std::ptr::copy_nonoverlapping(s, d, std::mem::size_of::<__m512i>());
}

#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
pub(super) fn buffer_bin_and(
left: &Buffer,
left_offset_in_bits: usize,
right: &Buffer,
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
// SIMD implementation if available and byte-aligned
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
if left_offset_in_bits % 8 == 0
&& right_offset_in_bits % 8 == 0
&& len_in_bits % 8 == 0
{
return bitwise_bin_op_simd_helper(
let len = len_in_bits / 8;
let left_offset = left_offset_in_bits / 8;
let right_offset = right_offset_in_bits / 8;

let mut result = MutableBuffer::new(len).with_bitset(len, false);

let mut left_chunks = left.data()[left_offset..].chunks_exact(AVX512_U8X64_LANES);
let mut right_chunks =
right.data()[right_offset..].chunks_exact(AVX512_U8X64_LANES);
let mut result_chunks = result.data_mut().chunks_exact_mut(AVX512_U8X64_LANES);

result_chunks
.borrow_mut()
.zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
.for_each(|(res, (left, right))| unsafe {
avx512_bin_and(left, right, res);
});

result_chunks
.into_remainder()
.iter_mut()
.zip(
left_chunks
.remainder()
.iter()
.zip(right_chunks.remainder().iter()),
)
.for_each(|(res, (left, right))| {
*res = *left & *right;
});

result.freeze()
} else {
bitwise_bin_op_helper(
&left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a & b,
)
}
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
pub(super) fn buffer_bin_and(
left: &Buffer,
left_offset_in_bits: usize,
right: &Buffer,
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
if left_offset_in_bits % 8 == 0
&& right_offset_in_bits % 8 == 0
&& len_in_bits % 8 == 0
{
bitwise_bin_op_simd_helper(
&left,
left_offset_in_bits / 8,
&right,
right_offset_in_bits / 8,
len_in_bits / 8,
|a, b| a & b,
|a, b| a & b,
);
}
// Default implementation
#[allow(unreachable_code)]
{
)
} else {
bitwise_bin_op_helper(
&left,
left_offset_in_bits,
Expand All @@ -490,6 +558,27 @@ pub(super) fn buffer_bin_and(
}
}

#[cfg(all(
any(target_arch = "x86", target_arch = "x86_64"),
not(any(feature = "simd", feature = "avx512"))
))]
pub(super) fn buffer_bin_and(
left: &Buffer,
left_offset_in_bits: usize,
right: &Buffer,
right_offset_in_bits: usize,
len_in_bits: usize,
) -> Buffer {
bitwise_bin_op_helper(
&left,
left_offset_in_bits,
right,
right_offset_in_bits,
len_in_bits,
|a, b| a & b,
)
}

pub(super) fn buffer_bin_or(
left: &Buffer,
left_offset_in_bits: usize,
Expand Down
6 changes: 3 additions & 3 deletions rust/arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2742,14 +2742,14 @@ mod tests {
(Ok(_), false) => {
panic!("Was able to cast array from {:?} to {:?} but can_cast_types reported false",
array.data_type(), to_type)
},
}
(Err(e), true) => {
panic!("Was not able to cast array from {:?} to {:?} but can_cast_types reported true. \
Error was {:?}",
array.data_type(), to_type, e)
},
}
// otherwise it was a match
_=> {},
_ => {}
};
}
}
Expand Down
3 changes: 3 additions & 0 deletions rust/arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@
//!
//! The parquet implementation is on a [separate crate](https://crates.io/crates/parquet)

#![cfg_attr(feature = "avx512", feature(stdsimd))]
#![cfg_attr(feature = "avx512", feature(repr_simd))]
#![cfg_attr(feature = "avx512", feature(avx512_target_feature))]
#![allow(dead_code)]
#![allow(non_camel_case_types)]
#![allow(bare_trait_objects)]
Expand Down
16 changes: 16 additions & 0 deletions rust/arrow/src/util/bit_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,22 @@ mod tests {
assert_eq!(ceil(10000000000, 1000000000), 10);
}

#[test]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "avx512"))]
fn test_bitwise_and_avx512() {
use crate::buffer::avx512_bin_and;

let buf1 = [0b00110011u8; 64];
let buf2 = [0b11110000u8; 64];
let mut buf3 = [0b00000000; 64];
unsafe {
avx512_bin_and(&buf1, &buf2, &mut buf3);
};
for i in buf3.iter() {
assert_eq!(&0b00110000u8, i);
}
}

#[test]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn test_bitwise_and_simd() {
Expand Down