Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6131c02
ARROW-10588 - Implement safe bit operations over all crates that uses…
vertexclique Nov 4, 2020
21e83e1
Resolve conflicts against master
vertexclique Nov 15, 2020
de8277e
Resolve conflicts again
vertexclique Nov 15, 2020
4948aa0
Apply checks
vertexclique Nov 15, 2020
2aef46f
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
57b9a38
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
ce647aa
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
d05cd7d
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
4c80784
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
d476883
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
babcbdb
Update rust/arrow/src/util/bit_ops.rs
vertexclique Nov 18, 2020
0b849cf
Resolve conflicts and commit
vertexclique Nov 18, 2020
4d8af27
Add doctest for bit slice reinterpretation
vertexclique Nov 18, 2020
081d365
Apply checks and formatting
vertexclique Nov 18, 2020
6613b94
Address test comments
vertexclique Nov 18, 2020
3625692
Remove bit_util.rs
vertexclique Nov 18, 2020
10b0e9c
Make unnecessary slices immutable
vertexclique Nov 18, 2020
9946245
Rename 'interpret' to 'to_native_iter'.
vertexclique Nov 18, 2020
3e8230c
Follow the conventions and call it into_native_iter
vertexclique Nov 18, 2020
af0f588
Apply clippy feedback
vertexclique Nov 18, 2020
457d8c7
Reword comment and explain 'well-aligned'
vertexclique Nov 18, 2020
e35e0be
ARROW-10588 - Chunked parallel producer consumer
vertexclique Nov 24, 2020
64f0ccc
ARROW-10588 - Chunked parallel SIMD producer consumer
vertexclique Nov 24, 2020
0eb35a0
ARROW-10588 - Horizontal sum operation
vertexclique Nov 24, 2020
92c070e
ARROW-10588 - Simplify and remove unnecessary operations
vertexclique Nov 24, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion rust/arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ name = "arrow"
path = "src/lib.rs"

[dependencies]
rayon = "1.5"
serde = { version = "1.0", features = ["rc"] }
serde_derive = "1.0"
serde_json = { version = "1.0", features = ["preserve_order"] }
Expand All @@ -48,11 +49,12 @@ lazy_static = "1.4"
packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
chrono = "0.4"
flatbuffers = "0.6"
bitvec = "0.19"
hex = "0.4"
prettytable-rs = { version = "0.8.0", optional = true }

[features]
default = []
default = ["simd"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We probably shouldn't change the default to include `simd`, as we'd like the default features to allow users to compile with stable Rust.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I forgot to set it back.

avx512 = []
simd = ["packed_simd"]
prettyprint = ["prettytable-rs"]
Expand Down
21 changes: 11 additions & 10 deletions rust/arrow/benches/aggregate_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,21 +74,22 @@ fn bench_min_string(arr_a: &StringArray) {
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_array(512, false);
let size = 2_usize.pow(20);
let arr_a = create_array(size, false);

c.bench_function("sum 512", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min 512", |b| b.iter(|| bench_min(&arr_a)));
c.bench_function("sum 2^20", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min 2^20", |b| b.iter(|| bench_min(&arr_a)));

let arr_a = create_array(512, true);
let arr_a = create_array(size, true);

c.bench_function("sum nulls 512", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min nulls 512", |b| b.iter(|| bench_min(&arr_a)));
c.bench_function("sum nulls 2^20", |b| b.iter(|| bench_sum(&arr_a)));
c.bench_function("min nulls 2^20", |b| b.iter(|| bench_min(&arr_a)));

let arr_b = create_string_array(512, false);
c.bench_function("min string 512", |b| b.iter(|| bench_min_string(&arr_b)));
let arr_b = create_string_array(size, false);
c.bench_function("min string 2^20", |b| b.iter(|| bench_min_string(&arr_b)));

let arr_b = create_string_array(512, true);
c.bench_function("min nulls string 512", |b| {
let arr_b = create_string_array(size, true);
c.bench_function("min nulls string 2^20", |b| {
b.iter(|| bench_min_string(&arr_b))
});
}
Expand Down
22 changes: 11 additions & 11 deletions rust/arrow/benches/arithmetic_kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,25 +73,25 @@ fn bench_limit(arr_a: &ArrayRef, max: usize) {
}

fn add_benchmark(c: &mut Criterion) {
let arr_a = create_array(512, false);
let arr_b = create_array(512, false);
let arr_a = create_array(2_usize.pow(20), false);
let arr_b = create_array(2_usize.pow(20), false);

c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
c.bench_function("subtract 512", |b| {
c.bench_function("add 2^20", |b| b.iter(|| bench_add(&arr_a, &arr_b)));
c.bench_function("subtract 2^20", |b| {
b.iter(|| bench_subtract(&arr_a, &arr_b))
});
c.bench_function("multiply 512", |b| {
c.bench_function("multiply 2^20", |b| {
b.iter(|| bench_multiply(&arr_a, &arr_b))
});
c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512)));
c.bench_function("divide 2^20", |b| b.iter(|| bench_divide(&arr_a, &arr_b)));
c.bench_function("limit 2^20, 512", |b| b.iter(|| bench_limit(&arr_a, 512)));

let arr_a_nulls = create_array(512, false);
let arr_b_nulls = create_array(512, false);
c.bench_function("add_nulls_512", |b| {
let arr_a_nulls = create_array(2_usize.pow(20), false);
let arr_b_nulls = create_array(2_usize.pow(20), false);
c.bench_function("add_nulls_2^20", |b| {
b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls))
});
c.bench_function("divide_nulls_512", |b| {
c.bench_function("divide_nulls_2^20", |b| {
b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls))
});
}
Expand Down
6 changes: 2 additions & 4 deletions rust/arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use super::{
Array, ArrayData, ArrayDataRef, FixedSizeListArray, GenericBinaryIter,
GenericListArray, LargeListArray, ListArray, OffsetSizeTrait,
};
use crate::util::bit_util;

use crate::{buffer::Buffer, datatypes::ToByteSlice};
use crate::{buffer::MutableBuffer, datatypes::DataType};

Expand Down Expand Up @@ -231,12 +231,10 @@ where
offsets.push(length_so_far);

{
let null_slice = null_buf.data_mut();

for (i, s) in iter.enumerate() {
if let Some(s) = s {
let s = s.as_ref();
bit_util::set_bit(null_slice, i);
null_buf.set_bit(i);
length_so_far =
length_so_far + OffsetSize::from_usize(s.len()).unwrap();
values.extend_from_slice(s);
Expand Down
48 changes: 30 additions & 18 deletions rust/arrow/src/array/array_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,10 @@ mod tests {
buffer::Buffer,
datatypes::{Field, ToByteSlice},
memory,
util::bit_util,
};

use super::*;
use crate::util::bit_ops::{BufferBitSlice, BufferBitSliceMut};

#[test]
fn test_list_array() {
Expand Down Expand Up @@ -552,11 +552,14 @@ mod tests {
Buffer::from(&[0, 2, 2, 2, 4, 6, 6, 9, 9, 10].to_byte_slice());
// 01011001 00000001
let mut null_bits: [u8; 2] = [0; 2];
bit_util::set_bit(&mut null_bits, 0);
bit_util::set_bit(&mut null_bits, 3);
bit_util::set_bit(&mut null_bits, 4);
bit_util::set_bit(&mut null_bits, 6);
bit_util::set_bit(&mut null_bits, 8);
{
let mut null_bit_slice = BufferBitSliceMut::new(&mut null_bits);
null_bit_slice.set_bit(0, true);
null_bit_slice.set_bit(3, true);
null_bit_slice.set_bit(4, true);
null_bit_slice.set_bit(6, true);
null_bit_slice.set_bit(8, true);
}

// Construct a list array from the above two
let list_data_type =
Expand All @@ -582,8 +585,9 @@ mod tests {
assert_eq!(1, sliced_array.offset());
assert_eq!(3, sliced_array.null_count());

let null_bit_slice = BufferBitSliceMut::new(&mut null_bits);
for i in 0..sliced_array.len() {
if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
if null_bit_slice.get_bit(sliced_array.offset() + i) {
assert!(sliced_array.is_valid(i));
} else {
assert!(sliced_array.is_null(i));
Expand Down Expand Up @@ -617,11 +621,14 @@ mod tests {
Buffer::from(&[0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10].to_byte_slice());
// 01011001 00000001
let mut null_bits: [u8; 2] = [0; 2];
bit_util::set_bit(&mut null_bits, 0);
bit_util::set_bit(&mut null_bits, 3);
bit_util::set_bit(&mut null_bits, 4);
bit_util::set_bit(&mut null_bits, 6);
bit_util::set_bit(&mut null_bits, 8);
{
let mut null_bit_slice = BufferBitSliceMut::new(&mut null_bits);
null_bit_slice.set_bit(0, true);
null_bit_slice.set_bit(3, true);
null_bit_slice.set_bit(4, true);
null_bit_slice.set_bit(6, true);
null_bit_slice.set_bit(8, true);
}

// Construct a list array from the above two
let list_data_type =
Expand All @@ -647,8 +654,9 @@ mod tests {
assert_eq!(1, sliced_array.offset());
assert_eq!(3, sliced_array.null_count());

let null_bit_slice = BufferBitSliceMut::new(&mut null_bits);
for i in 0..sliced_array.len() {
if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
if null_bit_slice.get_bit(sliced_array.offset() + i) {
assert!(sliced_array.is_valid(i));
} else {
assert!(sliced_array.is_null(i));
Expand Down Expand Up @@ -678,13 +686,16 @@ mod tests {
))
.build();

// Set null buts for the nested array:
// Set null bits for the nested array:
// [[0, 1], null, null, [6, 7], [8, 9]]
// 01011001 00000001
let mut null_bits: [u8; 1] = [0; 1];
bit_util::set_bit(&mut null_bits, 0);
bit_util::set_bit(&mut null_bits, 3);
bit_util::set_bit(&mut null_bits, 4);
{
let mut null_bit_slice = BufferBitSliceMut::new(&mut null_bits);
null_bit_slice.set_bit(0, true);
null_bit_slice.set_bit(3, true);
null_bit_slice.set_bit(4, true);
}

// Construct a fixed size list array from the above two
let list_data_type = DataType::FixedSizeList(
Expand All @@ -711,8 +722,9 @@ mod tests {
assert_eq!(1, sliced_array.offset());
assert_eq!(2, sliced_array.null_count());

let null_bit_slice = BufferBitSlice::new(&null_bits);
for i in 0..sliced_array.len() {
if bit_util::get_bit(&null_bits, sliced_array.offset() + i) {
if null_bit_slice.get_bit(sliced_array.offset() + i) {
assert!(sliced_array.is_valid(i));
} else {
assert!(sliced_array.is_null(i));
Expand Down
22 changes: 8 additions & 14 deletions rust/arrow/src/array/array_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use super::raw_pointer::RawPtrBox;
use super::*;
use crate::buffer::{Buffer, MutableBuffer};
use crate::memory;
use crate::util::bit_util;
use crate::util::utils;

/// Number of seconds in a day
const SECONDS_IN_DAY: i64 = 86_400;
Expand Down Expand Up @@ -296,18 +296,17 @@ impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as ArrowPrimitiveType>::Native
let (_, data_len) = iter.size_hint();
let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound.

let num_bytes = bit_util::ceil(data_len, 8);
let num_bytes = utils::ceil(data_len, 8);
let mut null_buf = MutableBuffer::new(num_bytes).with_bitset(num_bytes, false);
let mut val_buf = MutableBuffer::new(
data_len * mem::size_of::<<T as ArrowPrimitiveType>::Native>(),
);

let null = vec![0; mem::size_of::<<T as ArrowPrimitiveType>::Native>()];

let null_slice = null_buf.data_mut();
iter.enumerate().for_each(|(i, item)| {
if let Some(a) = item.borrow() {
bit_util::set_bit(null_slice, i);
null_buf.set_bit(i);
val_buf.extend_from_slice(a.to_byte_slice());
} else {
val_buf.extend_from_slice(&null);
Expand Down Expand Up @@ -401,10 +400,9 @@ impl<T: ArrowTimestampType> PrimitiveArray<T> {

{
let null = vec![0; mem::size_of::<i64>()];
let null_slice = null_buf.data_mut();
for (i, v) in data.iter().enumerate() {
if let Some(n) = v {
bit_util::set_bit(null_slice, i);
null_buf.set_bit(i);
val_buf.extend_from_slice(&n.to_byte_slice());
} else {
val_buf.extend_from_slice(&null);
Expand All @@ -427,10 +425,9 @@ impl From<Vec<bool>> for BooleanArray {
fn from(data: Vec<bool>) -> Self {
let mut mut_buf = MutableBuffer::new_null(data.len());
{
let mut_slice = mut_buf.data_mut();
for (i, b) in data.iter().enumerate() {
if *b {
bit_util::set_bit(mut_slice, i);
mut_buf.set_bit(i);
}
}
}
Expand All @@ -445,19 +442,16 @@ impl From<Vec<bool>> for BooleanArray {
impl From<Vec<Option<bool>>> for BooleanArray {
fn from(data: Vec<Option<bool>>) -> Self {
let data_len = data.len();
let num_byte = bit_util::ceil(data_len, 8);
let num_byte = utils::ceil(data_len, 8);
let mut null_buf = MutableBuffer::new_null(data.len());
let mut val_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false);

{
let null_slice = null_buf.data_mut();
let val_slice = val_buf.data_mut();

for (i, v) in data.iter().enumerate() {
if let Some(b) = v {
bit_util::set_bit(null_slice, i);
null_buf.set_bit(i);
if *b {
bit_util::set_bit(val_slice, i);
val_buf.set_bit(i);
}
}
}
Expand Down
5 changes: 2 additions & 3 deletions rust/arrow/src/array/array_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use super::{
Array, ArrayData, ArrayDataRef, GenericListArray, GenericStringIter, LargeListArray,
ListArray, OffsetSizeTrait,
};
use crate::util::bit_util;

use crate::{buffer::Buffer, datatypes::ToByteSlice};
use crate::{buffer::MutableBuffer, datatypes::DataType};

Expand Down Expand Up @@ -168,8 +168,7 @@ where
if let Some(s) = s {
let s = s.as_ref();
// set null bit
let null_slice = null_buf.data_mut();
bit_util::set_bit(null_slice, i);
null_buf.set_bit(i);

length_so_far = length_so_far + OffsetSize::from_usize(s.len()).unwrap();
offsets.push(length_so_far);
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/array_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
child_datum_len,
)
} else {
child_null_buffer.bit_slice(child_datum_offset, child_datum_len)
child_null_buffer.bit_view(child_datum_offset, child_datum_len)
});
} else if null.is_some() {
// when one of the fields has no nulls, then there is no null in the array
Expand All @@ -154,7 +154,7 @@ impl TryFrom<Vec<(&str, ArrayRef)>> for StructArray {
.len(len)
.child_data(child_data);
if let Some(null_buffer) = null {
let null_count = len - null_buffer.count_set_bits();
let null_count = len - null_buffer.count_ones();
builder = builder.null_count(null_count).null_bit_buffer(null_buffer);
}

Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/array/array_union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ impl UnionArray {
bitmap: Option<Buffer>,
) -> Result<Self> {
let bitmap_data = bitmap.map(|b| {
let null_count = type_ids.len() - b.count_set_bits();
let null_count = type_ids.len() - b.count_ones();
(b, null_count)
});

Expand Down Expand Up @@ -233,7 +233,7 @@ impl UnionArray {
// In format v4 unions had their own validity bitmap and offsets are compressed by omitting null values
// Starting with v5 unions don't have a validity bitmap and it's possible to directly index into the offsets buffer
let valid_slots = match self.data.null_buffer() {
Some(b) => b.count_set_bits_offset(0, index),
Some(b) => b.bit_slice().slicing(0, index).count_ones(),
None => index,
};
self.data().buffers()[1].data()[valid_slots * size_of::<i32>()] as i32
Expand Down
Loading