-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-10500: [Rust] Refactor bit slice, bit view iterator for array buffers #8598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,9 +33,10 @@ use std::sync::Arc; | |
| use crate::datatypes::ArrowNativeType; | ||
| use crate::error::{ArrowError, Result}; | ||
| use crate::memory; | ||
| use crate::util::bit_chunk_iterator::BitChunks; | ||
| use crate::util::bit_slice_iterator::*; | ||
| use crate::util::bit_util; | ||
| use crate::util::bit_util::ceil; | ||
|
|
||
| #[cfg(feature = "simd")] | ||
| use std::borrow::BorrowMut; | ||
|
|
||
|
|
@@ -258,16 +259,20 @@ impl Buffer { | |
| /// Returns a slice of this buffer starting at a certain bit offset. | ||
| /// If the offset is byte-aligned the returned buffer is a shallow clone, | ||
| /// otherwise a new buffer is allocated and filled with a copy of the bits in the range. | ||
| pub fn bit_slice(&self, offset: usize, len: usize) -> Self { | ||
| if offset % 8 == 0 && len % 8 == 0 { | ||
| return self.slice(offset / 8); | ||
| pub fn bit_view(&self, offset_in_bits: usize, len_in_bits: usize) -> Self { | ||
| if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 { | ||
| self.slice(offset_in_bits / 8) | ||
| } else { | ||
| self.bit_slice() | ||
| .view(offset_in_bits, len_in_bits) | ||
| .as_buffer() | ||
| } | ||
|
|
||
| bitwise_unary_op_helper(&self, offset, len, |a| a) | ||
| } | ||
|
|
||
| pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks { | ||
| BitChunks::new(&self, offset, len) | ||
| /// Gives bit slice of the underlying buffer | ||
| /// This method can be used to get bit views for bit operations on the immutable view over the buffer. | ||
| pub fn bit_slice(&self) -> BufferBitSlice { | ||
| BufferBitSlice::new(self.data.data()) | ||
| } | ||
|
|
||
| /// Returns an empty buffer. | ||
|
|
@@ -401,20 +406,27 @@ where | |
| let mut result = | ||
| MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); | ||
|
|
||
| let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits); | ||
| let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits); | ||
| let left_slice = left.bit_slice().view(left_offset_in_bits, len_in_bits); | ||
|
||
| let left_chunks = left_slice.chunks::<u64>(); | ||
|
|
||
| let right_slice = right.bit_slice().view(right_offset_in_bits, len_in_bits); | ||
| let right_chunks = right_slice.chunks::<u64>(); | ||
|
|
||
| let remainder_bytes = ceil(left_chunks.remainder_bit_len(), 8); | ||
| let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); | ||
| let rem = &rem.to_ne_bytes()[0..remainder_bytes]; | ||
|
|
||
| let left_chunk_iter = left_chunks.interpret(); | ||
| let right_chunk_iter = right_chunks.interpret(); | ||
|
|
||
| let result_chunks = result.typed_data_mut::<u64>().iter_mut(); | ||
|
|
||
| result_chunks | ||
| .zip(left_chunks.iter().zip(right_chunks.iter())) | ||
| .zip(left_chunk_iter.zip(right_chunk_iter)) | ||
| .for_each(|(res, (left, right))| { | ||
| *res = op(left, right); | ||
| }); | ||
|
|
||
| let remainder_bytes = ceil(left_chunks.remainder_len(), 8); | ||
| let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits()); | ||
| // we are counting its starting from the least significant bit, to to_le_bytes should be correct | ||
| let rem = &rem.to_le_bytes()[0..remainder_bytes]; | ||
| result.extend_from_slice(rem); | ||
|
|
||
| result.freeze() | ||
|
|
@@ -435,19 +447,21 @@ where | |
| let mut result = | ||
| MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false); | ||
|
|
||
| let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits); | ||
| let left_slice = left.bit_slice().view(offset_in_bits, len_in_bits); | ||
| let left_chunks = left_slice.chunks::<u64>(); | ||
|
|
||
| let remainder_bytes = ceil(left_chunks.remainder_bit_len(), 8); | ||
| let rem = op(left_chunks.remainder_bits()); | ||
| let rem = &rem.to_ne_bytes()[0..remainder_bytes]; | ||
|
|
||
| let left_chunk_iter = left_chunks.interpret(); | ||
|
|
||
| let result_chunks = result.typed_data_mut::<u64>().iter_mut(); | ||
|
|
||
| result_chunks | ||
| .zip(left_chunks.iter()) | ||
| .for_each(|(res, left)| { | ||
| *res = op(left); | ||
| }); | ||
| result_chunks.zip(left_chunk_iter).for_each(|(res, left)| { | ||
| *res = op(left); | ||
| }); | ||
|
|
||
| let remainder_bytes = ceil(left_chunks.remainder_len(), 8); | ||
| let rem = op(left_chunks.remainder_bits()); | ||
| // we are counting its starting from the least significant bit, to to_le_bytes should be correct | ||
| let rem = &rem.to_le_bytes()[0..remainder_bytes]; | ||
| result.extend_from_slice(rem); | ||
|
|
||
| result.freeze() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't fully understand why this doesn't need to refer to `len_in_bits` -- how do we know that `len_in_bits` covers the entire buffer? Maybe this should be `self.slice(len_in_bits/8)`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is the idea, bit view doesn't cover the whole Buffer. If you give the whole buffer's length in bits and start offset as 0 then it will cover the whole buffer. Otherwise, we can use a partial bit view on the Buffer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, what I don't understand is how the test for `len_in_bits % 8 == 0` is checking for the whole buffer length. It seems like it is checking that `len_in_bits` is a multiple of 8 (aka represents whole bytes). Maybe there is some assumption here like `self.len_in_bits < 8`?