diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs
index c8e5144c14cb..42d5ef22a254 100644
--- a/arrow-buffer/src/buffer/boolean.rs
+++ b/arrow-buffer/src/buffer/boolean.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::bit_chunk_iterator::BitChunks;
-use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
+use crate::bit_iterator::{BitIndexIterator, BitIndexU32Iterator, BitIterator, BitSliceIterator};
 use crate::{
     bit_util, buffer_bin_and, buffer_bin_or, buffer_bin_xor, buffer_unary_not,
     BooleanBufferBuilder, Buffer, MutableBuffer,
@@ -208,6 +208,11 @@ impl BooleanBuffer {
         BitIndexIterator::new(self.values(), self.offset, self.len)
     }
 
+    /// Returns a `u32` iterator over set bit positions without any usize->u32 conversion
+    pub fn set_indices_u32(&self) -> BitIndexU32Iterator<'_> {
+        BitIndexU32Iterator::new(self.values(), self.offset, self.len)
+    }
+
     /// Returns a [`BitSliceIterator`] yielding contiguous ranges of set bits
     pub fn set_slices(&self) -> BitSliceIterator<'_> {
         BitSliceIterator::new(self.values(), self.offset, self.len)
diff --git a/arrow-buffer/src/util/bit_iterator.rs b/arrow-buffer/src/util/bit_iterator.rs
index 6a783138884b..c7f6f94fb869 100644
--- a/arrow-buffer/src/util/bit_iterator.rs
+++ b/arrow-buffer/src/util/bit_iterator.rs
@@ -231,6 +231,63 @@ impl Iterator for BitIndexIterator<'_> {
     }
 }
 
+/// An iterator of u32 whose index in a provided bitmask is true
+/// Respects arbitrary offsets and slice lead/trail padding exactly like BitIndexIterator
+#[derive(Debug)]
+pub struct BitIndexU32Iterator<'a> {
+    curr: u64,
+    chunk_offset: i64,
+    iter: UnalignedBitChunkIterator<'a>,
+}
+
+impl<'a> BitIndexU32Iterator<'a> {
+    /// Create a new [BitIndexU32Iterator] from the provided buffer,
+    /// offset and len in bits.
+    pub fn new(buffer: &'a [u8], offset: usize, len: usize) -> Self {
+        // Build the aligned chunks (including prefix/suffix masked)
+        let chunks = UnalignedBitChunk::new(buffer, offset, len);
+        let mut iter = chunks.iter();
+
+        // First 64-bit word (masked for lead padding), or 0 if empty
+        let curr = iter.next().unwrap_or(0);
+        // Negative lead padding ensures the first bit in curr maps to index 0
+        let chunk_offset = -(chunks.lead_padding() as i64);
+
+        Self {
+            curr,
+            chunk_offset,
+            iter,
+        }
+    }
+}
+
+impl<'a> Iterator for BitIndexU32Iterator<'a> {
+    type Item = u32;
+
+    #[inline(always)]
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.curr != 0 {
+                // Position of least-significant set bit
+                let tz = self.curr.trailing_zeros();
+                // Clear that bit
+                self.curr &= self.curr - 1;
+                // Return global index = chunk_offset + tz
+                return Some((self.chunk_offset + tz as i64) as u32);
+            }
+            // Advance to next 64-bit chunk
+            match self.iter.next() {
+                Some(next_chunk) => {
+                    // Move offset forward by 64 bits
+                    self.chunk_offset += 64;
+                    self.curr = next_chunk;
+                }
+                None => return None,
+            }
+        }
+    }
+}
+
 /// Calls the provided closure for each index in the provided null mask that is set,
 /// using an adaptive strategy based on the null count
 ///
@@ -323,4 +380,110 @@ mod tests {
         let mask = &[223, 23];
         BitIterator::new(mask, 17, 0);
     }
+
+    #[test]
+    fn test_bit_index_u32_iterator_basic() {
+        let mask = &[0b00010010, 0b00100011];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 4, 8).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 4, 8)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 10, 4).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 10, 4)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 0).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 0)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_iterator_all_set() {
+        let mask = &[0xFF, 0xFF];
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_iterator_none_set() {
+        let mask = &[0x00, 0x00];
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, 0, 16).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, 0, 16)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_cross_chunk() {
+        let mut buf = vec![0u8; 16];
+        for bit in 60..68 {
+            let byte = (bit / 8) as usize;
+            let bit_in_byte = bit % 8;
+            buf[byte] |= 1 << bit_in_byte;
+        }
+        let offset = 58;
+        let len = 10;
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&buf, offset, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&buf, offset, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_unaligned_offset() {
+        let mask = &[0b0110_1100, 0b1010_0000];
+        let offset = 2;
+        let len = 12;
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(mask, offset, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(mask, offset, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_long_all_set() {
+        let len = 200;
+        let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
+        let bytes = vec![0xFFu8; num_bytes];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
+
+    #[test]
+    fn test_bit_index_u32_none_set() {
+        let len = 50;
+        let num_bytes = len / 8 + if len % 8 != 0 { 1 } else { 0 };
+        let bytes = vec![0u8; num_bytes];
+
+        let result: Vec<u32> = BitIndexU32Iterator::new(&bytes, 0, len).collect();
+        let expected: Vec<u32> = BitIndexIterator::new(&bytes, 0, len)
+            .map(|i| i as u32)
+            .collect();
+        assert_eq!(result, expected);
+    }
 }
diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs
index be515c3f109f..a405aa7a3735 100644
--- a/arrow-ord/src/sort.rs
+++ b/arrow-ord/src/sort.rs
@@ -178,44 +178,66 @@ where
     }
 }
 
-// partition indices into valid and null indices
-fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
+/// Partition indices of an Arrow array into two categories:
+/// - `valid`: indices of non-null elements
+/// - `nulls`: indices of null elements
+///
+/// Optimized for performance with fast-path for all-valid arrays
+/// and bit-parallel scan for null-containing arrays.
+#[inline(always)]
+pub fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
     let len = array.len();
     let null_count = array.null_count();
-    match array.nulls() {
-        Some(nulls) if null_count > 0 => {
-            let mut valid_indices = Vec::with_capacity(len - null_count);
-            let mut null_indices = Vec::with_capacity(null_count);
-
-            let valid_slice = valid_indices.spare_capacity_mut();
-            let null_slice = null_indices.spare_capacity_mut();
-            let mut valid_idx = 0;
-            let mut null_idx = 0;
-
-            nulls.into_iter().enumerate().for_each(|(i, v)| {
-                if v {
-                    valid_slice[valid_idx].write(i as u32);
-                    valid_idx += 1;
-                } else {
-                    null_slice[null_idx].write(i as u32);
-                    null_idx += 1;
-                }
-            });
-            assert_eq!(null_idx, null_count);
-            assert_eq!(valid_idx, len - null_count);
-            // Safety: The new lengths match the initial capacity as asserted above,
-            // the bounds checks while writing also ensure they less than or equal to the capacity.
-            unsafe {
-                valid_indices.set_len(valid_idx);
-                null_indices.set_len(null_idx);
-            }
+    // Fast path: if there are no nulls, all elements are valid
+    if null_count == 0 {
+        // Simply return a range of indices [0, len)
+        let valid = (0..len as u32).collect();
+        return (valid, Vec::new());
+    }
+
+    // null bitmap exists and some values are null
+    partition_validity_scan(array, len, null_count)
+}
 
-            (valid_indices, null_indices)
+/// Scans the null bitmap and partitions valid/null indices efficiently.
+/// Uses bit-level operations to extract bit positions.
+/// This function is only called when nulls exist.
+#[inline(always)]
+fn partition_validity_scan(
+    array: &dyn Array,
+    len: usize,
+    null_count: usize,
+) -> (Vec<u32>, Vec<u32>) {
+    // SAFETY: Guaranteed by caller that null_count > 0, so bitmap must exist
+    let bitmap = array.nulls().unwrap();
+
+    // Preallocate result vectors with exact capacities (avoids reallocations)
+    let mut valid = Vec::with_capacity(len - null_count);
+    let mut nulls = Vec::with_capacity(null_count);
+
+    unsafe {
+        // 1) Write valid indices (bits == 1)
+        let valid_slice = valid.spare_capacity_mut();
+        for (i, idx) in bitmap.inner().set_indices_u32().enumerate() {
+            valid_slice[i].write(idx);
+        }
+
+        // 2) Write null indices by inverting
+        let inv_buf = !bitmap.inner();
+        let null_slice = nulls.spare_capacity_mut();
+        for (i, idx) in inv_buf.set_indices_u32().enumerate() {
+            null_slice[i].write(idx);
         }
-        // faster path
-        _ => ((0..(len as u32)).collect(), vec![]),
+
+        // Finalize lengths
+        valid.set_len(len - null_count);
+        nulls.set_len(null_count);
     }
+
+    assert_eq!(valid.len(), len - null_count);
+    assert_eq!(nulls.len(), null_count);
+    (valid, nulls)
 }
 
 /// Whether `sort_to_indices` can sort an array of given data type.
@@ -4709,4 +4731,114 @@ mod tests {
 
         assert_eq!(&sorted[0], &expected_struct_array);
     }
+
+    /// A simple, correct but slower reference implementation.
+    fn naive_partition(array: &BooleanArray) -> (Vec<u32>, Vec<u32>) {
+        let len = array.len();
+        let mut valid = Vec::with_capacity(len);
+        let mut nulls = Vec::with_capacity(len);
+        for i in 0..len {
+            if array.is_valid(i) {
+                valid.push(i as u32);
+            } else {
+                nulls.push(i as u32);
+            }
+        }
+        (valid, nulls)
+    }
+
+    #[test]
+    fn fuzz_partition_validity() {
+        let mut rng = StdRng::seed_from_u64(0xF00D_CAFE);
+        for _ in 0..1_000 {
+            // build a random BooleanArray with some nulls
+            let len = rng.random_range(0..512);
+            let mut builder = BooleanBuilder::new();
+            for _ in 0..len {
+                if rng.random_bool(0.2) {
+                    builder.append_null();
+                } else {
+                    builder.append_value(rng.random_bool(0.5));
+                }
+            }
+            let array = builder.finish();
+
+            // Test both implementations on the full array
+            let (v1, n1) = partition_validity(&array);
+            let (v2, n2) = naive_partition(&array);
+            assert_eq!(v1, v2, "valid mismatch on full array");
+            assert_eq!(n1, n2, "null mismatch on full array");
+
+            if len >= 8 {
+                // 1) Random slice within the array
+                let max_offset = len - 4;
+                let offset = rng.random_range(0..=max_offset);
+                let max_slice_len = len - offset;
+                let slice_len = rng.random_range(1..=max_slice_len);
+
+                // Bind the sliced ArrayRef to keep it alive
+                let sliced = array.slice(offset, slice_len);
+                let slice = sliced
+                    .as_any()
+                    .downcast_ref::<BooleanArray>()
+                    .expect("slice should be a BooleanArray");
+
+                let (sv1, sn1) = partition_validity(slice);
+                let (sv2, sn2) = naive_partition(slice);
+                assert_eq!(
+                    sv1, sv2,
+                    "valid mismatch on random slice at offset {offset} length {slice_len}",
+                );
+                assert_eq!(
+                    sn1, sn2,
+                    "null mismatch on random slice at offset {offset} length {slice_len}",
+                );
+
+                // 2) Ensure we test slices that start beyond one 64-bit chunk boundary
+                if len > 68 {
+                    let offset2 = rng.random_range(65..(len - 3));
+                    let len2 = rng.random_range(1..=(len - offset2));
+
+                    let sliced2 = array.slice(offset2, len2);
+                    let slice2 = sliced2
+                        .as_any()
+                        .downcast_ref::<BooleanArray>()
+                        .expect("slice2 should be a BooleanArray");
+
+                    let (sv3, sn3) = partition_validity(slice2);
+                    let (sv4, sn4) = naive_partition(slice2);
+                    assert_eq!(
+                        sv3, sv4,
+                        "valid mismatch on chunk-crossing slice at offset {offset2} length {len2}",
+                    );
+                    assert_eq!(
+                        sn3, sn4,
+                        "null mismatch on chunk-crossing slice at offset {offset2} length {len2}",
+                    );
+                }
+            }
+        }
+    }
+
+    // A few small deterministic checks
+    #[test]
+    fn test_partition_edge_cases() {
+        // all valid
+        let array = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
+        let (valid, nulls) = partition_validity(&array);
+        assert_eq!(valid, vec![0, 1, 2]);
+        assert!(nulls.is_empty());
+
+        // all null
+        let array = BooleanArray::from(vec![None, None, None]);
+        let (valid, nulls) = partition_validity(&array);
+        assert!(valid.is_empty());
+        assert_eq!(nulls, vec![0, 1, 2]);
+
+        // alternating
+        let array = BooleanArray::from(vec![Some(true), None, Some(true), None]);
+        let (valid, nulls) = partition_validity(&array);
+        assert_eq!(valid, vec![0, 2]);
+        assert_eq!(nulls, vec![1, 3]);
+    }
 }