ARROW-12032: [Rust] Optimize comparison kernels #9759
Closed
Commits (7):
- f233f47 Use trusted_len for comparison kernels (Dandandan)
- 1aa7baf Fmt (Dandandan)
- 5f2dfa2 Add documentation and doctest (Dandandan)
- c4cb72a Add documentation (Dandandan)
- 0e1c8ed Clippy (Dandandan)
- d51e75e Fix benches (Dandandan)
- aa4a4fb Add faster primitive versions for comparison kernels (Dandandan)
I wonder if the bool iterator could be split into chunks of 8 bool values (for example, using https://docs.rs/itertools/0.4.2/itertools/struct.Chunks.html or alternatively https://doc.rust-lang.org/std/primitive.slice.html#method.chunks). Each chunk would then be mapped into a byte by converting each bool value into a byte (for example using std::mem::transmute::<bool, u8>), shifting it according to its position in the chunk, and OR-ing it into the output byte; finally, the resulting byte iterator would be used to build the buffer directly. This is the fastest implementation I can imagine because it eliminates as many conditions / checks as possible (and conditions are the enemy of fast).
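A minimal sketch of that idea, assuming plain slices as input and a hypothetical `pack_comparison` helper (not code from this PR); `bool as u8` stands in for the `transmute`, with the same effect:

```rust
// Evaluate the comparison per element, pack every 8 results into one byte,
// and collect the bytes directly into the output buffer.
fn pack_comparison<T: PartialOrd>(left: &[T], right: &[T]) -> Vec<u8> {
    assert_eq!(left.len(), right.len());
    left.chunks(8)
        .zip(right.chunks(8))
        .map(|(l, r)| {
            let mut byte = 0u8;
            for (i, (a, b)) in l.iter().zip(r.iter()).enumerate() {
                // `bool as u8` is 0 or 1; shift it into its bit position
                // instead of branching on the comparison result.
                byte |= ((a < b) as u8) << i;
            }
            byte
        })
        .collect()
}
```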
I also think there are some faster ways to speed up the inner loop; yours sounds like a great idea to try out. I was also looking at the arrow2 repository of @jorgecarleitao, but I think I had been looking at an older commit, which turned out to be slower (I expected it to be faster, but sometimes the compiler can be quite surprising in what compiles to efficient code).
I think the latest version is over here:
https://github.com/jorgecarleitao/arrow2/blob/be905f1b1f0293ef427387bc35b2e9956ec3336f/src/bitmap/mutable.rs#L209
I agree with you all ❤️
I admit I have spent an immoral amount of time trying to optimize bitmaps, but I have unfortunately not yet concluded what the best way to handle them is. I think that we may not be able to express to the compiler what we want it to do (some kind of operation over a single byte). @yordan-pavlov's suggestion is a great one in that direction, though.
FWIW, I ran the benches on my computer (a VM on Azure) against both arrow master (not this PR) and arrow2. This PR's idea on arrow2 (with corresponding changes) is giving me -14% on `eq Float32` and +35% on `eq scalar Float32`. I pushed these benches to master there.
Note the difference between scalar and non-scalar: it is the exact same code in the `trusted_len` function, but there is a 30% difference in performance between them; imo this indicates that we are fighting with the compiler to try to explain what we are trying to express here.
FYI, I wrote a minimal repo to evaluate these things and added a reddit post to try to get some help / feedback on this.
@yordan-pavlov's idea yields -50% on array-to-scalar and -10% on array-to-array 🚀
@yordan-pavlov
I am not sure where the win would be in that case.
I would expect the first idea to be compiled to roughly the same code (if all compiler optimizations work out)?
For the `Vec` one - I would expect that to be slower, as it introduces an extra loop / allocation and a barrier for optimization?
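For illustration, a rough sketch of the `Vec`-based shape being discussed (hypothetical helper, not code from this PR); the intermediate `Vec<bool>` is the extra allocation and second pass referred to above:

```rust
// Materialize all comparison results into a Vec<bool> first, then pack the
// bits in a second pass over that intermediate allocation.
fn compare_via_vec(left: &[f32], right: &[f32]) -> Vec<u8> {
    let bools: Vec<bool> = left.iter().zip(right.iter()).map(|(a, b)| a < b).collect();
    bools
        .chunks(8)
        .map(|chunk| {
            chunk
                .iter()
                .enumerate()
                .fold(0u8, |byte, (i, &bit)| byte | ((bit as u8) << i))
        })
        .collect()
}
```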
@Dandandan yes, I agree that it's counter-intuitive, but I find that the compiler often surprises me so it's best to try every option; I will try to extend the benchmark repo when I have a bit more free time later today
You are right, sometimes the result can be surprising.
I tried a variation that compiles to the same unrolled 38 instructions.
Yes, I tried to benchmark doing the comparison separately, but it's not faster than the fastest version on my machine.
I think 2000 items (the old length of the test data) is much too small for realistic benchmarking; it would make more sense to benchmark with test data whose length matches the default batch size in DataFusion (I think this was recently increased).
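A hedged sketch of what a benchmark at that scale could look like, assuming the `arrow` comparison kernel and `criterion`; the concrete size of 8192 is an assumption for illustration, not taken from DataFusion's source:

```rust
// Hypothetical criterion bench: size the arrays like a DataFusion batch
// instead of 2000 elements.
use arrow::array::Float32Array;
use arrow::compute::kernels::comparison::eq;
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_eq(c: &mut Criterion) {
    // Assumed batch size for illustration only.
    let size = 8192usize;
    let left = Float32Array::from((0..size).map(|i| i as f32).collect::<Vec<f32>>());
    let right = Float32Array::from((0..size).map(|i| (size - i) as f32).collect::<Vec<f32>>());

    c.bench_function("eq Float32", |b| b.iter(|| eq(&left, &right).unwrap()));
}

criterion_group!(benches, bench_eq);
criterion_main!(benches);
```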
@yordan-pavlov
Also see the findings in the PR over at jorgecarleitao/arrow2#17 (with an updated benchmark).
`(x * x + x) % 2` will give 0 on even and 1 on uneven inputs, so the pattern/branches will be very predictable, especially in the scalar version.
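If less predictable comparison outcomes are wanted in the test data, one option is a seeded RNG; this is a hypothetical alternative, not what the linked PR does:

```rust
// Generate benchmark input with a seeded RNG so the per-element comparison
// outcome is not trivially predictable by the branch predictor, while runs
// stay reproducible thanks to the fixed seed.
use rand::{rngs::StdRng, Rng, SeedableRng};

fn random_f32(len: usize, seed: u64) -> Vec<f32> {
    let mut rng = StdRng::seed_from_u64(seed);
    (0..len).map(|_| rng.gen::<f32>()).collect()
}
```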