diff --git a/rust/arrow/Cargo.toml b/rust/arrow/Cargo.toml
index 71445768207..86f1fac627d 100644
--- a/rust/arrow/Cargo.toml
+++ b/rust/arrow/Cargo.toml
@@ -48,6 +48,7 @@ lazy_static = "1.4"
 packed_simd = { version = "0.3.4", optional = true, package = "packed_simd_2" }
 chrono = "0.4"
 flatbuffers = "0.6"
+bitvec = "0.19"
 hex = "0.4"
 prettytable-rs = { version = "0.8.0", optional = true }
diff --git a/rust/arrow/src/buffer.rs b/rust/arrow/src/buffer.rs
index d5b824e2992..08fb582d35c 100644
--- a/rust/arrow/src/buffer.rs
+++ b/rust/arrow/src/buffer.rs
@@ -33,9 +33,10 @@ use std::sync::Arc;
 use crate::datatypes::ArrowNativeType;
 use crate::error::{ArrowError, Result};
 use crate::memory;
-use crate::util::bit_chunk_iterator::BitChunks;
+use crate::util::bit_slice_iterator::*;
 use crate::util::bit_util;
 use crate::util::bit_util::ceil;
+
 #[cfg(feature = "simd")]
 use std::borrow::BorrowMut;
 
@@ -258,16 +259,20 @@ impl Buffer {
     /// Returns a slice of this buffer starting at a certain bit offset.
     /// If the offset is byte-aligned the returned buffer is a shallow clone,
     /// otherwise a new buffer is allocated and filled with a copy of the bits in the range.
-    pub fn bit_slice(&self, offset: usize, len: usize) -> Self {
-        if offset % 8 == 0 && len % 8 == 0 {
-            return self.slice(offset / 8);
+    pub fn bit_view(&self, offset_in_bits: usize, len_in_bits: usize) -> Self {
+        if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 {
+            self.slice(offset_in_bits / 8)
+        } else {
+            self.bit_slice()
+                .view(offset_in_bits, len_in_bits)
+                .as_buffer()
         }
-
-        bitwise_unary_op_helper(&self, offset, len, |a| a)
     }
 
-    pub fn bit_chunks(&self, offset: usize, len: usize) -> BitChunks {
-        BitChunks::new(&self, offset, len)
+    /// Returns a bit slice over the underlying buffer data.
+    /// The returned view is immutable and zero-copy, and can be used for bit-level operations over the buffer.
+    pub fn bit_slice(&self) -> BufferBitSlice {
+        BufferBitSlice::new(self.data.data())
     }
 
     /// Returns an empty buffer.
@@ -401,20 +406,27 @@ where
     let mut result =
         MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
 
-    let left_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
-    let right_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);
+    let left_slice = left.bit_slice().view(left_offset_in_bits, len_in_bits);
+    let left_chunks = left_slice.chunks::<u64>();
+
+    let right_slice = right.bit_slice().view(right_offset_in_bits, len_in_bits);
+    let right_chunks = right_slice.chunks::<u64>();
+
+    let remainder_bytes = ceil(left_chunks.remainder_bit_len(), 8);
+    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
+    let rem = &rem.to_ne_bytes()[0..remainder_bytes];
+
+    let left_chunk_iter = left_chunks.interpret();
+    let right_chunk_iter = right_chunks.interpret();
+
     let result_chunks = result.typed_data_mut::<u64>().iter_mut();
 
     result_chunks
-        .zip(left_chunks.iter().zip(right_chunks.iter()))
+        .zip(left_chunk_iter.zip(right_chunk_iter))
         .for_each(|(res, (left, right))| {
             *res = op(left, right);
         });
 
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits(), right_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
     result.extend_from_slice(rem);
 
     result.freeze()
@@ -435,19 +447,21 @@ where
     let mut result =
         MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(len_in_bits / 64 * 8, false);
 
-    let left_chunks = left.bit_chunks(offset_in_bits, len_in_bits);
+    let left_slice = left.bit_slice().view(offset_in_bits, len_in_bits);
+    let left_chunks = left_slice.chunks::<u64>();
+
+    let remainder_bytes = ceil(left_chunks.remainder_bit_len(), 8);
+    let rem = op(left_chunks.remainder_bits());
+    let rem = &rem.to_ne_bytes()[0..remainder_bytes];
+
+    let left_chunk_iter = left_chunks.interpret();
+
     let result_chunks = result.typed_data_mut::<u64>().iter_mut();
 
-    result_chunks
-        .zip(left_chunks.iter())
-        .for_each(|(res, left)| {
-            *res = op(left);
-        });
+    result_chunks.zip(left_chunk_iter).for_each(|(res, left)| {
+        *res = op(left);
+    });
 
-    let remainder_bytes = ceil(left_chunks.remainder_len(), 8);
-    let rem = op(left_chunks.remainder_bits());
-    // we are counting its starting from the least significant bit, to to_le_bytes should be correct
-    let rem = &rem.to_le_bytes()[0..remainder_bytes];
     result.extend_from_slice(rem);
 
     result.freeze()
diff --git a/rust/arrow/src/compute/kernels/aggregate.rs b/rust/arrow/src/compute/kernels/aggregate.rs
index 444e2454a1c..e332a644de5 100644
--- a/rust/arrow/src/compute/kernels/aggregate.rs
+++ b/rust/arrow/src/compute/kernels/aggregate.rs
@@ -152,9 +152,15 @@ where
             let data_chunks = data.chunks_exact(64);
             let remainder = data_chunks.remainder();
 
-            let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
+            let buffer_slice = buffer.bit_slice().view(array.offset(), array.len());
+            let buffer_chunks = buffer_slice.chunks::<u64>();
+
+            let buffer_remainder_bits: u64 = buffer_chunks.remainder_bits();
+
+            let buffer_chunk_iter = buffer_chunks.interpret();
+
             &data_chunks
-                .zip(bit_chunks.iter())
+                .zip(buffer_chunk_iter)
                 .for_each(|(chunk, mask)| {
                     chunk.iter().enumerate().for_each(|(i, value)| {
                         if (mask & (1 << i)) != 0 {
@@ -163,10 +169,8 @@ where
                     });
                 });
 
-            let remainder_bits = bit_chunks.remainder_bits();
-
             remainder.iter().enumerate().for_each(|(i, value)| {
-                if remainder_bits & (1 << i) != 0 {
+                if buffer_remainder_bits & (1 << i) != 0 {
                     sum = sum + *value;
                 }
             });
@@ -216,24 +220,27 @@ where
             let data_chunks = data.chunks_exact(64);
             let remainder = data_chunks.remainder();
 
-            let bit_chunks = buffer.bit_chunks(array.offset(), array.len());
+            let bit_slice = buffer.bit_slice().view(array.offset(), array.len());
+            let bit_chunks = bit_slice.chunks::<u64>();
             let remainder_bits = bit_chunks.remainder_bits();
 
-            data_chunks.zip(bit_chunks).for_each(|(chunk, mut mask)| {
-                // split chunks further into slices corresponding to the vector length
-                // the compiler is able to unroll this inner loop and remove bounds checks
-                // since the outer chunk size (64) is always a multiple of the number of lanes
-                chunk.chunks_exact(T::lanes()).for_each(|chunk| {
-                    let zero = T::init(T::default_value());
-                    let vecmask = T::mask_from_u64(mask);
-                    let chunk = T::load(&chunk);
-                    let blended = T::mask_select(vecmask, chunk, zero);
-
-                    vector_sum = vector_sum + blended;
-
-                    mask = mask >> T::lanes();
+            data_chunks
+                .zip(bit_chunks.interpret())
+                .for_each(|(chunk, mut mask)| {
+                    // split chunks further into slices corresponding to the vector length
+                    // the compiler is able to unroll this inner loop and remove bounds checks
+                    // since the outer chunk size (64) is always a multiple of the number of lanes
+                    chunk.chunks_exact(T::lanes()).for_each(|chunk| {
+                        let zero = T::init(T::default_value());
+                        let vecmask = T::mask_from_u64(mask);
+                        let chunk = T::load(&chunk);
+                        let blended = T::mask_select(vecmask, chunk, zero);
+
+                        vector_sum = vector_sum + blended;
+
+                        mask = mask >> T::lanes();
+                    });
                 });
-            });
 
             remainder.iter().enumerate().for_each(|(i, value)| {
                 if remainder_bits & (1 << i) != 0 {
diff --git a/rust/arrow/src/compute/kernels/boolean.rs b/rust/arrow/src/compute/kernels/boolean.rs
index 075ffb0d67b..6a4bff5e3ab 100644
--- a/rust/arrow/src/compute/kernels/boolean.rs
+++ b/rust/arrow/src/compute/kernels/boolean.rs
@@ -140,7 +140,7 @@ pub fn is_not_null(input: &ArrayRef) -> Result<BooleanArray> {
                 .with_bitset(len_bytes, true)
                 .freeze()
         }
-        Some(buffer) => buffer.bit_slice(input.offset(), len),
+        Some(buffer) => buffer.bit_view(input.offset(), len),
     };
 
     let data =
diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs
index 3b578011640..dde50e4511c 100644
--- a/rust/arrow/src/compute/util.rs
+++ b/rust/arrow/src/compute/util.rs
@@ -45,10 +45,10 @@ pub(super) fn combine_option_bitmap(
     match left {
         None => match right {
             None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
+            Some(r) => Ok(Some(r.bit_view(right_offset_in_bits, len_in_bits))),
         },
         Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
+            None => Ok(Some(l.bit_view(left_offset_in_bits, len_in_bits))),
             Some(r) => Ok(Some(buffer_bin_and(
                 &l,
@@ -78,10 +78,10 @@ pub(super) fn compare_option_bitmap(
     match left {
         None => match right {
             None => Ok(None),
-            Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
+            Some(r) => Ok(Some(r.bit_view(right_offset_in_bits, len_in_bits))),
         },
         Some(l) => match right {
-            None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
+            None => Ok(Some(l.bit_view(left_offset_in_bits, len_in_bits))),
             Some(r) => Ok(Some(buffer_bin_or(
                 &l,
diff --git a/rust/arrow/src/util/bit_chunk_iterator.rs b/rust/arrow/src/util/bit_chunk_iterator.rs
deleted file mode 100644
index ec10c6f1d41..00000000000
--- a/rust/arrow/src/util/bit_chunk_iterator.rs
+++ /dev/null
@@ -1,257 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-use crate::buffer::Buffer;
-use crate::util::bit_util::ceil;
-use std::fmt::Debug;
-
-#[derive(Debug)]
-pub struct BitChunks<'a> {
-    buffer: &'a Buffer,
-    raw_data: *const u8,
-    /// offset inside a byte, guaranteed to be between 0 and 7 (inclusive)
-    bit_offset: usize,
-    /// number of complete u64 chunks
-    chunk_len: usize,
-    /// number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
-    remainder_len: usize,
-}
-
-impl<'a> BitChunks<'a> {
-    pub fn new(buffer: &'a Buffer, offset: usize, len: usize) -> Self {
-        assert!(ceil(offset + len, 8) <= buffer.len() * 8);
-
-        let byte_offset = offset / 8;
-        let bit_offset = offset % 8;
-
-        let raw_data = unsafe { buffer.raw_data().add(byte_offset) };
-
-        let chunk_bits = 8 * std::mem::size_of::<u64>();
-
-        let chunk_len = len / chunk_bits;
-        let remainder_len = len & (chunk_bits - 1);
-
-        BitChunks::<'a> {
-            buffer: &buffer,
-            raw_data,
-            bit_offset,
-            chunk_len,
-            remainder_len,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct BitChunkIterator<'a> {
-    buffer: &'a Buffer,
-    raw_data: *const u8,
-    bit_offset: usize,
-    chunk_len: usize,
-    index: usize,
-}
-
-impl<'a> BitChunks<'a> {
-    /// Returns the number of remaining bits, guaranteed to be between 0 and 63 (inclusive)
-    #[inline]
-    pub const fn remainder_len(&self) -> usize {
-        self.remainder_len
-    }
-
-    /// Returns the bitmask of remaining bits
-    #[inline]
-    pub fn remainder_bits(&self) -> u64 {
-        let bit_len = self.remainder_len;
-        if bit_len == 0 {
-            0
-        } else {
-            let bit_offset = self.bit_offset;
-            // number of bytes to read
-            // might be one more than sizeof(u64) if the offset is in the middle of a byte
-            let byte_len = ceil(bit_len + bit_offset, 8);
-            // pointer to remainder bytes after all complete chunks
-            let base = unsafe {
-                self.raw_data
-                    .add(self.chunk_len * std::mem::size_of::<u64>())
-            };
-
-            let mut bits = unsafe { std::ptr::read(base) } as u64 >> bit_offset;
-            for i in 1..byte_len {
-                let byte = unsafe { std::ptr::read(base.add(i)) };
-                bits |= (byte as u64) << (i * 8 - bit_offset);
-            }
-
-            bits & ((1 << bit_len) - 1)
-        }
-    }
-
-    /// Returns an iterator over chunks of 64 bits represented as an u64
-    #[inline]
-    pub const fn iter(&self) -> BitChunkIterator<'a> {
-        BitChunkIterator::<'a> {
-            buffer: self.buffer,
-            raw_data: self.raw_data,
-            bit_offset: self.bit_offset,
-            chunk_len: self.chunk_len,
-            index: 0,
-        }
-    }
-}
-
-impl<'a> IntoIterator for BitChunks<'a> {
-    type Item = u64;
-    type IntoIter = BitChunkIterator<'a>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.iter()
-    }
-}
-
-impl Iterator for BitChunkIterator<'_> {
-    type Item = u64;
-
-    #[inline]
-    fn next(&mut self) -> Option<u64> {
-        let index = self.index;
-        if index >= self.chunk_len {
-            return None;
-        }
-
-        // cast to *const u64 should be fine since we are using read_unaligned below
-        #[allow(clippy::cast_ptr_alignment)]
-        let raw_data = self.raw_data as *const u64;
-
-        // bit-packed buffers are stored starting with the least-significant byte first
-        // so when reading as u64 on a big-endian machine, the bytes need to be swapped
-        let current = unsafe { std::ptr::read_unaligned(raw_data.add(index)).to_le() };
-
-        let combined = if self.bit_offset == 0 {
-            current
-        } else {
-            let next =
-                unsafe { std::ptr::read_unaligned(raw_data.add(index + 1)).to_le() };
-
-            current >> self.bit_offset
-                | (next & ((1 << self.bit_offset) - 1)) << (64 - self.bit_offset)
-        };
-
-        self.index = index + 1;
-
-        Some(combined)
-    }
-
-    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (
-            self.chunk_len - self.index,
-            Some(self.chunk_len - self.index),
-        )
-    }
-}
-
-impl ExactSizeIterator for BitChunkIterator<'_> {
-    #[inline]
-    fn len(&self) -> usize {
-        self.chunk_len - self.index
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::buffer::Buffer;
-
-    #[test]
-    fn test_iter_aligned() {
-        let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(0, 64);
-        let result = bitchunks.into_iter().collect::<Vec<u64>>();
-
-        assert_eq!(vec![0x0706050403020100], result);
-    }
-
-    #[test]
-    fn test_iter_unaligned() {
-        let input: &[u8] = &[
-            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
-            0b00100000, 0b01000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(4, 64);
-
-        assert_eq!(0, bitchunks.remainder_len());
-        assert_eq!(0, bitchunks.remainder_bits());
-
-        let result = bitchunks.into_iter().collect::<Vec<u64>>();
-
-        assert_eq!(
-            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
-            result
-        );
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_1_byte() {
-        let input: &[u8] = &[
-            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
-            0b00100000, 0b01000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(4, 66);
-
-        assert_eq!(2, bitchunks.remainder_len());
-        assert_eq!(0b00000011, bitchunks.remainder_bits());
-
-        let result = bitchunks.into_iter().collect::<Vec<u64>>();
-
-        assert_eq!(
-            vec![0b1111010000000010000000010000000010000000010000000010000000010000],
-            result
-        );
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_bits_across_bytes() {
-        let input: &[u8] = &[0b00111111, 0b11111100];
-        let buffer: Buffer = Buffer::from(input);
-
-        // remainder contains bits from both bytes
-        // result should be the highest 2 bits from first byte followed by lowest 5 bits of second bytes
-        let bitchunks = buffer.bit_chunks(6, 7);
-
-        assert_eq!(7, bitchunks.remainder_len());
-        assert_eq!(0b1110000, bitchunks.remainder_bits());
-    }
-
-    #[test]
-    fn test_iter_unaligned_remainder_bits_large() {
-        let input: &[u8] = &[
-            0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
-            0b11111111, 0b00000000, 0b11111111,
-        ];
-        let buffer: Buffer = Buffer::from(input);
-
-        let bitchunks = buffer.bit_chunks(2, 63);
-
-        assert_eq!(63, bitchunks.remainder_len());
-        assert_eq!(
-            0b1000000_00111111_11000000_00111111_11000000_00111111_11000000_00111111,
-            bitchunks.remainder_bits()
-        );
-    }
-}
diff --git a/rust/arrow/src/util/bit_slice_iterator.rs b/rust/arrow/src/util/bit_slice_iterator.rs
new file mode 100644
index 00000000000..dd771288165
--- /dev/null
+++ b/rust/arrow/src/util/bit_slice_iterator.rs
@@ -0,0 +1,259 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::buffer::Buffer;
+
+use bitvec::prelude::*;
+use bitvec::slice::ChunksExact;
+
+use std::fmt::Debug;
+
+///
+/// Bit slice representation of buffer data
+#[derive(Debug)]
+pub struct BufferBitSlice<'a> {
+    buffer_data: &'a [u8],
+    bit_slice: &'a BitSlice<LocalBits, u8>,
+}
+
+impl<'a> BufferBitSlice<'a> {
+    ///
+    /// Creates a bit slice over the given data
+    #[inline]
+    pub fn new(buffer_data: &'a [u8]) -> Self {
+        let bit_slice = BitSlice::<LocalBits, u8>::from_slice(buffer_data).unwrap();
+
+        BufferBitSlice {
+            buffer_data,
+            bit_slice,
+        }
+    }
+
+    ///
+    /// Returns an immutable view with the given offset and length in bits.
+    /// The view is a zero-copy representation over the actual data.
+    #[inline(always)]
+    pub fn view(&self, offset_in_bits: usize, len_in_bits: usize) -> Self {
+        Self {
+            buffer_data: self.buffer_data,
+            bit_slice: &self.bit_slice[offset_in_bits..offset_in_bits + len_in_bits],
+        }
+    }
+
+    ///
+    /// Returns bit chunks in the native 64-bit allocation size.
+    /// Native representations in Arrow follow the 64-bit convention.
+    /// Chunks can still be reinterpreted as any primitive type no wider than u64.
+    #[inline(always)]
+    pub fn chunks<T>(&self) -> BufferBitChunksExact<'a, T>
+    where
+        T: BitMemory,
+    {
+        let offset_size_in_bits = 8 * std::mem::size_of::<T>();
+        let chunks_exact = self.bit_slice.chunks_exact(offset_size_in_bits);
+        let remainder_bits = chunks_exact.remainder();
+        let remainder: T = if remainder_bits.len() == 0 {
+            T::default()
+        } else {
+            remainder_bits.load::<T>()
+        };
+        BufferBitChunksExact {
+            chunks_exact,
+            remainder,
+            remainder_len_in_bits: remainder_bits.len(),
+        }
+    }
+
+    ///
+    /// Converts the bit view into a `Buffer`.
+    /// The returned buffer is always byte-aligned and properly aligned.
+    #[inline]
+    pub fn as_buffer(&self) -> Buffer {
+        Buffer::from(self.bit_slice.as_slice())
+    }
+}
+
+///
+/// Exact chunk view over the bit slice
+#[derive(Clone, Debug)]
+pub struct BufferBitChunksExact<'a, T>
+where
+    T: BitMemory,
+{
+    chunks_exact: ChunksExact<'a, LocalBits, u8>,
+    remainder: T,
+    remainder_len_in_bits: usize,
+}
+
+impl<'a, T> BufferBitChunksExact<'a, T>
+where
+    T: BitMemory,
+{
+    ///
+    /// Returns the remainder bit length from the exact chunk iterator
+    #[inline(always)]
+    pub fn remainder_bit_len(&self) -> usize {
+        self.remainder_len_in_bits
+    }
+
+    ///
+    /// Returns the remainder bits interpreted as the given type.
+    #[inline(always)]
+    pub fn remainder_bits(&self) -> T {
+        self.remainder
+    }
+
+    ///
+    /// Interprets each underlying chunk's bits as the given type.
+    #[inline(always)]
+    pub fn interpret(self) -> impl Iterator<Item = T> + 'a
+    where
+        T: BitMemory,
+    {
+        self.chunks_exact.map(|e| e.load::<T>())
+    }
+
+    ///
+    /// Returns a reference to the underlying chunk iterator
+    #[inline(always)]
+    pub fn iter(&self) -> &ChunksExact<'a, LocalBits, u8> {
+        &self.chunks_exact
+    }
+}
+
+///
+/// Implements a consuming iterator for the exact chunk iterator
+impl<'a, T> IntoIterator for BufferBitChunksExact<'a, T>
+where
+    T: BitMemory,
+{
+    type Item = &'a BitSlice<LocalBits, u8>;
+    type IntoIter = ChunksExact<'a, LocalBits, u8>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.chunks_exact
+    }
+}
+
+#[cfg(all(test, target_endian = "little"))]
+mod tests_bit_slices_little_endian {
+    use super::*;
+    use crate::datatypes::ToByteSlice;
+
+    #[test]
+    fn test_bit_slice_iter_aligned() {
+        let input: &[u8] = &[0, 1, 2, 3, 4, 5, 6, 7];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bit_slice = buffer.bit_slice();
+        let result = bit_slice.chunks().interpret().collect::<Vec<u64>>();
+
+        assert_eq!(vec![0x0706050403020100], result);
+    }
+
+    #[test]
+    fn test_bit_slice_iter_unaligned() {
+        let input: &[u8] = &[
+            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
+            0b00100000, 0b01000000, 0b11111111,
+        ];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bit_slice = buffer.bit_slice().view(4, 64);
+        let chunks = bit_slice.chunks::<u64>();
+
+        assert_eq!(0, chunks.remainder_bit_len());
+        assert_eq!(0, chunks.remainder_bits());
+
+        let result = chunks.interpret().collect::<Vec<u64>>();
+
+        assert_eq!(
+            vec![0b1111_01000000_00100000_00010000_00001000_00000100_00000010_00000001_0000],
+            result
+        );
+    }
+
+    #[test]
+    fn test_bit_slice_iter_unaligned_remainder_1_byte() {
+        let input: &[u8] = &[
+            0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
+            0b00100000, 0b01000000, 0b11111111,
+        ];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bit_slice = buffer.bit_slice().view(4, 66);
+        let chunks = bit_slice.chunks::<u64>();
+
+        assert_eq!(2, chunks.remainder_bit_len());
+        assert_eq!(0b00000011, chunks.remainder_bits());
+
+        let result = chunks.interpret().collect::<Vec<u64>>();
+
+        assert_eq!(
+            vec![0b1111_01000000_00100000_00010000_00001000_00000100_00000010_00000001_0000],
+            result
+        );
+    }
+
+    #[test]
+    fn test_bit_slice_iter_unaligned_remainder_bits_across_bytes() {
+        let input: &[u8] = &[0b00111111, 0b11111100];
+        let buffer: Buffer = Buffer::from(input);
+
+        // remainder contains bits from both bytes
+        // result should be the highest 2 bits of the first byte followed by the lowest 5 bits of the second byte
+        let bit_slice = buffer.bit_slice().view(6, 7);
+        let chunks = bit_slice.chunks::<u64>();
+
+        assert_eq!(7, chunks.remainder_bit_len());
+        assert_eq!(0b1110000, chunks.remainder_bits());
+    }
+
+    #[test]
+    fn test_bit_slice_iter_unaligned_remainder_bits_large() {
+        let input: &[u8] = &[
+            0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
+            0b11111111, 0b00000000, 0b11111111,
+        ];
+        let buffer: Buffer = Buffer::from(input);
+
+        let bit_slice = buffer.bit_slice().view(2, 63);
+        let chunks = bit_slice.chunks::<u64>();
+
+        assert_eq!(63, chunks.remainder_bit_len());
+        assert_eq!(
+            0b1000000_00111111_11000000_00111111_11000000_00111111_11000000_00111111,
+            chunks.remainder_bits()
+        );
+    }
+
+    #[test]
+    fn test_bit_slice_iter_reinterpret() {
+        assert_eq!(LocalBits::default(), Lsb0::default());
+        let buffer_slice = &[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice();
+        // The name "bit slice" mirrors Rust's byte slice: it is a borrowed view over the underlying bytes and behaves much like one.
+        let buffer = Buffer::from(buffer_slice);
+
+        // Let's get the whole buffer.
+        let bit_slice = buffer.bit_slice().view(0, buffer_slice.len() * 8);
+        // This time, chunk the bits as u8 instead of u64.
+        let chunks = bit_slice.chunks::<u8>();
+
+        let result = chunks.interpret().collect::<Vec<u8>>();
+        assert_eq!(buffer_slice.to_vec(), result);
+    }
+}
diff --git a/rust/arrow/src/util/buffered_iterator.rs b/rust/arrow/src/util/buffered_iterator.rs
index 059b82424a8..5d42ee43e66 100644
--- a/rust/arrow/src/util/buffered_iterator.rs
+++ b/rust/arrow/src/util/buffered_iterator.rs
@@ -54,7 +54,7 @@ where
     /// Useful to extract the exact item where an error occurred
     #[inline]
     pub fn n(&self) -> usize {
-        return self.buffer.len();
+        self.buffer.len()
     }
 }
 
diff --git a/rust/arrow/src/util/mod.rs b/rust/arrow/src/util/mod.rs
index 053d1329631..c440745fc93 100644
--- a/rust/arrow/src/util/mod.rs
+++ b/rust/arrow/src/util/mod.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-pub mod bit_chunk_iterator;
+pub mod bit_slice_iterator;
 pub mod bit_util;
 pub mod buffered_iterator;
 pub mod display;
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 8dd7afa1d7e..c6ad1b381eb 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -349,7 +349,7 @@ async fn csv_query_avg_sqrt() -> Result<()> {
     let mut actual = execute(&mut ctx, sql).await;
     actual.sort();
     let expected = vec![vec!["0.6706002946036462"]];
-    assert_eq!(actual, expected);
+    assert_float_eq(&expected, &actual);
     Ok(())
 }
 
@@ -363,7 +363,7 @@ async fn csv_query_custom_udf_with_cast() -> Result<()> {
     let sql = "SELECT avg(custom_sqrt(c11)) FROM aggregate_test_100";
     let actual = execute(&mut ctx, sql).await;
     let expected = vec![vec!["0.6584408483418833"]];
-    assert_eq!(actual, expected);
+    assert_float_eq(&expected, &actual);
     Ok(())
 }
 
@@ -377,11 +377,11 @@ async fn sqrt_f32_vs_f64() -> Result<()> {
     let actual = execute(&mut ctx, sql).await;
 
     let expected = vec![vec!["0.6584408485889435"]];
-    assert_eq!(actual, expected);
+    assert_float_eq(&expected, &actual);
 
     let sql = "SELECT avg(sqrt(CAST(c11 AS double))) FROM aggregate_test_100";
     let actual = execute(&mut ctx, sql).await;
     let expected = vec![vec!["0.6584408483418833"]];
-    assert_eq!(actual, expected);
+    assert_float_eq(&expected, &actual);
     Ok(())
 }
 
@@ -405,7 +405,7 @@ async fn csv_query_sqrt_sqrt() -> Result<()> {
     let actual = execute(&mut ctx, sql).await;
     // sqrt(sqrt(c12=0.9294097332465232)) = 0.9818650561397431
     let expected = vec![vec!["0.9818650561397431"]];
-    assert_eq!(actual, expected);
+    assert_float_eq(&expected, &actual);
     Ok(())
 }
 
@@ -448,7 +448,7 @@ async fn csv_query_avg() -> Result<()> {
     let mut actual = execute(&mut ctx, sql).await;
     actual.sort();
     let expected = vec![vec!["0.5089725099127211"]];
-    assert_eq!(expected, actual);
+    assert_float_eq(&expected, &actual);
     Ok(())
 }
 
@@ -1399,3 +1399,25 @@ async fn query_on_string_dictionary() -> Result<()> {
 
     Ok(())
 }
+
+fn assert_float_eq<T>(expected: &Vec<Vec<T>>, received: &Vec<Vec<String>>)
+where
+    T: AsRef<str>,
+{
+    expected
+        .into_iter()
+        .flatten()
+        .zip(received.into_iter().flatten())
+        .for_each(|(l, r)| {
+            let (l, r) = (
+                l.as_ref().parse::<f64>().unwrap(),
+                r.as_str().parse::<f64>().unwrap(),
+            );
+            // Very small floats are hard to compare precisely, so allow a slightly larger error margin.
+            if l < 1.0 && r < 1.0 {
+                assert!((l - r).abs() <= 2.0_f64 * f64::EPSILON);
+            } else {
+                assert!((l - r).abs() <= f64::EPSILON);
+            }
+        });
+}
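
Appended for reviewers, not part of the patch: a minimal usage sketch of the `Buffer::bit_slice()` / `view()` / `chunks::<T>()` API introduced above, assuming this change is applied to the arrow crate. The buffer contents and offsets are arbitrary illustration values.

```rust
// Reviewer sketch (not part of the patch): exercising the new bit-view API,
// assuming this change is applied to the arrow crate.
use arrow::buffer::Buffer;

fn main() {
    let buffer = Buffer::from(&[0b1111_1111u8, 0b0000_0001][..]);

    // Zero-copy view over 10 bits starting at bit offset 2.
    let bit_slice = buffer.bit_slice();
    let view = bit_slice.view(2, 10);

    // Split the view into full u64 chunks; anything shorter than 64 bits
    // is exposed through the remainder accessors instead.
    let chunks = view.chunks::<u64>();
    assert_eq!(10, chunks.remainder_bit_len());

    let words: Vec<u64> = chunks.interpret().collect();
    assert!(words.is_empty()); // fewer than 64 bits, so no full chunk

    println!("remainder bits: {:#b}", view.chunks::<u64>().remainder_bits());
}
```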