diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index e5e3a610ead2..64a21d99e830 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -19,13 +19,14 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator}; use crate::buffer::BooleanBuffer; use crate::{Buffer, MutableBuffer}; -/// A [`BooleanBuffer`] used to encode validity for Arrow arrays +/// A [`BooleanBuffer`] used to encode validity (null values) for Arrow arrays /// /// In the [Arrow specification], array validity is encoded in a packed bitmask with a /// `true` value indicating the corresponding slot is not null, and `false` indicating /// that it is null. /// -/// `NullBuffer`s can be creating using [`NullBufferBuilder`] +/// # See also +/// * [`NullBufferBuilder`] for creating `NullBuffer`s /// /// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps /// [`NullBufferBuilder`]: crate::NullBufferBuilder diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs index 7990be1e7cc9..956f4a3e22ae 100644 --- a/arrow-buffer/src/builder/boolean.rs +++ b/arrow-buffer/src/builder/boolean.rs @@ -21,11 +21,28 @@ use std::ops::Range; /// Builder for [`BooleanBuffer`] /// +/// Builds a packed buffer of bits representing boolean values. 
Each bit in the +/// buffer corresponds to a boolean value. +/// /// # See Also /// -/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls +/// * [`NullBufferBuilder`] for building [`BooleanBuffer`]s for representing nulls +/// * [`BufferBuilder`] for building [`Buffer`]s +/// +/// # Example +/// ``` +/// # use arrow_buffer::builder::BooleanBufferBuilder; +/// let mut builder = BooleanBufferBuilder::new(10); +/// builder.append(true); +/// builder.append(false); +/// builder.append_n(3, true); // append 3 trues +/// let buffer = builder.build(); +/// assert_eq!(buffer.len(), 5); // 5 bits appended +/// assert_eq!(buffer.values(), &[0b00011101_u8]); // packed bits +/// ``` /// -/// [`NullBuffer`]: crate::NullBuffer +/// [`BufferBuilder`]: crate::builder::BufferBuilder +/// [`NullBufferBuilder`]: crate::builder::NullBufferBuilder #[derive(Debug)] pub struct BooleanBufferBuilder { buffer: MutableBuffer, @@ -247,7 +264,9 @@ impl BooleanBufferBuilder { self.buffer.as_slice_mut() } - /// Creates a [`BooleanBuffer`] + /// Resets this builder and returns a [`BooleanBuffer`]. + /// + /// Use [`Self::build`] when you don't need to reuse this builder. #[inline] pub fn finish(&mut self) -> BooleanBuffer { let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); @@ -255,6 +274,14 @@ impl BooleanBufferBuilder { BooleanBuffer::new(buf.into(), 0, len) } + /// Builds a [`BooleanBuffer`] without resetting the builder. + /// + /// This consumes the builder. Use [`Self::finish`] to reuse it. + #[inline] + pub fn build(self) -> BooleanBuffer { + BooleanBuffer::new(self.buffer.into(), 0, self.len) + } + /// Builds the [BooleanBuffer] without resetting the builder. 
pub fn finish_cloned(&self) -> BooleanBuffer { BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len) @@ -285,7 +312,7 @@ impl From for Buffer { impl From for BooleanBuffer { #[inline] fn from(builder: BooleanBufferBuilder) -> Self { - BooleanBuffer::new(builder.buffer.into(), 0, builder.len) + builder.build() } } diff --git a/arrow-buffer/src/builder/mod.rs b/arrow-buffer/src/builder/mod.rs index abe510bdabc6..1abb8018cec6 100644 --- a/arrow-buffer/src/builder/mod.rs +++ b/arrow-buffer/src/builder/mod.rs @@ -28,23 +28,31 @@ pub use offset::*; use crate::{ArrowNativeType, Buffer, MutableBuffer}; use std::marker::PhantomData; -/// Builder for creating a [Buffer] object. +/// Builder for creating Arrow [`Buffer`] objects /// -/// A [Buffer] is the underlying data structure of Arrow's Arrays. +/// A [`Buffer`] is the underlying data structure of Arrow's Arrays. /// /// For all supported types, there are type definitions for the /// generic version of `BufferBuilder`, e.g. `BufferBuilder`. /// +/// **Note it is typically faster to create buffers directly from `Vec`**. +/// See example on [`Buffer`]. +/// +/// # See Also +/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s +/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values +/// +/// [`BooleanBuffer`]: crate::BooleanBuffer +/// [`NullBuffer`]: crate::NullBuffer +/// /// # Example: /// /// ``` /// # use arrow_buffer::builder::BufferBuilder; -/// /// let mut builder = BufferBuilder::::new(100); /// builder.append_slice(&[42, 43, 44]); /// builder.append(45); /// let buffer = builder.finish(); -/// /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 43, 44, 45]); /// ``` #[derive(Debug)] @@ -341,16 +349,15 @@ impl BufferBuilder { /// Resets this builder and returns an immutable [Buffer]. /// + /// Use [`Self::build`] when you don't need to reuse this builder. 
+ /// # Example: /// /// ``` /// # use arrow_buffer::builder::BufferBuilder; - /// /// let mut builder = BufferBuilder::::new(10); /// builder.append_slice(&[42, 44, 46]); - /// /// let buffer = builder.finish(); - /// /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 44, 46]); /// ``` #[inline] @@ -359,6 +366,24 @@ impl BufferBuilder { self.len = 0; buf.into() } + + /// Builds an immutable [Buffer] without resetting the builder. + /// + /// This consumes the builder. Use [`Self::finish`] to reuse it. + /// + /// # Example: + /// + /// ``` + /// # use arrow_buffer::builder::BufferBuilder; + /// let mut builder = BufferBuilder::::new(10); + /// builder.append_slice(&[42, 44, 46]); + /// let buffer = builder.build(); + /// assert_eq!(unsafe { buffer.typed_data::() }, &[42, 44, 46]); + /// ``` + #[inline] + pub fn build(self) -> Buffer { + self.buffer.into() + } } impl Default for BufferBuilder { diff --git a/arrow-buffer/src/builder/null.rs b/arrow-buffer/src/builder/null.rs index e6f426615be5..2ffd4dcd4c35 100644 --- a/arrow-buffer/src/builder/null.rs +++ b/arrow-buffer/src/builder/null.rs @@ -17,19 +17,22 @@ use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer}; -/// Builder for creating [`NullBuffer`] +/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls). +/// +/// # See also +/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder. +/// * [`Self::allocated_size`] for the current memory allocated by the builder. /// /// # Performance /// -/// This builder only materializes the buffer when we append `false`. -/// If you only append `true`s to the builder, what you get will be -/// `None` when calling [`finish`](#method.finish). +/// This builder only materializes the buffer when null values (`false`) are +/// appended. If you only append non-null values (`true`) to the builder, no buffer is +/// allocated and [`build`](#method.build) or [`finish`](#method.finish) return +/// `None`. 
/// /// This optimization is **very** important for the performance as it avoids /// allocating memory for the null buffer when there are no nulls. /// -/// See [`Self::allocated_size`] to get the current memory allocated by the builder. -/// /// # Example /// ``` /// # use arrow_buffer::NullBufferBuilder; @@ -193,11 +196,20 @@ impl NullBufferBuilder { } } - /// Builds the null buffer and resets the builder. - /// Returns `None` if the builder only contains `true`s. + /// Builds the [`NullBuffer`] and resets the builder. + /// + /// Returns `None` if the builder only contains `true`s. Use [`Self::build`] + /// when you don't need to reuse this builder. pub fn finish(&mut self) -> Option { self.len = 0; - Some(NullBuffer::new(self.bitmap_builder.take()?.finish())) + Some(NullBuffer::new(self.bitmap_builder.take()?.build())) + } + + /// Builds the [`NullBuffer`] without resetting the builder. + /// + /// This consumes the builder. Use [`Self::finish`] to reuse it. + pub fn build(self) -> Option { + self.bitmap_builder.map(NullBuffer::from) } /// Builds the [NullBuffer] without resetting the builder. @@ -238,9 +250,7 @@ impl NullBufferBuilder { .map(|b| b.capacity() / 8) .unwrap_or(0) } -} -impl NullBufferBuilder { /// Return the number of bits in the buffer. pub fn len(&self) -> usize { self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())