Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions arrow-buffer/src/buffer/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ use crate::bit_iterator::{BitIndexIterator, BitIterator, BitSliceIterator};
use crate::buffer::BooleanBuffer;
use crate::{Buffer, MutableBuffer};

/// A [`BooleanBuffer`] used to encode validity for Arrow arrays
/// A [`BooleanBuffer`] used to encode validity (null values) for Arrow arrays
///
/// In the [Arrow specification], array validity is encoded in a packed bitmask with a
/// `true` value indicating the corresponding slot is not null, and `false` indicating
/// that it is null.
///
/// `NullBuffer`s can be creating using [`NullBufferBuilder`]
/// # See also
/// * [`NullBufferBuilder`] for creating `NullBuffer`s
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

adding some comments here to help navigate the maze of builders available

///
/// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
/// [`NullBufferBuilder`]: crate::NullBufferBuilder
Expand Down
35 changes: 31 additions & 4 deletions arrow-buffer/src/builder/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,28 @@ use std::ops::Range;

/// Builder for [`BooleanBuffer`]
///
/// Builds a packed buffer of bits representing boolean values. Each bit in the
/// buffer corresponds to a boolean value.
///
/// # See Also
///
/// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NullBufferBuilder is the correct reference so I fixed that

/// * [`NullBufferBuilder`] for building [`BooleanBuffer`]s for representing nulls
/// * [`BufferBuilder`] for building [`Buffer`]s
///
/// # Example
/// ```
/// # use arrow_buffer::builder::BooleanBufferBuilder;
/// let mut builder = BooleanBufferBuilder::new(10);
/// builder.append(true);
/// builder.append(false);
/// builder.append_n(3, true); // append 3 trues
/// let buffer = builder.build();
/// assert_eq!(buffer.len(), 5); // 5 bits appended
/// assert_eq!(buffer.values(), &[0b00011101_u8]); // packed bits
/// ```
///
/// [`NullBuffer`]: crate::NullBuffer
/// [`BufferBuilder`]: crate::builder::BufferBuilder
/// [`NullBufferBuilder`]: crate::builder::NullBufferBuilder
#[derive(Debug)]
pub struct BooleanBufferBuilder {
buffer: MutableBuffer,
Expand Down Expand Up @@ -247,14 +264,24 @@ impl BooleanBufferBuilder {
self.buffer.as_slice_mut()
}

/// Creates a [`BooleanBuffer`]
/// Hands back the bits accumulated so far as a [`BooleanBuffer`] and leaves
/// this builder empty (zero length, fresh zero-capacity buffer) for reuse.
///
/// Prefer [`Self::build`] when the builder is not needed afterwards.
#[inline]
pub fn finish(&mut self) -> BooleanBuffer {
    // Take the bit count first; `take` leaves `0` behind, resetting the length.
    let bit_len = std::mem::take(&mut self.len);
    // Swap in an empty buffer so the builder stays usable after this call.
    let taken = std::mem::replace(&mut self.buffer, MutableBuffer::new(0));
    BooleanBuffer::new(taken.into(), 0, bit_len)
}

/// Builds a [`BooleanBuffer`] without resetting the builder.
///
/// This consumes the builder. Use [`Self::finish`] to reuse it.
#[inline]
pub fn build(self) -> BooleanBuffer {
Copy link
Contributor Author

@alamb alamb Jan 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

most builders have a build method. The fact that the *Buffer builders in the crate do not is a small source of API friction I would like to remove

BooleanBuffer::new(self.buffer.into(), 0, self.len)
}

/// Builds a [`BooleanBuffer`] from the builder's current contents without consuming or resetting the builder.
pub fn finish_cloned(&self) -> BooleanBuffer {
BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len)
Expand Down Expand Up @@ -285,7 +312,7 @@ impl From<BooleanBufferBuilder> for Buffer {
impl From<BooleanBufferBuilder> for BooleanBuffer {
#[inline]
fn from(builder: BooleanBufferBuilder) -> Self {
BooleanBuffer::new(builder.buffer.into(), 0, builder.len)
builder.build()
}
}

Expand Down
39 changes: 32 additions & 7 deletions arrow-buffer/src/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,31 @@ pub use offset::*;
use crate::{ArrowNativeType, Buffer, MutableBuffer};
use std::marker::PhantomData;

/// Builder for creating a [Buffer] object.
/// Builder for creating Arrow [`Buffer`] objects
///
/// A [Buffer] is the underlying data structure of Arrow's Arrays.
/// A [`Buffer`] is the underlying data structure of Arrow's Arrays.
///
/// For all supported types, there are type definitions for the
/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
///
/// **Note it is typically faster to create buffers directly from `Vec`**.
/// See example on [`Buffer`].
///
/// # See Also
/// * [`BooleanBufferBuilder`]: for packing bits in [`BooleanBuffer`]s
/// * [`NullBufferBuilder`]: for creating [`NullBuffer`]s of null values
///
/// [`BooleanBuffer`]: crate::BooleanBuffer
/// [`NullBuffer`]: crate::NullBuffer
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(100);
/// builder.append_slice(&[42, 43, 44]);
/// builder.append(45);
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// ```
#[derive(Debug)]
Expand Down Expand Up @@ -341,16 +349,15 @@ impl<T: ArrowNativeType> BufferBuilder<T> {

/// Resets this builder and returns an immutable [Buffer].
///
/// Use [`Self::build`] when you don't need to reuse this builder.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append_slice(&[42, 44, 46]);
///
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
/// ```
#[inline]
Expand All @@ -359,6 +366,24 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
self.len = 0;
buf.into()
}

/// Builds an immutable [Buffer] without resetting the builder.
///
/// This consumes the builder. Use [`Self::finish`] to reuse it.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append_slice(&[42, 44, 46]);
/// let buffer = builder.build();
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
/// ```
#[inline]
pub fn build(self) -> Buffer {
self.buffer.into()
}
}

impl<T: ArrowNativeType> Default for BufferBuilder<T> {
Expand Down
32 changes: 21 additions & 11 deletions arrow-buffer/src/builder/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,22 @@

use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};

/// Builder for creating [`NullBuffer`]
/// Builder for creating [`NullBuffer`]s (bitmaps indicating validity/nulls).
///
/// # See also
/// * [`BooleanBufferBuilder`] for a lower-level bitmap builder.
/// * [`Self::allocated_size`] for the current memory allocated by the builder.
///
/// # Performance
///
/// This builder only materializes the buffer when we append `false`.
/// If you only append `true`s to the builder, what you get will be
/// `None` when calling [`finish`](#method.finish).
/// This builder only materializes the buffer when null values (`false`) are
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drive by wording cleanup

/// appended. If you only append non-null values (`true`) to the builder, no
/// buffer is allocated and [`build`](#method.build) or
/// [`finish`](#method.finish) return `None`.
///
/// This optimization is **very** important for performance as it avoids
/// allocating memory for the null buffer when there are no nulls.
///
/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
///
/// # Example
/// ```
/// # use arrow_buffer::NullBufferBuilder;
Expand Down Expand Up @@ -193,11 +196,20 @@ impl NullBufferBuilder {
}
}

/// Builds the null buffer and resets the builder.
/// Returns `None` if the builder only contains `true`s.
/// Builds the [`NullBuffer`] and resets the builder.
///
/// Returns `None` if the builder only contains `true`s. Use [`Self::build`]
/// when you don't need to reuse this builder.
pub fn finish(&mut self) -> Option<NullBuffer> {
self.len = 0;
Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
Some(NullBuffer::new(self.bitmap_builder.take()?.build()))
}

/// Consumes this builder and returns the [`NullBuffer`] it holds, if any.
///
/// Returns `None` when no bitmap was ever materialized. The builder is moved
/// by this call; use [`Self::finish`] when it should be reused.
pub fn build(self) -> Option<NullBuffer> {
    // No materialized bitmap means there is nothing to convert.
    let bitmap = self.bitmap_builder?;
    Some(NullBuffer::from(bitmap))
}

/// Builds the [NullBuffer] without resetting the builder.
Expand Down Expand Up @@ -238,9 +250,7 @@ impl NullBufferBuilder {
.map(|b| b.capacity() / 8)
.unwrap_or(0)
}
}

impl NullBufferBuilder {
/// Return the number of bits in the buffer.
pub fn len(&self) -> usize {
self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
Expand Down
Loading