diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 3ac6d2be6c72..bcac113669de 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -304,7 +304,7 @@ mod test { use arrow::buffer::NullBuffer; use arrow::compute::CastOptions; use arrow_schema::{DataType, Field, FieldRef, Fields}; - use parquet_variant::{Variant, VariantPath}; + use parquet_variant::{Variant, VariantPath, EMPTY_VARIANT_METADATA_BYTES}; use crate::json_to_variant; use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; @@ -701,8 +701,10 @@ mod test { fn $func() -> ArrayRef { // At the time of writing, the `VariantArrayBuilder` does not support shredding. // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 - let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() }; - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( + EMPTY_VARIANT_METADATA_BYTES, + 3, + )); let typed_value = $array_type::from(vec![ Some(<$primitive_type>::try_from(1u8).unwrap()), Some(<$primitive_type>::try_from(2u8).unwrap()), @@ -1032,8 +1034,6 @@ mod test { /// } /// ``` fn all_null_variant_array() -> ArrayRef { - let (metadata, _value) = { parquet_variant::VariantBuilder::new().finish() }; - let nulls = NullBuffer::from(vec![ false, // row 0 is null false, // row 1 is null @@ -1041,7 +1041,8 @@ mod test { ]); // metadata is the same for all rows (though they're all null) - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); + let metadata = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3)); let struct_array = StructArrayBuilder::new() .with_field("metadata", Arc::new(metadata), false) @@ -2502,8 +2503,8 @@ mod test { .build(); // Build final VariantArray with 
top-level nulls - let (metadata, _) = parquet_variant::VariantBuilder::new().finish(); - let metadata_array = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); + let metadata_array = + BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 4)); let nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 true, // row 1: inner field NULL diff --git a/parquet-variant/src/variant.rs b/parquet-variant/src/variant.rs index 3dae4daa0ff8..cc4c3bcadd66 100644 --- a/parquet-variant/src/variant.rs +++ b/parquet-variant/src/variant.rs @@ -17,7 +17,7 @@ pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; pub use self::list::VariantList; -pub use self::metadata::VariantMetadata; +pub use self::metadata::{VariantMetadata, EMPTY_VARIANT_METADATA, EMPTY_VARIANT_METADATA_BYTES}; pub use self::object::VariantObject; use crate::decoder::{ self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index 1c9da6bcc0cf..941247c9f23d 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -141,6 +141,39 @@ pub struct VariantMetadata<'m> { // could increase the size of Variant. All those size increases could hurt performance. const _: () = crate::utils::expect_size_of::<VariantMetadata>(32); +/// The canonical byte slice corresponding to an empty metadata dictionary. +/// +/// ``` +/// # use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, VariantMetadata, WritableMetadataBuilder}; +/// let mut metadata_builder = WritableMetadataBuilder::default(); +/// metadata_builder.finish(); +/// let metadata_bytes = metadata_builder.into_inner(); +/// assert_eq!(&metadata_bytes, EMPTY_VARIANT_METADATA_BYTES); +/// ``` +pub const EMPTY_VARIANT_METADATA_BYTES: &[u8] = &[1, 0, 0]; + +/// The empty metadata dictionary. 
+/// +/// ``` +/// # use parquet_variant::{EMPTY_VARIANT_METADATA, VariantMetadata, WritableMetadataBuilder}; +/// let mut metadata_builder = WritableMetadataBuilder::default(); +/// metadata_builder.finish(); +/// let metadata_bytes = metadata_builder.into_inner(); +/// let empty_metadata = VariantMetadata::try_new(&metadata_bytes).unwrap(); +/// assert_eq!(empty_metadata, EMPTY_VARIANT_METADATA); +/// ``` +pub const EMPTY_VARIANT_METADATA: VariantMetadata = VariantMetadata { + bytes: EMPTY_VARIANT_METADATA_BYTES, + header: VariantMetadataHeader { + version: CORRECT_VERSION_VALUE, + is_sorted: false, + offset_size: OffsetSizeBytes::One, + }, + dictionary_size: 0, + first_value_byte: 3, + validated: true, +}; + impl<'m> VariantMetadata<'m> { /// Attempts to interpret `bytes` as a variant metadata instance, with full [validation] of all /// dictionary entries.