diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 0ec1f5cda4af..f42fa51f512c 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -140,12 +140,12 @@ impl VariantArray { nulls: Option, ) -> Self { let mut builder = - StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone())); + StructArrayBuilder::new().with_field("metadata", Arc::new(metadata.clone()), false); if let Some(value) = value.clone() { - builder = builder.with_field("value", Arc::new(value)); + builder = builder.with_field("value", Arc::new(value), true); } if let Some(typed_value) = typed_value.clone() { - builder = builder.with_field("typed_value", typed_value); + builder = builder.with_field("typed_value", typed_value, true); } if let Some(nulls) = nulls { builder = builder.with_nulls(nulls); @@ -564,8 +564,8 @@ impl StructArrayBuilder { } /// Add an array to this struct array as a field with the specified name. - pub fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self { - let field = Field::new(field_name, array.data_type().clone(), true); + pub fn with_field(mut self, field_name: &str, array: ArrayRef, nullable: bool) -> Self { + let field = Field::new(field_name, array.data_type().clone(), nullable); self.fields.push(Arc::new(field)); self.arrays.push(array); self diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index 9b4b7bbd7d5f..f9026735db1a 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -307,7 +307,8 @@ mod test { use parquet_variant::{Variant, VariantPath}; use crate::json_to_variant; - use crate::{variant_array::ShreddedVariantFieldArray, VariantArray}; + use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; + use crate::VariantArray; use super::{variant_get, GetOptions}; @@ -692,8 +693,8 @@ mod test { ]); let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata)) - .with_field("typed_value", Arc::new(typed_value)) + .with_field("metadata", Arc::new(metadata), false) + .with_field("typed_value", Arc::new(typed_value), true) .build(); Arc::new( @@ -821,9 +822,9 @@ mod test { ]); let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata)) - .with_field("typed_value", Arc::new(typed_value)) - .with_field("value", Arc::new(values)) + .with_field("metadata", Arc::new(metadata), false) + .with_field("typed_value", Arc::new(typed_value), true) + .with_field("value", Arc::new(values), true) .with_nulls(nulls) .build(); @@ -926,9 +927,9 @@ mod test { ]); let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata)) - .with_field("typed_value", Arc::new(typed_value)) - .with_field("value", Arc::new(values)) + .with_field("metadata", Arc::new(metadata), true) + .with_field("typed_value", Arc::new(typed_value), true) + .with_field("value", Arc::new(values), true) .with_nulls(nulls) .build(); @@ -937,45 +938,6 @@ mod test { ) } - /// Builds struct arrays from component fields - /// - /// TODO: move to arrow crate - #[derive(Debug, Default, Clone)] - struct StructArrayBuilder { - fields: Vec, - arrays: Vec, - nulls: Option, - } - - impl StructArrayBuilder { - fn new() -> Self { - Default::default() - } - - /// Add an array to this struct array as a field with the specified name. - fn with_field(mut self, field_name: &str, array: ArrayRef) -> Self { - let field = Field::new(field_name, array.data_type().clone(), true); - self.fields.push(Arc::new(field)); - self.arrays.push(array); - self - } - - /// Set the null buffer for this struct array. - fn with_nulls(mut self, nulls: NullBuffer) -> Self { - self.nulls = Some(nulls); - self - } - - pub fn build(self) -> StructArray { - let Self { - fields, - arrays, - nulls, - } = self; - StructArray::new(Fields::from(fields), arrays, nulls) - } - } - /// Return a VariantArray that represents an "all null" variant /// for the following example (3 null values): /// @@ -1005,7 +967,7 @@ mod test { let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 3)); let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata)) + .with_field("metadata", Arc::new(metadata), false) .with_nulls(nulls) .build(); @@ -1096,8 +1058,8 @@ mod test { let x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]); // For perfect shredding of the x field, no "value" column, only typed_value - let x_field_struct = crate::variant_array::StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value)) + let x_field_struct = StructArrayBuilder::new() + .with_field("typed_value", Arc::new(x_field_typed_value), true) .build(); // Wrap the x field struct in a ShreddedVariantFieldArray @@ -1118,10 +1080,10 @@ mod test { .unwrap(); // Create the main VariantArray - let main_struct = crate::variant_array::StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array)) - .with_field("value", Arc::new(value_array)) - .with_field("typed_value", Arc::new(typed_value_struct)) + let main_struct = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata_array), false) + .with_field("value", Arc::new(value_array), true) + .with_field("typed_value", Arc::new(typed_value_struct), true) .build(); Arc::new(VariantArray::try_new(Arc::new(main_struct)).expect("should create variant array")) @@ -1476,8 +1438,8 @@ mod test { let x_field_typed_value = Int32Array::from(vec![Some(42), None]); // For the x field, only typed_value (perfect shredding when possible) - let x_field_struct = crate::variant_array::StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value)) + let x_field_struct = StructArrayBuilder::new() + .with_field("typed_value", Arc::new(x_field_typed_value), true) .build(); let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct)) @@ -1494,10 +1456,10 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = crate::variant_array::StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array)) - .with_field("value", Arc::new(value_array)) - .with_field("typed_value", Arc::new(typed_value_struct)) + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata_array), false) + .with_field("value", Arc::new(value_array), true) + .with_field("typed_value", Arc::new(typed_value_struct), true) .build(); Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray")) @@ -1555,8 +1517,8 @@ mod test { // Create the nested shredded structure // Level 2: x field (the deepest level) let x_typed_value = Int32Array::from(vec![Some(55), None]); - let x_field_struct = crate::variant_array::StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value)) + let x_field_struct = StructArrayBuilder::new() + .with_field("typed_value", Arc::new(x_typed_value), true) .build(); let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct)) .expect("should create ShreddedVariantFieldArray for x"); @@ -1582,15 +1544,16 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = crate::variant_array::StructArrayBuilder::new() + let a_inner_struct = StructArrayBuilder::new() .with_field( "typed_value", Arc::new( StructArray::try_new(a_inner_fields, vec![Arc::new(x_field_shredded)], None) .unwrap(), ), + true, ) - .with_field("value", Arc::new(a_value_array)) + .with_field("value", Arc::new(a_value_array), true) .build(); let a_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(a_inner_struct)) .expect("should create ShreddedVariantFieldArray for a"); @@ -1606,10 +1569,10 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = crate::variant_array::StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array)) - .with_field("value", Arc::new(value_array)) - .with_field("typed_value", Arc::new(typed_value_struct)) + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata_array), false) + .with_field("value", Arc::new(value_array), true) + .with_field("typed_value", Arc::new(typed_value_struct), true) .build(); Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray")) @@ -1660,8 +1623,8 @@ mod test { // Level 3: x field (deepest level) let x_typed_value = Int32Array::from(vec![Some(100), None, None]); - let x_field_struct = crate::variant_array::StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value)) + let x_field_struct = StructArrayBuilder::new() + .with_field("typed_value", Arc::new(x_typed_value), true) .build(); let x_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(x_field_struct)) .expect("should create ShreddedVariantFieldArray for x"); @@ -1685,15 +1648,16 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let b_inner_struct = crate::variant_array::StructArrayBuilder::new() + let b_inner_struct = StructArrayBuilder::new() .with_field( "typed_value", Arc::new( StructArray::try_new(b_inner_fields, vec![Arc::new(x_field_shredded)], None) .unwrap(), ), + true, ) - .with_field("value", Arc::new(b_value_array)) + .with_field("value", Arc::new(b_value_array), true) .build(); let b_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(b_inner_struct)) .expect("should create ShreddedVariantFieldArray for b"); @@ -1717,15 +1681,16 @@ mod test { b_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = crate::variant_array::StructArrayBuilder::new() + let a_inner_struct = StructArrayBuilder::new() .with_field( "typed_value", Arc::new( StructArray::try_new(a_inner_fields, vec![Arc::new(b_field_shredded)], None) .unwrap(), ), + true, ) - .with_field("value", Arc::new(a_value_array)) + .with_field("value", Arc::new(a_value_array), true) .build(); let a_field_shredded = ShreddedVariantFieldArray::try_new(Arc::new(a_inner_struct)) .expect("should create ShreddedVariantFieldArray for a"); @@ -1741,10 +1706,10 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = crate::variant_array::StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array)) - .with_field("value", Arc::new(value_array)) - .with_field("typed_value", Arc::new(typed_value_struct)) + let struct_array = StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata_array), false) + .with_field("value", Arc::new(value_array), true) + .with_field("typed_value", Arc::new(typed_value_struct), true) .build(); Arc::new(VariantArray::try_new(Arc::new(struct_array)).expect("should create VariantArray"))