diff --git a/parquet-variant/src/builder.rs b/parquet-variant/src/builder.rs index 0bdcd8019aaf..e224ec0e4d99 100644 --- a/parquet-variant/src/builder.rs +++ b/parquet-variant/src/builder.rs @@ -61,17 +61,6 @@ fn write_offset(buf: &mut Vec, value: usize, nbytes: u8) { buf.extend_from_slice(&bytes[..nbytes as usize]); } -fn write_header(buf: &mut Vec, header_byte: u8, is_large: bool, num_items: usize) { - buf.push(header_byte); - - if is_large { - let num_items = num_items as u32; - buf.extend_from_slice(&num_items.to_le_bytes()); - } else { - let num_items = num_items as u8; - buf.push(num_items); - }; -} #[derive(Default)] struct ValueBuffer(Vec); @@ -231,6 +220,36 @@ impl ValueBuffer { } } } + + /// Writes out the header byte for a variant object or list + fn append_header(&mut self, header_byte: u8, is_large: bool, num_items: usize) { + let buf = self.inner_mut(); + buf.push(header_byte); + + if is_large { + let num_items = num_items as u32; + buf.extend_from_slice(&num_items.to_le_bytes()); + } else { + let num_items = num_items as u8; + buf.push(num_items); + }; + } + + /// Writes out the offsets for an array of offsets, including the final offset (data size). + fn append_offset_array( + &mut self, + offsets: impl IntoIterator, + data_size: Option, + nbytes: u8, + ) { + let buf = self.inner_mut(); + for offset in offsets { + write_offset(buf, offset, nbytes); + } + if let Some(data_size) = data_size { + write_offset(buf, data_size, nbytes); + } + } } #[derive(Default)] @@ -314,7 +333,7 @@ impl MetadataBuilder { write_offset(&mut metadata, cur_offset, offset_size); // Write string data - for key in self.field_names.iter() { + for key in self.field_names { metadata.extend_from_slice(key.as_bytes()); } @@ -339,6 +358,80 @@ impl> Extend for MetadataBuilder { } } +/// Tracks information needed to correctly finalize a nested builder, for each parent builder type. +/// +/// A child builder has no effect on its parent unless/until its `finalize` method is called, at +/// which point the child appends the new value to the parent. As a (desirable) side effect, +/// creating a parent state instance captures mutable references to a subset of the parent's fields, +/// rendering the parent object completely unusable until the parent state goes out of scope. This +/// ensures that at most one child builder can exist at a time. +/// +/// The redundancy in buffer and metadata_builder is because all the references come from the +/// parent, and we cannot "split" a mutable reference across two objects (parent state and the child +/// builder that uses it). So everything has to be here. Rust layout optimizations should treat the +/// variants as a union, so that accessing a `buffer` or `metadata_builder` is branch-free. +enum ParentState<'a> { + Variant { + buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, + }, + List { + buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, + offsets: &'a mut Vec, + }, + Object { + buffer: &'a mut ValueBuffer, + metadata_builder: &'a mut MetadataBuilder, + fields: &'a mut IndexMap, + field_name: &'a str, + }, +} + +impl ParentState<'_> { + fn buffer(&mut self) -> &mut ValueBuffer { + match self { + ParentState::Variant { buffer, .. } => buffer, + ParentState::List { buffer, .. } => buffer, + ParentState::Object { buffer, .. } => buffer, + } + } + + fn metadata_builder(&mut self) -> &mut MetadataBuilder { + match self { + ParentState::Variant { + metadata_builder, .. + } => metadata_builder, + ParentState::List { + metadata_builder, .. + } => metadata_builder, + ParentState::Object { + metadata_builder, .. + } => metadata_builder, + } + } + + // Performs any parent-specific aspects of finishing, after the child has appended all necessary + // bytes to the parent's value buffer. ListBuilder records the new value's starting offset; + // ObjectBuilder associates the new value's starting offset with its field id; VariantBuilder + // doesn't need anything special. + fn finish(&mut self, starting_offset: usize) { + match self { + ParentState::Variant { .. } => (), + ParentState::List { offsets, .. } => offsets.push(starting_offset), + ParentState::Object { + metadata_builder, + fields, + field_name, + .. + } => { + let field_id = metadata_builder.upsert_field_name(field_name); + fields.insert(field_id, starting_offset); + } + } + } +} + /// Top level builder for [`Variant`] values /// /// # Example: create a Primitive Int8 @@ -579,20 +672,29 @@ impl VariantBuilder { self.metadata_builder.upsert_field_name(field_name); } + // Returns validate_unique_fields because we can no longer reference self once this method returns. + fn parent_state(&mut self) -> (ParentState, bool) { + let state = ParentState::Variant { + buffer: &mut self.buffer, + metadata_builder: &mut self.metadata_builder, + }; + (state, self.validate_unique_fields) + } + /// Create an [`ListBuilder`] for creating [`Variant::List`] values. /// /// See the examples on [`VariantBuilder`] for usage. pub fn new_list(&mut self) -> ListBuilder { - ListBuilder::new(&mut self.buffer, &mut self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields) + let (parent_state, validate_unique_fields) = self.parent_state(); + ListBuilder::new(parent_state, validate_unique_fields) } /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values. /// /// See the examples on [`VariantBuilder`] for usage. pub fn new_object(&mut self) -> ObjectBuilder { - ObjectBuilder::new(&mut self.buffer, &mut self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields) + let (parent_state, validate_unique_fields) = self.parent_state(); + ObjectBuilder::new(parent_state, validate_unique_fields) } /// Append a non-nested value to the builder. @@ -618,36 +720,20 @@ impl VariantBuilder { /// /// See the examples on [`VariantBuilder`] for usage. pub struct ListBuilder<'a> { - parent_buffer: &'a mut ValueBuffer, - metadata_builder: &'a mut MetadataBuilder, + parent_state: ParentState<'a>, offsets: Vec, buffer: ValueBuffer, - /// Is there a pending nested object or list that needs to be finalized? - pending: bool, validate_unique_fields: bool, } impl<'a> ListBuilder<'a> { - fn new(parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { + fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self { Self { - parent_buffer, - metadata_builder, - offsets: vec![0], + parent_state, + offsets: vec![], buffer: ValueBuffer::default(), - pending: false, - validate_unique_fields: false, - } - } - - fn check_new_offset(&mut self) { - if !self.pending { - return; + validate_unique_fields, } - - let element_end = self.buffer.offset(); - self.offsets.push(element_end); - - self.pending = false; } /// Enables unique field key validation for objects created within this list. @@ -659,58 +745,58 @@ impl<'a> ListBuilder<'a> { self } - pub fn new_object(&mut self) -> ObjectBuilder { - self.check_new_offset(); - - let obj_builder = ObjectBuilder::new(&mut self.buffer, self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields); - self.pending = true; + // Returns validate_unique_fields because we can no longer reference self once this method returns. + fn parent_state(&mut self) -> (ParentState, bool) { + let state = ParentState::List { + buffer: &mut self.buffer, + metadata_builder: self.parent_state.metadata_builder(), + offsets: &mut self.offsets, + }; + (state, self.validate_unique_fields) + } - obj_builder + /// Returns an object builder that can be used to append a new (nested) object to this list. + /// + /// WARNING: The builder will have no effect unless/until [`ObjectBuilder::finish`] is called. + pub fn new_object(&mut self) -> ObjectBuilder { + let (parent_state, validate_unique_fields) = self.parent_state(); + ObjectBuilder::new(parent_state, validate_unique_fields) } + /// Returns a list builder that can be used to append a new (nested) list to this list. + /// + /// WARNING: The builder will have no effect unless/until [`ListBuilder::finish`] is called. pub fn new_list(&mut self) -> ListBuilder { - self.check_new_offset(); - - let list_builder = ListBuilder::new(&mut self.buffer, self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields); - self.pending = true; - - list_builder + let (parent_state, validate_unique_fields) = self.parent_state(); + ListBuilder::new(parent_state, validate_unique_fields) } + /// Appends a new primitive value to this list pub fn append_value<'m, 'd, T: Into>>(&mut self, value: T) { - self.check_new_offset(); - + self.offsets.push(self.buffer.offset()); self.buffer.append_non_nested_value(value); - let element_end = self.buffer.offset(); - self.offsets.push(element_end); } - /// Finish the list, writing it to the parent buffer and consuming self. + /// Finalizes this list and appends it to its parent, which otherwise remains unmodified. pub fn finish(mut self) { - self.check_new_offset(); - let data_size = self.buffer.offset(); - let num_elements = self.offsets.len() - 1; + let num_elements = self.offsets.len(); let is_large = num_elements > u8::MAX as usize; let offset_size = int_size(data_size); - // Write header - write_header( - self.parent_buffer.inner_mut(), - array_header(is_large, offset_size), - is_large, - num_elements, - ); + // Get parent's buffer + let parent_buffer = self.parent_state.buffer(); + let starting_offset = parent_buffer.offset(); - // Write offsets - for offset in &self.offsets { - write_offset(self.parent_buffer.inner_mut(), *offset, offset_size); - } + // Write header + let header = array_header(is_large, offset_size); + parent_buffer.append_header(header, is_large, num_elements); - // Append values - self.parent_buffer.append_slice(self.buffer.inner()); + // Write out the offset array followed by the value bytes + let offsets = std::mem::take(&mut self.offsets); + parent_buffer.append_offset_array(offsets, Some(data_size), offset_size); + parent_buffer.append_slice(self.buffer.inner()); + self.parent_state.finish(starting_offset); } } @@ -725,50 +811,35 @@ impl Drop for ListBuilder<'_> { /// A builder for creating [`Variant::Object`] values. /// /// See the examples on [`VariantBuilder`] for usage. -pub struct ObjectBuilder<'a, 'b> { - parent_buffer: &'a mut ValueBuffer, - metadata_builder: &'a mut MetadataBuilder, +pub struct ObjectBuilder<'a> { + parent_state: ParentState<'a>, fields: IndexMap, // (field_id, offset) buffer: ValueBuffer, - /// Is there a pending list or object that needs to be finalized? - pending: Option<(&'b str, usize)>, validate_unique_fields: bool, /// Set of duplicate fields to report for errors duplicate_fields: HashSet, } -impl<'a, 'b> ObjectBuilder<'a, 'b> { - fn new(parent_buffer: &'a mut ValueBuffer, metadata_builder: &'a mut MetadataBuilder) -> Self { +impl<'a> ObjectBuilder<'a> { + fn new(parent_state: ParentState<'a>, validate_unique_fields: bool) -> Self { Self { - parent_buffer, - metadata_builder, + parent_state, fields: IndexMap::new(), buffer: ValueBuffer::default(), - pending: None, - validate_unique_fields: false, + validate_unique_fields, duplicate_fields: HashSet::new(), } } - fn check_pending_field(&mut self) { - let Some(&(field_name, field_start)) = self.pending.as_ref() else { - return; - }; - - let field_id = self.metadata_builder.upsert_field_name(field_name); - self.fields.insert(field_id, field_start); - - self.pending = None; - } - /// Add a field with key and value to the object /// /// Note: when inserting duplicate keys, the new value overwrites the previous mapping, /// but the old value remains in the buffer, resulting in a larger variant pub fn insert<'m, 'd, T: Into>>(&mut self, key: &str, value: T) { - self.check_pending_field(); + // Get metadata_builder from parent state + let metadata_builder = self.parent_state.metadata_builder(); - let field_id = self.metadata_builder.upsert_field_name(key); + let field_id = metadata_builder.upsert_field_name(key); let field_start = self.buffer.offset(); if self.fields.insert(field_id, field_start).is_some() && self.validate_unique_fields { @@ -787,41 +858,41 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { self } - /// Return a new [`ObjectBuilder`] to add a nested object with the specified - /// key to the object. - pub fn new_object(&mut self, key: &'b str) -> ObjectBuilder { - self.check_pending_field(); - - let field_start = self.buffer.offset(); - let obj_builder = ObjectBuilder::new(&mut self.buffer, self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields); - self.pending = Some((key, field_start)); - - obj_builder + // Returns validate_unique_fields because we can no longer reference self once this method returns. + fn parent_state<'b>(&'b mut self, key: &'b str) -> (ParentState<'b>, bool) { + let state = ParentState::Object { + buffer: &mut self.buffer, + metadata_builder: self.parent_state.metadata_builder(), + fields: &mut self.fields, + field_name: key, + }; + (state, self.validate_unique_fields) } - /// Return a new [`ListBuilder`] to add a list with the specified key to the - /// object. - pub fn new_list(&mut self, key: &'b str) -> ListBuilder { - self.check_pending_field(); - - let field_start = self.buffer.offset(); - let list_builder = ListBuilder::new(&mut self.buffer, self.metadata_builder) - .with_validate_unique_fields(self.validate_unique_fields); - self.pending = Some((key, field_start)); + /// Returns an object builder that can be used to append a new (nested) object to this object. + /// + /// WARNING: The builder will have no effect unless/until [`ObjectBuilder::finish`] is called. + pub fn new_object<'b>(&'b mut self, key: &'b str) -> ObjectBuilder<'b> { + let (parent_state, validate_unique_fields) = self.parent_state(key); + ObjectBuilder::new(parent_state, validate_unique_fields) + } - list_builder + /// Returns a list builder that can be used to append a new (nested) list to this object. + /// + /// WARNING: The builder will have no effect unless/until [`ListBuilder::finish`] is called. + pub fn new_list<'b>(&'b mut self, key: &'b str) -> ListBuilder<'b> { + let (parent_state, validate_unique_fields) = self.parent_state(key); + ListBuilder::new(parent_state, validate_unique_fields) } - /// Finalize the object, writing it to the parent buffer and consuming self. + /// Finalizes this object and appends it to its parent, which otherwise remains unmodified. pub fn finish(mut self) -> Result<(), ArrowError> { - self.check_pending_field(); - + let metadata_builder = self.parent_state.metadata_builder(); if self.validate_unique_fields && !self.duplicate_fields.is_empty() { let mut names = self .duplicate_fields .iter() - .map(|id| self.metadata_builder.field_name(*id as usize)) + .map(|id| metadata_builder.field_name(*id as usize)) .collect::>(); names.sort_unstable(); @@ -837,8 +908,8 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let is_large = num_fields > u8::MAX as usize; self.fields.sort_by(|&field_a_id, _, &field_b_id, _| { - let key_a = &self.metadata_builder.field_name(field_a_id as usize); - let key_b = &self.metadata_builder.field_name(field_b_id as usize); + let key_a = &metadata_builder.field_name(field_a_id as usize); + let key_b = &metadata_builder.field_name(field_b_id as usize); key_a.cmp(key_b) }); @@ -847,27 +918,23 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { let id_size = int_size(max_id as usize); let offset_size = int_size(data_size); + // Get parent's buffer + let parent_buffer = self.parent_state.buffer(); + let starting_offset = parent_buffer.offset(); + // Write header - write_header( - self.parent_buffer.inner_mut(), - object_header(is_large, id_size, offset_size), - is_large, - num_fields, - ); + let header = object_header(is_large, id_size, offset_size); + parent_buffer.append_header(header, is_large, num_fields); // Write field IDs (sorted order) - for (&id, _) in &self.fields { - write_offset(self.parent_buffer.inner_mut(), id as usize, id_size); - } - - // Write field offsets - for (_, &offset) in &self.fields { - write_offset(self.parent_buffer.inner_mut(), offset, offset_size); - } - - write_offset(self.parent_buffer.inner_mut(), data_size, offset_size); + let ids = self.fields.keys().map(|id| *id as usize); + parent_buffer.append_offset_array(ids, None, id_size); - self.parent_buffer.append_slice(self.buffer.inner()); + // Write the field offset array, followed by the value bytes + let offsets = std::mem::take(&mut self.fields).into_values(); + parent_buffer.append_offset_array(offsets, Some(data_size), offset_size); + parent_buffer.append_slice(self.buffer.inner()); + self.parent_state.finish(starting_offset); Ok(()) } @@ -877,12 +944,12 @@ impl<'a, 'b> ObjectBuilder<'a, 'b> { /// as the `finish` method must be called to finalize the object. /// This is to ensure that the object is always finalized before its parent builder /// is finalized. -impl Drop for ObjectBuilder<'_, '_> { +impl Drop for ObjectBuilder<'_> { fn drop(&mut self) {} } -/// Trait that abstracts functionality from Variant fconstruction implementations, namely -/// `VariantBuilder`, `ListBuilder` and `ObjectFieldBuilder` to minimize code duplication. +/// Trait that abstracts functionality from Variant construction implementations, such as +/// [`VariantBuilder`] and [`ListBuilder`], to minimize code duplication. pub(crate) trait VariantBuilderExt<'m, 'v> { fn append_value(&mut self, value: impl Into>); @@ -1846,4 +1913,260 @@ mod tests { let metadata = MetadataBuilder::from_iter(field_names); assert_eq!(metadata.num_field_names(), 3); } + + #[test] + fn test_variant_builder_to_list_builder_no_finish() { + // Create a list builder but never finish it + let mut builder = VariantBuilder::new(); + let mut list_builder = builder.new_list(); + list_builder.append_value("hi"); + drop(list_builder); + + builder.append_value(42i8); + + // The original builder should be unchanged + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert!(metadata.is_empty()); + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert!(metadata.is_empty()); + assert_eq!(variant, Variant::Int8(42)); + } + + #[test] + fn test_variant_builder_to_object_builder_no_finish() { + // Create an object builder but never finish it + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("name", "unknown"); + drop(object_builder); + + builder.append_value(42i8); + + // The original builder should be unchanged + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 1); + assert_eq!(&metadata[0], "name"); // not rolled back + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert_eq!(variant, Variant::Int8(42)); + } + + #[test] + fn test_list_builder_to_list_builder_inner_no_finish() { + let mut builder = VariantBuilder::new(); + let mut list_builder = builder.new_list(); + list_builder.append_value(1i8); + + // Create a nested list builder but never finish it + let mut nested_list_builder = list_builder.new_list(); + nested_list_builder.append_value("hi"); + drop(nested_list_builder); + + list_builder.append_value(2i8); + + // The parent list should only contain the original values + list_builder.finish(); + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert!(metadata.is_empty()); + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + let list = variant.as_list().unwrap(); + assert_eq!(list.len(), 2); + assert_eq!(list.get(0).unwrap(), Variant::Int8(1)); + assert_eq!(list.get(1).unwrap(), Variant::Int8(2)); + } + + #[test] + fn test_list_builder_to_list_builder_outer_no_finish() { + let mut builder = VariantBuilder::new(); + let mut list_builder = builder.new_list(); + list_builder.append_value(1i8); + + // Create a nested list builder and finish it + let mut nested_list_builder = list_builder.new_list(); + nested_list_builder.append_value("hi"); + nested_list_builder.finish(); + + // Drop the outer list builder without finishing it + drop(list_builder); + + builder.append_value(2i8); + + // Only the second attempt should appear in the final variant + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert!(metadata.is_empty()); + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert_eq!(variant, Variant::Int8(2)); + } + + #[test] + fn test_list_builder_to_object_builder_inner_no_finish() { + let mut builder = VariantBuilder::new(); + let mut list_builder = builder.new_list(); + list_builder.append_value(1i8); + + // Create a nested object builder but never finish it + let mut nested_object_builder = list_builder.new_object(); + nested_object_builder.insert("name", "unknown"); + drop(nested_object_builder); + + list_builder.append_value(2i8); + + // The parent list should only contain the original values + list_builder.finish(); + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 1); + assert_eq!(&metadata[0], "name"); // not rolled back + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + let list = variant.as_list().unwrap(); + assert_eq!(list.len(), 2); + assert_eq!(list.get(0).unwrap(), Variant::Int8(1)); + assert_eq!(list.get(1).unwrap(), Variant::Int8(2)); + } + + #[test] + fn test_list_builder_to_object_builder_outer_no_finish() { + let mut builder = VariantBuilder::new(); + let mut list_builder = builder.new_list(); + list_builder.append_value(1i8); + + // Create a nested object builder and finish it + let mut nested_object_builder = list_builder.new_object(); + nested_object_builder.insert("name", "unknown"); + nested_object_builder.finish().unwrap(); + + // Drop the outer list builder without finishing it + drop(list_builder); + + builder.append_value(2i8); + + // Only the second attempt should appear in the final variant + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 1); + assert_eq!(&metadata[0], "name"); // not rolled back + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert_eq!(variant, Variant::Int8(2)); + } + + #[test] + fn test_object_builder_to_list_builder_inner_no_finish() { + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("first", 1i8); + + // Create a nested list builder but never finish it + let mut nested_list_builder = object_builder.new_list("nested"); + nested_list_builder.append_value("hi"); + drop(nested_list_builder); + + object_builder.insert("second", 2i8); + + // The parent object should only contain the original fields + object_builder.finish().unwrap(); + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 2); + assert_eq!(&metadata[0], "first"); + assert_eq!(&metadata[1], "second"); + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + let obj = variant.as_object().unwrap(); + assert_eq!(obj.len(), 2); + assert_eq!(obj.get("first"), Some(Variant::Int8(1))); + assert_eq!(obj.get("second"), Some(Variant::Int8(2))); + } + + #[test] + fn test_object_builder_to_list_builder_outer_no_finish() { + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("first", 1i8); + + // Create a nested list builder and finish it + let mut nested_list_builder = object_builder.new_list("nested"); + nested_list_builder.append_value("hi"); + nested_list_builder.finish(); + + // Drop the outer object builder without finishing it + drop(object_builder); + + builder.append_value(2i8); + + // Only the second attempt should appear in the final variant + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 2); + assert_eq!(&metadata[0], "first"); + assert_eq!(&metadata[1], "nested"); // not rolled back + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert_eq!(variant, Variant::Int8(2)); + } + + #[test] + fn test_object_builder_to_object_builder_inner_no_finish() { + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("first", 1i8); + + // Create a nested object builder but never finish it + let mut nested_object_builder = object_builder.new_object("nested"); + nested_object_builder.insert("name", "unknown"); + drop(nested_object_builder); + + object_builder.insert("second", 2i8); + + // The parent object should only contain the original fields + object_builder.finish().unwrap(); + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 3); + assert_eq!(&metadata[0], "first"); + assert_eq!(&metadata[1], "name"); // not rolled back + assert_eq!(&metadata[2], "second"); + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + let obj = variant.as_object().unwrap(); + assert_eq!(obj.len(), 2); + assert_eq!(obj.get("first"), Some(Variant::Int8(1))); + assert_eq!(obj.get("second"), Some(Variant::Int8(2))); + } + + #[test] + fn test_object_builder_to_object_builder_outer_no_finish() { + let mut builder = VariantBuilder::new(); + let mut object_builder = builder.new_object(); + object_builder.insert("first", 1i8); + + // Create a nested object builder and finish it + let mut nested_object_builder = object_builder.new_object("nested"); + nested_object_builder.insert("name", "unknown"); + nested_object_builder.finish().unwrap(); + + // Drop the outer object builder without finishing it + drop(object_builder); + + builder.append_value(2i8); + + // Only the second attempt should appear in the final variant + let (metadata, value) = builder.finish(); + let metadata = VariantMetadata::try_new(&metadata).unwrap(); + assert_eq!(metadata.len(), 3); + assert_eq!(&metadata[0], "first"); // not rolled back + assert_eq!(&metadata[1], "name"); // not rolled back + assert_eq!(&metadata[2], "nested"); // not rolled back + + let variant = Variant::try_new_with_metadata(metadata, &value).unwrap(); + assert_eq!(variant, Variant::Int8(2)); + } } diff --git a/parquet-variant/src/from_json.rs b/parquet-variant/src/from_json.rs index 00d205f38584..c4adbd1377a8 100644 --- a/parquet-variant/src/from_json.rs +++ b/parquet-variant/src/from_json.rs @@ -131,9 +131,9 @@ fn append_json<'m, 'v>( Ok(()) } -struct ObjectFieldBuilder<'s, 'o, 'v> { +struct ObjectFieldBuilder<'o, 'v, 's> { key: &'s str, - builder: &'o mut ObjectBuilder<'v, 's>, + builder: &'o mut ObjectBuilder<'v>, } impl<'m, 'v> VariantBuilderExt<'m, 'v> for ObjectFieldBuilder<'_, '_, '_> { diff --git a/parquet-variant/src/variant/metadata.rs b/parquet-variant/src/variant/metadata.rs index 46d89557bfae..742f586fb3b4 100644 --- a/parquet-variant/src/variant/metadata.rs +++ b/parquet-variant/src/variant/metadata.rs @@ -206,6 +206,16 @@ impl<'m> VariantMetadata<'m> { Ok(new_self) } + /// The number of metadata dictionary entries + pub fn len(&self) -> usize { + self.dictionary_size + } + + /// True if this metadata dictionary contains no entries + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + /// True if this instance is fully [validated] for panic-free infallible accesses. /// /// [validated]: Self#Validation diff --git a/parquet-variant/tests/test_json_to_variant.rs b/parquet-variant/tests/test_json_to_variant.rs index 6b7ace9220f8..e4c001d7a382 100644 --- a/parquet-variant/tests/test_json_to_variant.rs +++ b/parquet-variant/tests/test_json_to_variant.rs @@ -28,7 +28,7 @@ struct JsonToVariantTest<'a> { expected: Variant<'a, 'a>, } -impl<'a> JsonToVariantTest<'a> { +impl JsonToVariantTest<'_> { fn run(self) -> Result<(), ArrowError> { let mut variant_builder = VariantBuilder::new(); json_to_variant(self.json, &mut variant_builder)?;