-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[Variant] Remove superfluous validate call and rename methods #7871
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a15ec78
6e6dd14
6bf97c3
3b7506a
89ffbed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,3 @@ | ||
| use std::ops::Deref; | ||
|
|
||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
|
|
@@ -16,6 +14,7 @@ use std::ops::Deref; | |
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| pub use self::decimal::{VariantDecimal16, VariantDecimal4, VariantDecimal8}; | ||
| pub use self::list::VariantList; | ||
| pub use self::metadata::VariantMetadata; | ||
|
|
@@ -24,6 +23,7 @@ use crate::decoder::{ | |
| self, get_basic_type, get_primitive_type, VariantBasicType, VariantPrimitiveType, | ||
| }; | ||
| use crate::utils::{first_byte_from_slice, slice_from_slice}; | ||
| use std::ops::Deref; | ||
|
|
||
| use arrow_schema::ArrowError; | ||
| use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; | ||
|
|
@@ -184,15 +184,15 @@ impl Deref for ShortString<'_> { | |
| /// Every instance of variant is either _valid_ or _invalid_, depending on whether the | ||
| /// underlying bytes are a valid encoding of a variant value (see below). | ||
| /// | ||
| /// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::validate`] | ||
| /// Instances produced by [`Self::try_new`], [`Self::try_new_with_metadata`], or [`Self::with_full_validation`] | ||
| /// are fully _validated_. They always contain _valid_ data, and infallible accesses such as | ||
| /// iteration and indexing are panic-free. The validation cost is `O(m + v)` where `m` and | ||
| /// `v` are the number of bytes in the metadata and value buffers, respectively. | ||
| /// | ||
| /// Instances produced by [`Self::new`] and [`Self::new_with_metadata`] are _unvalidated_ and so | ||
| /// they may contain either _valid_ or _invalid_ data. Infallible accesses to variant objects and | ||
| /// arrays, such as iteration and indexing will panic if the underlying bytes are _invalid_, and | ||
| /// fallible alternatives are provided as panic-free alternatives. [`Self::validate`] can also be | ||
| /// fallible alternatives are provided as panic-free alternatives. [`Self::with_full_validation`] can also be | ||
| /// used to _validate_ an _unvalidated_ instance, if desired. | ||
| /// | ||
| /// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller | ||
|
|
@@ -297,8 +297,10 @@ impl<'m, 'v> Variant<'m, 'v> { | |
| /// | ||
| /// [unvalidated]: Self#Validation | ||
| pub fn new(metadata: &'m [u8], value: &'v [u8]) -> Self { | ||
| let metadata = VariantMetadata::try_new_impl(metadata).expect("Invalid variant metadata"); | ||
| Self::try_new_with_metadata_impl(metadata, value).expect("Invalid variant data") | ||
| let metadata = VariantMetadata::try_new_with_shallow_validation(metadata) | ||
| .expect("Invalid variant metadata"); | ||
| Self::try_new_with_metadata_and_shallow_validation(metadata, value) | ||
| .expect("Invalid variant data") | ||
| } | ||
|
|
||
| /// Create a new variant with existing metadata. | ||
|
|
@@ -323,18 +325,19 @@ impl<'m, 'v> Variant<'m, 'v> { | |
| metadata: VariantMetadata<'m>, | ||
| value: &'v [u8], | ||
| ) -> Result<Self, ArrowError> { | ||
| Self::try_new_with_metadata_impl(metadata, value)?.validate() | ||
| Self::try_new_with_metadata_and_shallow_validation(metadata, value)?.with_full_validation() | ||
| } | ||
|
|
||
| /// Similar to [`Self::try_new_with_metadata`], but [unvalidated]. | ||
| /// | ||
| /// [unvalidated]: Self#Validation | ||
| pub fn new_with_metadata(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self { | ||
| Self::try_new_with_metadata_impl(metadata, value).expect("Invalid variant") | ||
| Self::try_new_with_metadata_and_shallow_validation(metadata, value) | ||
| .expect("Invalid variant") | ||
| } | ||
|
|
||
| // The actual constructor, which only performs shallow (constant-time) validation. | ||
| fn try_new_with_metadata_impl( | ||
| fn try_new_with_metadata_and_shallow_validation( | ||
|
||
| metadata: VariantMetadata<'m>, | ||
| value: &'v [u8], | ||
| ) -> Result<Self, ArrowError> { | ||
|
|
@@ -382,21 +385,23 @@ impl<'m, 'v> Variant<'m, 'v> { | |
| VariantBasicType::ShortString => { | ||
| Variant::ShortString(decoder::decode_short_string(value_metadata, value_data)?) | ||
| } | ||
| VariantBasicType::Object => { | ||
| Variant::Object(VariantObject::try_new_impl(metadata, value)?) | ||
| } | ||
| VariantBasicType::Array => Variant::List(VariantList::try_new_impl(metadata, value)?), | ||
| VariantBasicType::Object => Variant::Object( | ||
| VariantObject::try_new_with_shallow_validation(metadata, value)?, | ||
| ), | ||
| VariantBasicType::Array => Variant::List(VariantList::try_new_with_shallow_validation( | ||
| metadata, value, | ||
|
||
| )?), | ||
| }; | ||
| Ok(new_self) | ||
| } | ||
|
|
||
| /// True if this variant instance has already been [validated]. | ||
| /// | ||
| /// [validated]: Self#Validation | ||
| pub fn is_validated(&self) -> bool { | ||
| pub fn is_fully_validated(&self) -> bool { | ||
| match self { | ||
| Variant::List(list) => list.is_validated(), | ||
| Variant::Object(obj) => obj.is_validated(), | ||
| Variant::List(list) => list.is_fully_validated(), | ||
| Variant::Object(obj) => obj.is_fully_validated(), | ||
| _ => true, | ||
| } | ||
| } | ||
|
|
@@ -407,16 +412,16 @@ impl<'m, 'v> Variant<'m, 'v> { | |
| /// Variant leaf values are always valid by construction, but [objects] and [arrays] can be | ||
| /// constructed in unvalidated (and potentially invalid) state. | ||
| /// | ||
| /// If [`Self::is_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)` | ||
| /// If [`Self::is_fully_validated`] is true, validation is a no-op. Otherwise, the cost is `O(m + v)` | ||
| /// where `m` and `v` are the sizes of metadata and value buffers, respectively. | ||
| /// | ||
| /// [objects]: VariantObject#Validation | ||
| /// [arrays]: VariantList#Validation | ||
| pub fn validate(self) -> Result<Self, ArrowError> { | ||
| pub fn with_full_validation(self) -> Result<Self, ArrowError> { | ||
| use Variant::*; | ||
| match self { | ||
| List(list) => list.validate().map(List), | ||
| Object(obj) => obj.validate().map(Object), | ||
| List(list) => list.with_full_validation().map(List), | ||
| Object(obj) => obj.with_full_validation().map(Object), | ||
| _ => Ok(self), | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,14 +82,14 @@ impl VariantListHeader { | |
| /// Every instance of variant list is either _valid_ or _invalid_, depending on whether the | ||
| /// underlying bytes are a valid encoding of a variant array (see below). | ||
| /// | ||
| /// Instances produced by [`Self::try_new`] or [`Self::validate`] are fully _validated_. They always | ||
| /// Instances produced by [`Self::try_new`] or [`Self::with_full_validation`] are fully _validated_. They always | ||
| /// contain _valid_ data, and infallible accesses such as iteration and indexing are panic-free. The | ||
| /// validation cost is linear in the number of underlying bytes. | ||
| /// | ||
| /// Instances produced by [`Self::new`] are _unvalidated_ and so they may contain either _valid_ or | ||
| /// _invalid_ data. Infallible accesses such as iteration and indexing will panic if the underlying | ||
| /// bytes are _invalid_, and fallible alternatives such as [`Self::iter_try`] and [`Self::get`] are | ||
| /// provided as panic-free alternatives. [`Self::validate`] can also be used to _validate_ an | ||
| /// provided as panic-free alternatives. [`Self::with_full_validation`] can also be used to _validate_ an | ||
| /// _unvalidated_ instance, if desired. | ||
| /// | ||
| /// _Unvalidated_ instances can be constructed in constant time. This can be useful if the caller | ||
|
|
@@ -136,18 +136,18 @@ impl<'m, 'v> VariantList<'m, 'v> { | |
| /// This constructor verifies that `value` points to a valid variant array value. In particular, | ||
| /// that all offsets are in-bounds and point to valid (recursively validated) objects. | ||
| pub fn try_new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Result<Self, ArrowError> { | ||
| Self::try_new_impl(metadata, value)?.validate() | ||
| Self::try_new_with_shallow_validation(metadata, value)?.with_full_validation() | ||
| } | ||
|
|
||
| pub fn new(metadata: VariantMetadata<'m>, value: &'v [u8]) -> Self { | ||
| Self::try_new_impl(metadata, value).expect("Invalid variant list value") | ||
| Self::try_new_with_shallow_validation(metadata, value).expect("Invalid variant list value") | ||
friendlymatthew marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /// Attempts to interpret `metadata` and `value` as a variant array, performing only basic | ||
| /// (constant-cost) [validation]. | ||
| /// | ||
| /// [validation]: Self#Validation | ||
| pub(crate) fn try_new_impl( | ||
| pub(crate) fn try_new_with_shallow_validation( | ||
| metadata: VariantMetadata<'m>, | ||
| value: &'v [u8], | ||
| ) -> Result<Self, ArrowError> { | ||
|
|
@@ -196,18 +196,18 @@ impl<'m, 'v> VariantList<'m, 'v> { | |
| /// True if this instance is fully [validated] for panic-free infallible accesses. | ||
| /// | ||
| /// [validated]: Self#Validation | ||
| pub fn is_validated(&self) -> bool { | ||
| pub fn is_fully_validated(&self) -> bool { | ||
| self.validated | ||
| } | ||
|
|
||
| /// Performs a full [validation] of this variant array and returns the result. | ||
| /// | ||
| /// [validation]: Self#Validation | ||
| pub fn validate(mut self) -> Result<Self, ArrowError> { | ||
| pub fn with_full_validation(mut self) -> Result<Self, ArrowError> { | ||
| if !self.validated { | ||
| // Validate the metadata dictionary first, if not already validated, because we pass it | ||
| // by value to all the children (who would otherwise re-validate it repeatedly). | ||
| self.metadata = self.metadata.validate()?; | ||
| self.metadata = self.metadata.with_full_validation()?; | ||
|
|
||
| // Iterate over all string keys in this dictionary in order to prove that the offset | ||
| // array is valid, all offsets are in bounds, and all string bytes are valid utf-8. | ||
|
|
@@ -232,52 +232,55 @@ impl<'m, 'v> VariantList<'m, 'v> { | |
| /// [invalid]: Self#Validation | ||
| pub fn get(&self, index: usize) -> Option<Variant<'m, 'v>> { | ||
| (index < self.num_elements).then(|| { | ||
| self.try_get_impl(index) | ||
| .and_then(Variant::validate) | ||
| self.try_get_with_shallow_validation(index) | ||
| .expect("Invalid variant array element") | ||
| }) | ||
| } | ||
|
|
||
| /// Fallible version of `get`. Returns the element at `index`, capturing validation errors. | ||
| pub fn try_get(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { | ||
| self.try_get_impl(index)?.validate() | ||
| self.try_get_with_shallow_validation(index)? | ||
| .with_full_validation() | ||
| } | ||
|
|
||
| /// Fallible iteration over the elements of this list. | ||
| pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { | ||
| self.iter_try_impl().map(|result| result?.validate()) | ||
| } | ||
|
|
||
| // Fallible iteration that only performs basic (constant-time) validation. | ||
| fn iter_try_impl(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { | ||
| (0..self.len()).map(move |i| self.try_get_impl(i)) | ||
| // Fallible version of `get`, performing only basic (constant-time) validation. | ||
| fn try_get_with_shallow_validation(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { | ||
| // Fetch the value bytes between the two offsets for this index, from the value array region | ||
| // of the byte buffer | ||
| let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?; | ||
| let value_bytes = | ||
| slice_from_slice_at_offset(self.value, self.first_value_byte, byte_range)?; | ||
| Variant::try_new_with_metadata_and_shallow_validation(self.metadata, value_bytes) | ||
| } | ||
|
|
||
| /// Iterates over the values of this list. When working with [unvalidated] input, consider | ||
| /// [`Self::iter_try`] to avoid panics due to invalid data. | ||
| /// | ||
| /// [unvalidated]: Self#Validation | ||
| pub fn iter(&self) -> impl Iterator<Item = Variant<'m, 'v>> + '_ { | ||
| self.iter_try_impl() | ||
| self.iter_try_with_shallow_validation() | ||
| .map(|result| result.expect("Invalid variant list entry")) | ||
| } | ||
|
|
||
| /// Fallible iteration over the elements of this list. | ||
| pub fn iter_try(&self) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { | ||
| self.iter_try_with_shallow_validation() | ||
| .map(|result| result?.with_full_validation()) | ||
| } | ||
|
|
||
| // Fallible iteration that only performs basic (constant-time) validation. | ||
| fn iter_try_with_shallow_validation( | ||
| &self, | ||
| ) -> impl Iterator<Item = Result<Variant<'m, 'v>, ArrowError>> + '_ { | ||
| (0..self.len()).map(move |i| self.try_get_with_shallow_validation(i)) | ||
friendlymatthew marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| // Attempts to retrieve the ith offset from the offset array region of the byte buffer. | ||
| fn get_offset(&self, index: usize) -> Result<usize, ArrowError> { | ||
| let byte_range = self.header.first_offset_byte()..self.first_value_byte; | ||
| let offset_bytes = slice_from_slice(self.value, byte_range)?; | ||
| self.header.offset_size.unpack_usize(offset_bytes, index) | ||
| } | ||
|
|
||
| // Fallible version of `get`, performing only basic (constant-time) validation. | ||
| fn try_get_impl(&self, index: usize) -> Result<Variant<'m, 'v>, ArrowError> { | ||
|
||
| // Fetch the value bytes between the two offsets for this index, from the value array region | ||
| // of the byte buffer | ||
| let byte_range = self.get_offset(index)?..self.get_offset(index + 1)?; | ||
| let value_bytes = | ||
| slice_from_slice_at_offset(self.value, self.first_value_byte, byte_range)?; | ||
| Variant::try_new_with_metadata(self.metadata, value_bytes) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a very nice name change