Skip to content

Commit

Permalink
Add allow_partial (#1512)
Browse files Browse the repository at this point in the history
Co-authored-by: David Hewitt <[email protected]>
  • Loading branch information
samuelcolvin and davidhewitt authored Nov 4, 2024
1 parent 5e95c05 commit a1fa596
Show file tree
Hide file tree
Showing 27 changed files with 758 additions and 100 deletions.
270 changes: 219 additions & 51 deletions benches/main.rs

Large diffs are not rendered by default.

17 changes: 16 additions & 1 deletion python/pydantic_core/_pydantic_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class SchemaValidator:
from_attributes: bool | None = None,
context: Any | None = None,
self_instance: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate a Python object against the schema and return the validated object.
Expand All @@ -110,6 +111,8 @@ class SchemaValidator:
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
self_instance: An instance of a model to set attributes on from validation; this is used when running
validation from the `__init__` method of a model.
allow_partial: Whether to allow partial validation; if `True` errors in the last element of sequences
and mappings are ignored.
Raises:
ValidationError: If validation fails.
Expand Down Expand Up @@ -143,6 +146,7 @@ class SchemaValidator:
strict: bool | None = None,
context: Any | None = None,
self_instance: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate JSON data directly against the schema and return the validated Python object.
Expand All @@ -160,6 +164,8 @@ class SchemaValidator:
context: The context to use for validation, this is passed to functional validators as
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
self_instance: An instance of a model to set attributes on from validation.
allow_partial: Whether to allow partial validation; if `True` incomplete JSON will be parsed successfully
and errors in the last element of sequences and mappings are ignored.
Raises:
ValidationError: If validation fails or if the JSON data is invalid.
Expand All @@ -168,7 +174,14 @@ class SchemaValidator:
Returns:
The validated Python object.
"""
def validate_strings(self, input: _StringInput, *, strict: bool | None = None, context: Any | None = None) -> Any:
def validate_strings(
self,
input: _StringInput,
*,
strict: bool | None = None,
context: Any | None = None,
allow_partial: bool = False,
) -> Any:
"""
Validate a string against the schema and return the validated Python object.
Expand All @@ -181,6 +194,8 @@ class SchemaValidator:
If `None`, the value of [`CoreConfig.strict`][pydantic_core.core_schema.CoreConfig] is used.
context: The context to use for validation, this is passed to functional validators as
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
allow_partial: Whether to allow partial validation; if `True` errors in the last element of sequences
and mappings are ignored.
Raises:
ValidationError: If validation fails or if the JSON data is invalid.
Expand Down
4 changes: 2 additions & 2 deletions python/pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2840,7 +2840,7 @@ def typed_dict_field(
Args:
schema: The schema to use for the field
required: Whether the field is required
required: Whether the field is required, otherwise uses the value from `total` on the typed dict
validation_alias: The alias(es) to use to find the field in the validation data
serialization_alias: The alias to use as a key when serializing
serialization_exclude: Whether to exclude the field when serializing
Expand Down Expand Up @@ -2916,7 +2916,7 @@ class MyTypedDict(TypedDict):
ref: optional unique identifier of the schema, used to reference the schema in other places
metadata: Any other information you want to include with the schema, not used by pydantic-core
extra_behavior: The extra behavior to use for the typed dict
total: Whether the typed dict is total
total: Whether the typed dict is total, otherwise uses `typed_dict_total` from config
populate_by_name: Whether the typed dict should populate by name
serialization: Custom serialization schema
"""
Expand Down
8 changes: 8 additions & 0 deletions src/errors/line_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,14 @@ impl ValLineError {
self.error_type = error_type;
self
}

/// Returns the first item of this error's location, or `None` when the location is empty.
///
/// Location items are stored in reverse order, so the first item of the location is the
/// last element of the underlying list.
pub fn first_loc_item(&self) -> Option<&LocItem> {
    match &self.location {
        // reversed storage: the first location item sits at the end of the list
        Location::List(items) => items.last(),
        Location::Empty => None,
    }
}
}

#[cfg_attr(debug_assertions, derive(Debug))]
Expand Down
2 changes: 1 addition & 1 deletion src/errors/location.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::lookup_key::{LookupPath, PathItem};

/// Used to store individual items of the error location, e.g. a string for key/field names
/// or a number for array indices.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
#[cfg_attr(debug_assertions, derive(Debug))]
pub enum LocItem {
/// string type key, used to identify items from a dict or anything that implements `__getitem__`
Expand Down
5 changes: 5 additions & 0 deletions src/input/input_abstract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ pub trait ValidatedDict<'py> {
&'a self,
consumer: impl ConsumeIterator<ValResult<(Self::Key<'a>, Self::Item<'a>)>, Output = R>,
) -> ValResult<R>;
/// Returns the last key of the dict, if there is one.
///
/// Used in partial mode to check all errors occurred in the last value.
fn last_key(&self) -> Option<Self::Key<'_>>;
}

/// For validations from a list
Expand Down Expand Up @@ -276,6 +278,9 @@ impl<'py> ValidatedDict<'py> for Never {
) -> ValResult<R> {
unreachable!()
}
// Panics via `unreachable!()` if called.
// NOTE(review): presumably `Never` is never constructed, so this cannot be reached — confirm.
fn last_key(&self) -> Option<Self::Key<'_>> {
unreachable!()
}
}

impl<'py> ValidatedList<'py> for Never {
Expand Down
4 changes: 4 additions & 0 deletions src/input/input_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,10 @@ impl<'py, 'data> ValidatedDict<'py> for &'_ JsonObject<'data> {
) -> ValResult<R> {
Ok(consumer.consume_iterator(LazyIndexMap::iter(self).map(|(k, v)| Ok((k.as_ref(), v)))))
}

/// Returns the last key yielded by `self.keys()`, converted with `AsRef::as_ref`,
/// or `None` when there are no keys.
fn last_key(&self) -> Option<Self::Key<'_>> {
self.keys().last().map(AsRef::as_ref)
}
}

impl<'a, 'py, 'data> ValidatedList<'py> for &'a JsonArray<'data> {
Expand Down
15 changes: 15 additions & 0 deletions src/input/input_python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -823,6 +823,21 @@ impl<'py> ValidatedDict<'py> for GenericPyMapping<'_, 'py> {
Self::GetAttr(obj, _) => Ok(consumer.consume_iterator(iterate_attributes(obj)?)),
}
}

/// Returns the last key of the input, or `None` when there is no key or it cannot be obtained.
fn last_key(&self) -> Option<Self::Key<'_>> {
match self {
// last element produced when iterating the dict's keys
Self::Dict(dict) => dict.keys().iter().last(),
// see https://github.com/pydantic/pydantic-core/pull/1512#discussion_r1826057970
// calls `keys()` on the mapping and takes the last item produced by iterating the result;
// a failure of the call, of creating the iterator, or of the last item itself yields `None`,
// as does an iterator that produces no items
Self::Mapping(mapping) => mapping
.call_method0(intern!(mapping.py(), "keys"))
.ok()?
.iter()
.ok()?
.last()?
.ok(),
// no last key is reported for this variant
Self::GetAttr(_, _) => None,
}
}
}

/// Container for all the collections (sized iterable containers) types, which
Expand Down
8 changes: 8 additions & 0 deletions src/input/input_string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,12 @@ impl<'py> ValidatedDict<'py> for StringMappingDict<'py> {
.map(|(key, val)| Ok((StringMapping::new_key(key)?, StringMapping::new_value(val)?))),
))
}

/// Returns the last key of the wrapped dict converted with `StringMapping::new_key`.
///
/// Yields `None` when the dict has no keys or when the conversion of the last key fails.
fn last_key(&self) -> Option<Self::Key<'_>> {
    // bail out early on an empty dict
    let final_key = self.0.keys().iter().last()?;
    // a key that cannot be converted is treated as "no last key"
    StringMapping::new_key(final_key).ok()
}
}
22 changes: 15 additions & 7 deletions src/input/return_enums.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ pub(crate) fn validate_iter_to_vec<'py>(
) -> ValResult<Vec<PyObject>> {
let mut output: Vec<PyObject> = Vec::with_capacity(capacity);
let mut errors: Vec<ValLineError> = Vec::new();
for (index, item_result) in iter.enumerate() {

for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
state.allow_partial = is_last_partial;
let item = item_result.map_err(|e| any_next_error!(py, e, max_length_check.input, index))?;
match validator.validate(py, item.borrow_input(), state) {
Ok(item) => {
Expand All @@ -137,9 +139,11 @@ pub(crate) fn validate_iter_to_vec<'py>(
}
Err(ValError::LineErrors(line_errors)) => {
max_length_check.incr()?;
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if fail_fast {
break;
if !is_last_partial {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if fail_fast {
return Err(ValError::LineErrors(errors));
}
}
}
Err(ValError::Omit) => (),
Expand Down Expand Up @@ -197,7 +201,9 @@ pub(crate) fn validate_iter_to_set<'py>(
fail_fast: bool,
) -> ValResult<()> {
let mut errors: Vec<ValLineError> = Vec::new();
for (index, item_result) in iter.enumerate() {

for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
state.allow_partial = is_last_partial;
let item = item_result.map_err(|e| any_next_error!(py, e, input, index))?;
match validator.validate(py, item.borrow_input(), state) {
Ok(item) => {
Expand All @@ -220,13 +226,15 @@ pub(crate) fn validate_iter_to_set<'py>(
}
}
Err(ValError::LineErrors(line_errors)) => {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
if !is_last_partial {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(index)));
}
}
Err(ValError::Omit) => (),
Err(err) => return Err(err),
}
if fail_fast && !errors.is_empty() {
break;
return Err(ValError::LineErrors(errors));
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/serializers/fields.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl SerField {
}
}

pub fn get_key_py<'py>(&'py self, py: Python<'py>, extra: &Extra) -> &Bound<'py, PyAny> {
pub fn get_key_py<'py>(&self, py: Python<'py>, extra: &Extra) -> &Bound<'py, PyAny> {
if extra.by_alias {
if let Some(ref alias_py) = self.alias_py {
return alias_py.bind(py);
Expand Down
4 changes: 2 additions & 2 deletions src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ impl PyUrl {
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_URL
.get_or_init(py, || build_schema_validator(py, "url"))
.validate_python(py, url, None, None, None, None)?;
.validate_python(py, url, None, None, None, None, false)?;
schema_obj.extract(py)
}

Expand Down Expand Up @@ -225,7 +225,7 @@ impl PyMultiHostUrl {
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_MULTI_HOST_URL
.get_or_init(py, || build_schema_validator(py, "multi-host-url"))
.validate_python(py, url, None, None, None, None)?;
.validate_python(py, url, None, None, None, None, false)?;
schema_obj.extract(py)
}

Expand Down
3 changes: 3 additions & 0 deletions src/validators/arguments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ impl Validator for ArgumentsValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let args = input.validate_args()?;

let mut output_args: Vec<PyObject> = Vec::with_capacity(self.positional_params_count);
Expand Down
3 changes: 3 additions & 0 deletions src/validators/dataclass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ impl Validator for DataclassArgsValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let args = input.validate_dataclass_args(&self.dataclass_name)?;

let output_dict = PyDict::new_bound(py);
Expand Down
3 changes: 3 additions & 0 deletions src/validators/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ impl Validator for DefinitionRefValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

self.definition.read(|validator| {
let validator = validator.unwrap();
if let Some(id) = input.as_python().map(py_identity) {
Expand Down
16 changes: 9 additions & 7 deletions src/validators/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ where
let output = PyDict::new_bound(self.py);
let mut errors: Vec<ValLineError> = Vec::new();

for item_result in iterator {
for (_, is_last_partial, item_result) in self.state.enumerate_last_partial(iterator) {
self.state.allow_partial = false;
let (key, value) = item_result?;
let output_key = match self.key_validator.validate(self.py, key.borrow_input(), self.state) {
Ok(value) => Some(value),
Expand All @@ -124,19 +125,20 @@ where
Err(ValError::Omit) => continue,
Err(err) => return Err(err),
};
self.state.allow_partial = is_last_partial;
let output_value = match self.value_validator.validate(self.py, value.borrow_input(), self.state) {
Ok(value) => Some(value),
Ok(value) => value,
Err(ValError::LineErrors(line_errors)) => {
for err in line_errors {
errors.push(err.with_outer_location(key.clone()));
if !is_last_partial {
errors.extend(line_errors.into_iter().map(|err| err.with_outer_location(key.clone())));
}
None
continue;
}
Err(ValError::Omit) => continue,
Err(err) => return Err(err),
};
if let (Some(key), Some(value)) = (output_key, output_value) {
output.set_item(key, value)?;
if let Some(key) = output_key {
output.set_item(key, output_value)?;
}
}

Expand Down
7 changes: 5 additions & 2 deletions src/validators/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ impl Validator for GeneratorValidator {
input: &(impl Input<'py> + ?Sized),
state: &mut ValidationState<'_, 'py>,
) -> ValResult<PyObject> {
// this validator does not yet support partial validation, disable it to avoid incorrect results
state.allow_partial = false;

let iterator = input.validate_iter()?.into_static();
let validator = self.item_validator.as_ref().map(|v| {
InternalValidator::new(
Expand Down Expand Up @@ -279,7 +282,7 @@ impl InternalValidator {
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
cache_str: self.cache_str,
};
let mut state = ValidationState::new(extra, &mut self.recursion_guard);
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
state.exactness = self.exactness;
let result = self
.validator
Expand Down Expand Up @@ -314,7 +317,7 @@ impl InternalValidator {
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
cache_str: self.cache_str,
};
let mut state = ValidationState::new(extra, &mut self.recursion_guard);
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
state.exactness = self.exactness;
let result = self.validator.validate(py, input, &mut state).map_err(|e| {
ValidationError::from_val_error(
Expand Down
12 changes: 8 additions & 4 deletions src/validators/json.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use jiter::FloatMode;
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::PyDict;

use jiter::{JsonValue, PartialMode, PythonParse};
use jiter::{FloatMode, JsonValue, PartialMode, PythonParse};

use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValResult};
use crate::input::{EitherBytes, Input, InputType, ValidationMatch};
Expand Down Expand Up @@ -60,7 +59,8 @@ impl Validator for JsonValidator {
let json_bytes = json_either_bytes.as_slice();
match self.validator {
Some(ref validator) => {
let json_value = JsonValue::parse(json_bytes, true).map_err(|e| map_json_err(input, e, json_bytes))?;
let json_value = JsonValue::parse_with_config(json_bytes, true, state.allow_partial)
.map_err(|e| map_json_err(input, e, json_bytes))?;
let mut json_state = state.rebind_extra(|e| {
e.input_type = InputType::Json;
});
Expand All @@ -70,7 +70,11 @@ impl Validator for JsonValidator {
let parse_builder = PythonParse {
allow_inf_nan: true,
cache_mode: state.cache_str(),
partial_mode: PartialMode::Off,
partial_mode: if state.allow_partial {
PartialMode::TrailingStrings
} else {
PartialMode::Off
},
catch_duplicate_keys: false,
float_mode: FloatMode::Float,
};
Expand Down
Loading

0 comments on commit a1fa596

Please sign in to comment.