Skip to content

Commit be03c66

Browse files
authored
support trailing-strings with allow_partial (#1539)
1 parent a3f13c7 commit be03c66

21 files changed

+159
-106
lines changed

.mypy-stubtest-allowlist

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# TODO: don't want to expose this staticmethod, requires https://github.com/PyO3/pyo3/issues/2384
22
pydantic_core._pydantic_core.PydanticUndefinedType.new
3-
# As per #1240, from_json has custom logic to cover the `cache_strings` kwarg
3+
# See #1540 for discussion
44
pydantic_core._pydantic_core.from_json
5+
pydantic_core._pydantic_core.SchemaValidator.validate_python
6+
pydantic_core._pydantic_core.SchemaValidator.validate_json
7+
pydantic_core._pydantic_core.SchemaValidator.validate_strings
58
# the `warnings` kwarg for SchemaSerializer functions has custom logic
69
pydantic_core._pydantic_core.SchemaSerializer.to_python
710
pydantic_core._pydantic_core.SchemaSerializer.to_json

Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ base64 = "0.22.1"
4646
num-bigint = "0.4.6"
4747
python3-dll-a = "0.2.10"
4848
uuid = "1.11.0"
49-
jiter = { version = "0.7", features = ["python"] }
49+
jiter = { version = "0.7.1", features = ["python"] }
5050
hex = "0.4.3"
5151

5252
[lib]

benches/main.rs

+51-51
Large diffs are not rendered by default.

pyproject.toml

+3
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,6 @@ require_change_file = false
109109
[tool.pyright]
110110
include = ['pydantic_core', 'tests/test_typing.py']
111111
reportUnnecessaryTypeIgnoreComment = true
112+
113+
[tool.inline-snapshot.shortcuts]
114+
fix = ["create", "fix"]

python/pydantic_core/_pydantic_core.pyi

+7-3
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ class SchemaValidator:
9696
from_attributes: bool | None = None,
9797
context: Any | None = None,
9898
self_instance: Any | None = None,
99-
allow_partial: bool = False,
99+
allow_partial: bool | Literal['off', 'on', 'trailing-strings'] = False,
100100
) -> Any:
101101
"""
102102
Validate a Python object against the schema and return the validated object.
@@ -113,6 +113,7 @@ class SchemaValidator:
113113
validation from the `__init__` method of a model.
114114
allow_partial: Whether to allow partial validation; if `True`, errors in the last element of sequences
115115
and mappings are ignored.
116+
`'trailing-strings'` means any final unfinished JSON string is included in the result.
116117
117118
Raises:
118119
ValidationError: If validation fails.
@@ -146,7 +147,7 @@ class SchemaValidator:
146147
strict: bool | None = None,
147148
context: Any | None = None,
148149
self_instance: Any | None = None,
149-
allow_partial: bool = False,
150+
allow_partial: bool | Literal['off', 'on', 'trailing-strings'] = False,
150151
) -> Any:
151152
"""
152153
Validate JSON data directly against the schema and return the validated Python object.
@@ -166,6 +167,7 @@ class SchemaValidator:
166167
self_instance: An instance of a model to set attributes on from validation.
167168
allow_partial: Whether to allow partial validation; if `True`, incomplete JSON will be parsed successfully
168169
and errors in the last element of sequences and mappings are ignored.
170+
`'trailing-strings'` means any final unfinished JSON string is included in the result.
169171
170172
Raises:
171173
ValidationError: If validation fails or if the JSON data is invalid.
@@ -180,7 +182,7 @@ class SchemaValidator:
180182
*,
181183
strict: bool | None = None,
182184
context: Any | None = None,
183-
allow_partial: bool = False,
185+
allow_partial: bool | Literal['off', 'on', 'trailing-strings'] = False,
184186
) -> Any:
185187
"""
186188
Validate a string against the schema and return the validated Python object.
@@ -196,6 +198,7 @@ class SchemaValidator:
196198
[`info.context`][pydantic_core.core_schema.ValidationInfo.context].
197199
allow_partial: Whether to allow partial validation; if `True`, errors in the last element of sequences
198200
and mappings are ignored.
201+
`'trailing-strings'` means any final unfinished JSON string is included in the result.
199202
200203
Raises:
201204
ValidationError: If validation fails or if the JSON data is invalid.
@@ -433,6 +436,7 @@ def from_json(
433436
`all/True` means cache all strings, `keys` means cache only dict keys, `none/False` means no caching.
434437
allow_partial: Whether to allow partial deserialization; if `True`, JSON data is returned if the end of the
435438
input is reached before the full object is deserialized, e.g. `["aa", "bb", "c` would return `['aa', 'bb']`.
439+
`'trailing-strings'` means any final unfinished JSON string is included in the result.
436440
437441
Raises:
438442
ValueError: If deserialization fails.

src/input/return_enums.rs

+12-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::cmp::Ordering;
33
use std::ops::Rem;
44
use std::str::FromStr;
55

6-
use jiter::{JsonArray, JsonValue, StringCacheMode};
6+
use jiter::{JsonArray, JsonValue, PartialMode, StringCacheMode};
77
use num_bigint::BigInt;
88

99
use pyo3::exceptions::PyTypeError;
@@ -128,9 +128,13 @@ pub(crate) fn validate_iter_to_vec<'py>(
128128
) -> ValResult<Vec<PyObject>> {
129129
let mut output: Vec<PyObject> = Vec::with_capacity(capacity);
130130
let mut errors: Vec<ValLineError> = Vec::new();
131+
let allow_partial = state.allow_partial;
131132

132133
for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
133-
state.allow_partial = is_last_partial;
134+
state.allow_partial = match is_last_partial {
135+
true => allow_partial,
136+
false => PartialMode::Off,
137+
};
134138
let item = item_result.map_err(|e| any_next_error!(py, e, max_length_check.input, index))?;
135139
match validator.validate(py, item.borrow_input(), state) {
136140
Ok(item) => {
@@ -202,8 +206,13 @@ pub(crate) fn validate_iter_to_set<'py>(
202206
) -> ValResult<()> {
203207
let mut errors: Vec<ValLineError> = Vec::new();
204208

209+
let allow_partial = state.allow_partial;
210+
205211
for (index, is_last_partial, item_result) in state.enumerate_last_partial(iter) {
206-
state.allow_partial = is_last_partial;
212+
state.allow_partial = match is_last_partial {
213+
true => allow_partial,
214+
false => PartialMode::Off,
215+
};
207216
let item = item_result.map_err(|e| any_next_error!(py, e, input, index))?;
208217
match validator.validate(py, item.borrow_input(), state) {
209218
Ok(item) => {

src/url.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ impl PyUrl {
4545
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
4646
let schema_obj = SCHEMA_DEFINITION_URL
4747
.get_or_init(py, || build_schema_validator(py, "url"))
48-
.validate_python(py, url, None, None, None, None, false)?;
48+
.validate_python(py, url, None, None, None, None, false.into())?;
4949
schema_obj.extract(py)
5050
}
5151

@@ -225,7 +225,7 @@ impl PyMultiHostUrl {
225225
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
226226
let schema_obj = SCHEMA_DEFINITION_MULTI_HOST_URL
227227
.get_or_init(py, || build_schema_validator(py, "multi-host-url"))
228-
.validate_python(py, url, None, None, None, None, false)?;
228+
.validate_python(py, url, None, None, None, None, false.into())?;
229229
schema_obj.extract(py)
230230
}
231231

src/validators/arguments.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use crate::build_tools::{schema_or_config_same, ExtraBehavior};
1111
use crate::errors::{ErrorTypeDefaults, ValError, ValLineError, ValResult};
1212
use crate::input::{Arguments, BorrowInput, Input, KeywordArgs, PositionalArgs, ValidationMatch};
1313
use crate::lookup_key::LookupKey;
14-
1514
use crate::tools::SchemaDict;
1615

1716
use super::validation_state::ValidationState;
@@ -189,7 +188,7 @@ impl Validator for ArgumentsValidator {
189188
state: &mut ValidationState<'_, 'py>,
190189
) -> ValResult<PyObject> {
191190
// this validator does not yet support partial validation, disable it to avoid incorrect results
192-
state.allow_partial = false;
191+
state.allow_partial = false.into();
193192

194193
let args = input.validate_args()?;
195194

src/validators/dataclass.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl Validator for DataclassArgsValidator {
146146
state: &mut ValidationState<'_, 'py>,
147147
) -> ValResult<PyObject> {
148148
// this validator does not yet support partial validation, disable it to avoid incorrect results
149-
state.allow_partial = false;
149+
state.allow_partial = false.into();
150150

151151
let args = input.validate_dataclass_args(&self.dataclass_name)?;
152152

src/validators/definitions.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ impl Validator for DefinitionRefValidator {
7676
state: &mut ValidationState<'_, 'py>,
7777
) -> ValResult<PyObject> {
7878
// this validator does not yet support partial validation, disable it to avoid incorrect results
79-
state.allow_partial = false;
79+
state.allow_partial = false.into();
8080

8181
self.definition.read(|validator| {
8282
let validator = validator.unwrap();

src/validators/dict.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,10 @@ where
109109
fn consume_iterator(self, iterator: impl Iterator<Item = ValResult<(Key, Value)>>) -> ValResult<PyObject> {
110110
let output = PyDict::new_bound(self.py);
111111
let mut errors: Vec<ValLineError> = Vec::new();
112+
let allow_partial = self.state.allow_partial;
112113

113114
for (_, is_last_partial, item_result) in self.state.enumerate_last_partial(iterator) {
114-
self.state.allow_partial = false;
115+
self.state.allow_partial = false.into();
115116
let (key, value) = item_result?;
116117
let output_key = match self.key_validator.validate(self.py, key.borrow_input(), self.state) {
117118
Ok(value) => Some(value),
@@ -125,7 +126,10 @@ where
125126
Err(ValError::Omit) => continue,
126127
Err(err) => return Err(err),
127128
};
128-
self.state.allow_partial = is_last_partial;
129+
self.state.allow_partial = match is_last_partial {
130+
true => allow_partial,
131+
false => false.into(),
132+
};
129133
let output_value = match self.value_validator.validate(self.py, value.borrow_input(), self.state) {
130134
Ok(value) => value,
131135
Err(ValError::LineErrors(line_errors)) => {

src/validators/generator.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Validator for GeneratorValidator {
6767
state: &mut ValidationState<'_, 'py>,
6868
) -> ValResult<PyObject> {
6969
// this validator does not yet support partial validation, disable it to avoid incorrect results
70-
state.allow_partial = false;
70+
state.allow_partial = false.into();
7171

7272
let iterator = input.validate_iter()?.into_static();
7373
let validator = self.item_validator.as_ref().map(|v| {
@@ -282,7 +282,7 @@ impl InternalValidator {
282282
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
283283
cache_str: self.cache_str,
284284
};
285-
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
285+
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false.into());
286286
state.exactness = self.exactness;
287287
let result = self
288288
.validator
@@ -317,7 +317,7 @@ impl InternalValidator {
317317
self_instance: self.self_instance.as_ref().map(|data| data.bind(py)),
318318
cache_str: self.cache_str,
319319
};
320-
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false);
320+
let mut state = ValidationState::new(extra, &mut self.recursion_guard, false.into());
321321
state.exactness = self.exactness;
322322
let result = self.validator.validate(py, input, &mut state).map_err(|e| {
323323
ValidationError::from_val_error(

src/validators/json.rs

+2-6
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use pyo3::intern;
22
use pyo3::prelude::*;
33
use pyo3::types::PyDict;
44

5-
use jiter::{FloatMode, JsonValue, PartialMode, PythonParse};
5+
use jiter::{FloatMode, JsonValue, PythonParse};
66

77
use crate::errors::{ErrorType, ErrorTypeDefaults, ValError, ValLineError, ValResult};
88
use crate::input::{EitherBytes, Input, InputType, ValidationMatch};
@@ -70,11 +70,7 @@ impl Validator for JsonValidator {
7070
let parse_builder = PythonParse {
7171
allow_inf_nan: true,
7272
cache_mode: state.cache_str(),
73-
partial_mode: if state.allow_partial {
74-
PartialMode::TrailingStrings
75-
} else {
76-
PartialMode::Off
77-
},
73+
partial_mode: state.allow_partial,
7874
catch_duplicate_keys: false,
7975
float_mode: FloatMode::Float,
8076
};

src/validators/mod.rs

+13-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use std::fmt::Debug;
22

33
use enum_dispatch::enum_dispatch;
4-
use jiter::StringCacheMode;
4+
use jiter::{PartialMode, StringCacheMode};
55

66
use pyo3::exceptions::PyTypeError;
77
use pyo3::prelude::*;
@@ -165,7 +165,7 @@ impl SchemaValidator {
165165
}
166166

167167
#[allow(clippy::too_many_arguments)]
168-
#[pyo3(signature = (input, *, strict=None, from_attributes=None, context=None, self_instance=None, allow_partial=false))]
168+
#[pyo3(signature = (input, *, strict=None, from_attributes=None, context=None, self_instance=None, allow_partial=PartialMode::Off))]
169169
pub fn validate_python(
170170
&self,
171171
py: Python,
@@ -174,7 +174,7 @@ impl SchemaValidator {
174174
from_attributes: Option<bool>,
175175
context: Option<&Bound<'_, PyAny>>,
176176
self_instance: Option<&Bound<'_, PyAny>>,
177-
allow_partial: bool,
177+
allow_partial: PartialMode,
178178
) -> PyResult<PyObject> {
179179
self._validate(
180180
py,
@@ -207,7 +207,7 @@ impl SchemaValidator {
207207
from_attributes,
208208
context,
209209
self_instance,
210-
false,
210+
false.into(),
211211
) {
212212
Ok(_) => Ok(true),
213213
Err(ValError::InternalErr(err)) => Err(err),
@@ -217,15 +217,15 @@ impl SchemaValidator {
217217
}
218218
}
219219

220-
#[pyo3(signature = (input, *, strict=None, context=None, self_instance=None, allow_partial=false))]
220+
#[pyo3(signature = (input, *, strict=None, context=None, self_instance=None, allow_partial=PartialMode::Off))]
221221
pub fn validate_json(
222222
&self,
223223
py: Python,
224224
input: &Bound<'_, PyAny>,
225225
strict: Option<bool>,
226226
context: Option<&Bound<'_, PyAny>>,
227227
self_instance: Option<&Bound<'_, PyAny>>,
228-
allow_partial: bool,
228+
allow_partial: PartialMode,
229229
) -> PyResult<PyObject> {
230230
let r = match json::validate_json_bytes(input) {
231231
Ok(v_match) => self._validate_json(
@@ -242,14 +242,14 @@ impl SchemaValidator {
242242
r.map_err(|e| self.prepare_validation_err(py, e, InputType::Json))
243243
}
244244

245-
#[pyo3(signature = (input, *, strict=None, context=None, allow_partial=false))]
245+
#[pyo3(signature = (input, *, strict=None, context=None, allow_partial=PartialMode::Off))]
246246
pub fn validate_strings(
247247
&self,
248248
py: Python,
249249
input: Bound<'_, PyAny>,
250250
strict: Option<bool>,
251251
context: Option<&Bound<'_, PyAny>>,
252-
allow_partial: bool,
252+
allow_partial: PartialMode,
253253
) -> PyResult<PyObject> {
254254
let t = InputType::String;
255255
let string_mapping = StringMapping::new_value(input).map_err(|e| self.prepare_validation_err(py, e, t))?;
@@ -283,7 +283,7 @@ impl SchemaValidator {
283283
};
284284

285285
let guard = &mut RecursionState::default();
286-
let mut state = ValidationState::new(extra, guard, false);
286+
let mut state = ValidationState::new(extra, guard, false.into());
287287
self.validator
288288
.validate_assignment(py, &obj, field_name, &field_value, &mut state)
289289
.map_err(|e| self.prepare_validation_err(py, e, InputType::Python))
@@ -306,7 +306,7 @@ impl SchemaValidator {
306306
cache_str: self.cache_str,
307307
};
308308
let recursion_guard = &mut RecursionState::default();
309-
let mut state = ValidationState::new(extra, recursion_guard, false);
309+
let mut state = ValidationState::new(extra, recursion_guard, false.into());
310310
let r = self.validator.default_value(py, None::<i64>, &mut state);
311311
match r {
312312
Ok(maybe_default) => match maybe_default {
@@ -352,7 +352,7 @@ impl SchemaValidator {
352352
from_attributes: Option<bool>,
353353
context: Option<&Bound<'py, PyAny>>,
354354
self_instance: Option<&Bound<'py, PyAny>>,
355-
allow_partial: bool,
355+
allow_partial: PartialMode,
356356
) -> ValResult<PyObject> {
357357
let mut recursion_guard = RecursionState::default();
358358
let mut state = ValidationState::new(
@@ -379,7 +379,7 @@ impl SchemaValidator {
379379
strict: Option<bool>,
380380
context: Option<&Bound<'_, PyAny>>,
381381
self_instance: Option<&Bound<'_, PyAny>>,
382-
allow_partial: bool,
382+
allow_partial: PartialMode,
383383
) -> ValResult<PyObject> {
384384
let json_value = jiter::JsonValue::parse_with_config(json_data, true, allow_partial)
385385
.map_err(|e| json::map_json_err(input, e, json_data))?;
@@ -430,7 +430,7 @@ impl<'py> SelfValidator<'py> {
430430
let mut state = ValidationState::new(
431431
Extra::new(strict, None, None, None, InputType::Python, true.into()),
432432
&mut recursion_guard,
433-
false,
433+
false.into(),
434434
);
435435
match self.validator.validator.validate(py, schema, &mut state) {
436436
Ok(schema_obj) => Ok(schema_obj.into_bound(py)),

src/validators/model_fields.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ impl Validator for ModelFieldsValidator {
121121
state: &mut ValidationState<'_, 'py>,
122122
) -> ValResult<PyObject> {
123123
// this validator does not yet support partial validation, disable it to avoid incorrect results
124-
state.allow_partial = false;
124+
state.allow_partial = false.into();
125125

126126
let strict = state.strict_or(self.strict);
127127
let from_attributes = state.extra().from_attributes.unwrap_or(self.from_attributes);

0 commit comments

Comments
 (0)