Skip to content

Commit 05ff9c1

Browse files
committed
adopt jiter main
1 parent abcdbb7 commit 05ff9c1

File tree

4 files changed

+30
-40
lines changed

4 files changed

+30
-40
lines changed

Cargo.lock

Lines changed: 1 addition & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,8 @@ base64 = "0.21.7"
4444
num-bigint = "0.4.4"
4545
python3-dll-a = "0.2.7"
4646
uuid = "1.7.0"
47-
bytecount = { version = "0.6.7", default_features = false, features = ["runtime-dispatch-simd"] }
4847
#jiter = { version = "0.1.1", features = ["python"] }
49-
jiter = { git = "https://github.com/pydantic/jiter", branch = "ascii-string-creation", features = ["python"] }
48+
jiter = { git = "https://github.com/pydantic/jiter", branch = "main", features = ["python"] }
5049

5150
[lib]
5251
name = "_pydantic_core"

src/input/shared.rs

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use pyo3::prelude::*;
22
use pyo3::sync::GILOnceCell;
33
use pyo3::{intern, Py, PyAny, Python};
44

5-
use num_bigint::BigInt;
5+
use jiter::{JsonErrorType, NumberInt};
66

77
use crate::errors::{ErrorTypeDefaults, ValError, ValResult};
88

@@ -68,29 +68,24 @@ fn strip_underscores(s: &str) -> Option<String> {
6868
}
6969

7070
/// parse a string as an int
71-
///
72-
/// max length of the input is 4300, see
73-
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
74-
/// https://github.com/python/cpython/issues/95778 for more info in that length bound
7571
pub fn str_as_int<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
7672
let str = str.trim();
77-
let len = str.len();
78-
if len > 4300 {
79-
Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input))
80-
} else if let Some(int) = _parse_str(input, str, len) {
81-
Ok(int)
82-
} else if let Some(str_stripped) = strip_decimal_zeros(str) {
83-
if let Some(int) = _parse_str(input, str_stripped, len) {
84-
Ok(int)
85-
} else {
86-
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
73+
74+
// we have to call `NumberInt::try_from` directly first so we fail fast if the string is too long
75+
match NumberInt::try_from(str.as_bytes()) {
76+
Ok(NumberInt::Int(i)) => return Ok(EitherInt::I64(i)),
77+
Ok(NumberInt::BigInt(i)) => return Ok(EitherInt::BigInt(i)),
78+
Err(e) => {
79+
if e.error_type == JsonErrorType::NumberOutOfRange {
80+
return Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input));
81+
}
8782
}
83+
}
84+
85+
if let Some(str_stripped) = strip_decimal_zeros(str) {
86+
_parse_str(input, str_stripped)
8887
} else if let Some(str_stripped) = strip_underscores(str) {
89-
if let Some(int) = _parse_str(input, &str_stripped, len) {
90-
Ok(int)
91-
} else {
92-
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
93-
}
88+
_parse_str(input, &str_stripped)
9489
} else {
9590
Err(ValError::new(ErrorTypeDefaults::IntParsing, input))
9691
}
@@ -108,16 +103,18 @@ pub fn str_as_float<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValRe
108103
}
109104

110105
/// parse a string as an int, `input` is required here to get lifetimes to match up
111-
///
112-
fn _parse_str<'py>(_input: &(impl Input<'py> + ?Sized), str: &str, len: usize) -> Option<EitherInt<'py>> {
113-
if len < 19 {
114-
if let Ok(i) = str.parse::<i64>() {
115-
return Some(EitherInt::I64(i));
116-
}
117-
} else if let Ok(i) = str.parse::<BigInt>() {
118-
return Some(EitherInt::BigInt(i));
106+
/// max length of the input is 4300 which is checked by jiter, see
107+
/// https://docs.python.org/3/whatsnew/3.11.html#other-cpython-implementation-changes and
108+
/// https://github.com/python/cpython/issues/95778 for more info on that length bound
109+
fn _parse_str<'py>(input: &(impl Input<'py> + ?Sized), str: &str) -> ValResult<EitherInt<'py>> {
110+
match NumberInt::try_from(str.as_bytes()) {
111+
Ok(jiter::NumberInt::Int(i)) => Ok(EitherInt::I64(i)),
112+
Ok(jiter::NumberInt::BigInt(i)) => Ok(EitherInt::BigInt(i)),
113+
Err(e) => match e.error_type {
114+
JsonErrorType::NumberOutOfRange => Err(ValError::new(ErrorTypeDefaults::IntParsingSize, input)),
115+
_ => Err(ValError::new(ErrorTypeDefaults::IntParsing, input)),
116+
},
119117
}
120-
None
121118
}
122119

123120
/// we don't want to parse as f64 then call `float_as_int` as it can lose precision for large ints, therefore

src/tools.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ pub fn extract_i64(v: &Bound<'_, PyAny>) -> Option<i64> {
147147
}
148148

149149
pub(crate) fn new_py_string<'py>(py: Python<'py>, s: &str, cache_str: StringCacheMode) -> Bound<'py, PyString> {
150-
let ascii_only = bytecount::num_chars(s.as_bytes()) == s.len();
150+
// we could use `bytecount::num_chars(s.as_bytes()) == s.len()` as orjson does, but it doesn't appear to be faster
151+
let ascii_only = false;
151152
if matches!(cache_str, StringCacheMode::All) {
152153
cached_py_string(py, s, ascii_only)
153154
} else {

0 commit comments

Comments
 (0)