Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Developer task runner. Running `make` with no target builds `all` (see bottom).
.DEFAULT_GOAL := all

# Print the rustfmt version (command itself not echoed), then run `cargo fmt`.
.PHONY: format
format:
@cargo fmt --version
cargo fmt

# Print the clippy version, run clippy with warnings promoted to errors
# (`-D warnings`), then build the documentation with `cargo doc`.
.PHONY: lint
lint:
@cargo clippy --version
cargo clippy -- -D warnings
cargo doc

# Run the Rust tests via `cargo test`.
.PHONY: test
test:
cargo test

# Build the Python extension described by crates/jiter-python/Cargo.toml with
# `maturin develop` (presumably installs it into the active environment — confirm).
.PHONY: python-dev
python-dev:
maturin develop -m crates/jiter-python/Cargo.toml

# Run the Python test suite; depends on `python-dev` so that target runs first.
.PHONY: test-python
test-python: python-dev
pytest crates/jiter-python/tests

# Run the benchmarks of the `jiter` package with the `python` feature enabled.
.PHONY: bench
bench:
cargo bench -p jiter -F python

# Run the `compare_to_serde` fuzz target from crates/fuzz on the nightly toolchain.
.PHONY: fuzz
fuzz:
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_to_serde --release

# Run the `compare_skip` fuzz target from crates/fuzz on the nightly toolchain.
.PHONY: fuzz-skip
fuzz-skip:
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_skip --release

# Default target: format, lint, then run the Rust and Python tests.
.PHONY: all
all: format lint test test-python
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ fn main() {
"+44 2345678"
]
}"#;
let mut jiter = Jiter::new(json_data.as_bytes(), true);
let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan();
assert_eq!(jiter.next_object().unwrap(), Some("name"));
assert_eq!(jiter.next_str().unwrap(), "John Doe");
assert_eq!(jiter.next_key().unwrap(), Some("age"));
Expand Down
4 changes: 2 additions & 2 deletions crates/fuzz/fuzz_targets/compare_skip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ fuzz_target!(|json: String| {
let json_data = json.as_bytes();
match JsonValue::parse(json_data, false) {
Ok(_) => {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
jiter.next_skip().unwrap();
jiter.finish().unwrap();
}
Err(json_error) => {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let jiter_error = match jiter.next_skip() {
Ok(_) => jiter.finish().unwrap_err(),
Err(e) => e,
Expand Down
7 changes: 5 additions & 2 deletions crates/jiter-python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def from_json(
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
allow_partial: bool = False,
allow_partial: Literal[True, False, "off", "on", "trailing-strings"] = False,
catch_duplicate_keys: bool = False,
) -> Any:
"""
Expand All @@ -30,7 +30,10 @@ def from_json(
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
allow_partial: How to handle incomplete JSON input:
- False / 'off' - raise an exception if the input is incomplete
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times

Returns:
Expand Down
7 changes: 5 additions & 2 deletions crates/jiter-python/jiter.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def from_json(
*,
allow_inf_nan: bool = True,
cache_strings: Literal[True, False, "all", "keys", "none"] = "all",
allow_partial: bool = False,
allow_partial: Literal[True, False, "off", "on", "trailing-strings"] = False,
catch_duplicate_keys: bool = False,
lossless_floats: bool = False,
) -> Any:
Expand All @@ -22,7 +22,10 @@ def from_json(
- True / 'all' - cache all strings
- 'keys' - cache only object keys
- False / 'none' - cache nothing
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
allow_partial: How to handle incomplete JSON input:
- False / 'off' - raise an exception if the input is incomplete
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`

Expand Down
8 changes: 4 additions & 4 deletions crates/jiter-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::sync::OnceLock;

use pyo3::prelude::*;

use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParseBuilder, StringCacheMode};

#[allow(clippy::fn_params_excessive_bools)]
#[pyfunction(
Expand All @@ -12,7 +12,7 @@ use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
*,
allow_inf_nan=true,
cache_strings=StringCacheMode::All,
allow_partial=false,
allow_partial=PartialMode::Off,
catch_duplicate_keys=false,
lossless_floats=false,
)
Expand All @@ -22,14 +22,14 @@ pub fn from_json<'py>(
json_data: &[u8],
allow_inf_nan: bool,
cache_strings: StringCacheMode,
allow_partial: bool,
allow_partial: PartialMode,
catch_duplicate_keys: bool,
lossless_floats: bool,
) -> PyResult<Bound<'py, PyAny>> {
let parse_builder = PythonParseBuilder {
allow_inf_nan,
cache_mode: cache_strings,
allow_partial,
partial_mode: allow_partial,
catch_duplicate_keys,
lossless_floats,
};
Expand Down
46 changes: 46 additions & 0 deletions crates/jiter-python/tests/test_jiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ def test_partial_array():
assert isinstance(parsed, list)


def test_partial_array_trailing_strings():
    """In 'trailing-strings' mode the unterminated final string is kept in the parsed list."""
    payload = b'["string", true, null, 1, "foo'
    expected = ["string", True, None, 1, "foo"]
    assert jiter.from_json(payload, allow_partial='trailing-strings') == expected

    # every proper prefix of the payload must still parse to a list
    for end in range(1, len(payload)):
        prefix = payload[:end]
        result = jiter.from_json(prefix, allow_partial='trailing-strings')
        assert isinstance(result, list)


def test_partial_array_first():
json = b"["
parsed = jiter.from_json(json, allow_partial=True)
Expand All @@ -80,6 +91,9 @@ def test_partial_array_first():
with pytest.raises(ValueError, match="EOF while parsing a list at line 1 column 1"):
jiter.from_json(json)

with pytest.raises(ValueError, match="EOF while parsing a list at line 1 column 1"):
jiter.from_json(json, allow_partial='off')


def test_partial_object():
json = b'{"a": 1, "b": 2, "c'
Expand All @@ -92,6 +106,38 @@ def test_partial_object():
assert isinstance(parsed, dict)


def test_partial_object_string():
    """With allow_partial enabled (True / 'on'), an unterminated trailing string value is dropped."""
    json = b'{"a": 1, "b": 2, "c": "foo'
    parsed = jiter.from_json(json, allow_partial=True)
    assert parsed == {"a": 1, "b": 2}
    parsed = jiter.from_json(json, allow_partial='on')
    assert parsed == {"a": 1, "b": 2}

    # test that stopping at every point is ok
    # (the input must be sliced: parsing the full `json` on every iteration
    # would repeat the assertion above and never exercise the prefixes)
    for i in range(1, len(json)):
        parsed = jiter.from_json(json[:i], allow_partial=True)
        assert isinstance(parsed, dict)

    json = b'{"title": "Pride and Prejudice", "author": "Jane A'
    parsed = jiter.from_json(json, allow_partial=True)
    assert parsed == {"title": "Pride and Prejudice"}


def test_partial_object_string_trailing_strings():
    """In 'trailing-strings' mode an unterminated trailing string value is kept in the output."""
    json = b'{"a": 1, "b": 2, "c": "foo'
    parsed = jiter.from_json(json, allow_partial='trailing-strings')
    assert parsed == {"a": 1, "b": 2, "c": "foo"}

    # test that stopping at every point is ok
    # (slice the input and use the mode under test; the loop previously re-parsed
    # the full `json` with allow_partial=True, so it checked neither)
    for i in range(1, len(json)):
        parsed = jiter.from_json(json[:i], allow_partial='trailing-strings')
        assert isinstance(parsed, dict)

    json = b'{"title": "Pride and Prejudice", "author": "Jane A'
    parsed = jiter.from_json(json, allow_partial='trailing-strings')
    assert parsed == {"title": "Pride and Prejudice", "author": "Jane A"}


def test_partial_nested():
json = b'{"a": 1, "b": 2, "c": [1, 2, {"d": 1, '
parsed = jiter.from_json(json, allow_partial=True)
Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ fn main() {
"+44 2345678"
]
}"#;
let mut jiter = Jiter::new(json_data.as_bytes(), true);
let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan();
assert_eq!(jiter.next_object().unwrap(), Some("name"));
assert_eq!(jiter.next_str().unwrap(), "John Doe");
assert_eq!(jiter.next_key().unwrap(), Some("age"));
Expand Down
18 changes: 9 additions & 9 deletions crates/jiter/benches/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ fn jiter_skip(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
jiter.next_skip().unwrap();
})
}
Expand All @@ -36,7 +36,7 @@ fn jiter_iter_big(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
jiter.next_array().unwrap();

loop {
Expand Down Expand Up @@ -73,7 +73,7 @@ fn jiter_iter_pass2(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let string = find_string(&mut jiter);
jiter.finish().unwrap();
black_box(string)
Expand All @@ -84,7 +84,7 @@ fn jiter_iter_string_array(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
jiter.next_array().unwrap();
let i = jiter.known_str().unwrap();
// record len instead of allocating the string to simulate something like constructing a PyString
Expand All @@ -101,7 +101,7 @@ fn jiter_iter_true_array(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let first_peek = jiter.next_array().unwrap().unwrap();
let i = jiter.known_bool(first_peek).unwrap();
black_box(i);
Expand All @@ -116,7 +116,7 @@ fn jiter_iter_true_object(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
if let Some(first_key) = jiter.next_object().unwrap() {
let first_key = first_key.to_string();
let first_value = jiter.next_bool().unwrap();
Expand All @@ -134,7 +134,7 @@ fn jiter_iter_ints_array(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let first_peek = jiter.next_array().unwrap().unwrap();
let i = jiter.known_int(first_peek).unwrap();
black_box(i);
Expand All @@ -149,7 +149,7 @@ fn jiter_iter_floats_array(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let first_peek = jiter.next_array().unwrap().unwrap();
let i = jiter.known_float(first_peek).unwrap();
black_box(i);
Expand All @@ -164,7 +164,7 @@ fn jiter_string(path: &str, bench: &mut Bencher) {
let json = read_file(path);
let json_data = black_box(json.as_bytes());
bench.iter(|| {
let mut jiter = Jiter::new(json_data, false);
let mut jiter = Jiter::new(json_data);
let string = jiter.next_str().unwrap();
black_box(string);
jiter.finish().unwrap();
Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/benches/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::io::Read;

use pyo3::Python;

use jiter::{cache_clear, PythonParseBuilder, StringCacheMode};
use jiter::{cache_clear, PartialMode, PythonParseBuilder, StringCacheMode};

fn python_parse_numeric(bench: &mut Bencher) {
Python::with_gil(|py| {
Expand Down
26 changes: 22 additions & 4 deletions crates/jiter/src/jiter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ pub struct Jiter<'j> {
parser: Parser<'j>,
tape: Tape,
allow_inf_nan: bool,
allow_partial_strings: bool,
}

impl Clone for Jiter<'_> {
Expand All @@ -24,6 +25,7 @@ impl Clone for Jiter<'_> {
parser: self.parser.clone(),
tape: Tape::default(),
allow_inf_nan: self.allow_inf_nan,
allow_partial_strings: self.allow_partial_strings,
}
}
}
Expand All @@ -34,15 +36,26 @@ impl<'j> Jiter<'j> {
/// # Arguments
/// - `data`: The JSON data to be parsed.
///
/// `NaN`, `Infinity` and `-Infinity` are not allowed as numbers by default; chain
/// `with_allow_inf_nan` onto the constructor to allow them.
pub fn new(data: &'j [u8], allow_inf_nan: bool) -> Self {
pub fn new(data: &'j [u8]) -> Self {
Self {
data,
parser: Parser::new(data),
tape: Tape::default(),
allow_inf_nan,
allow_inf_nan: false,
allow_partial_strings: false,
}
}

/// Allow `NaN`, `Infinity` and `-Infinity` as numbers.
///
/// Builder-style setter: consumes the `Jiter`, sets `allow_inf_nan` (which `Jiter::new`
/// initialises to `false`) and returns the updated value, e.g.
/// `Jiter::new(data).with_allow_inf_nan()`.
///
/// Marked `#[must_use]` because the setting only takes effect on the returned value.
#[must_use]
pub fn with_allow_inf_nan(mut self) -> Self {
    self.allow_inf_nan = true;
    self
}

/// Enable partial-string handling for string reads.
///
/// Builder-style setter: consumes the `Jiter`, sets `allow_partial_strings` (which
/// `Jiter::new` initialises to `false`) and returns the updated value. The flag is
/// forwarded to the parser's `consume_string` by `known_str` and `known_bytes`.
///
/// NOTE(review): presumably this lets a string cut off by the end of the input be
/// returned instead of producing an error — confirm against `Parser::consume_string`.
///
/// Marked `#[must_use]` because the setting only takes effect on the returned value.
#[must_use]
pub fn with_allow_partial_strings(mut self) -> Self {
    self.allow_partial_strings = true;
    self
}

/// Get the current [LinePosition] of the parser.
pub fn current_position(&self) -> LinePosition {
self.parser.current_position()
Expand Down Expand Up @@ -186,7 +199,10 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a string, parse it.
pub fn known_str(&mut self) -> JiterResult<&str> {
match self.parser.consume_string::<StringDecoder>(&mut self.tape) {
match self
.parser
.consume_string::<StringDecoder>(&mut self.tape, self.allow_partial_strings)
{
Ok(output) => Ok(output.as_str()),
Err(e) => Err(e.into()),
}
Expand All @@ -203,7 +219,9 @@ impl<'j> Jiter<'j> {

/// Knowing the next value is a string, parse it and return bytes from the original JSON data.
pub fn known_bytes(&mut self) -> JiterResult<&[u8]> {
let range = self.parser.consume_string::<StringDecoderRange>(&mut self.tape)?;
let range = self
.parser
.consume_string::<StringDecoderRange>(&mut self.tape, self.allow_partial_strings)?;
Ok(&self.data[range])
}

Expand Down
2 changes: 1 addition & 1 deletion crates/jiter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ pub use py_lossless_float::LosslessFloat;
#[cfg(feature = "python")]
pub use py_string_cache::{cache_clear, cache_usage, cached_py_string, pystring_fast_new, StringCacheMode};
#[cfg(feature = "python")]
pub use python::{map_json_error, PythonParseBuilder};
pub use python::{map_json_error, PartialMode, PythonParseBuilder};
Loading