diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f97de98c..021be702 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -57,7 +57,7 @@ jobs: - run: cargo careful t -F python if: matrix.rust-version == 'nightly' - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: env_vars: RUNS_ON,RUST_VERSION token: ${{ secrets.CODECOV_TOKEN }} @@ -103,11 +103,53 @@ jobs: - run: cargo test --doc - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v4 with: env_vars: RUNS_ON,RUST_VERSION token: ${{ secrets.CODECOV_TOKEN }} + test-python: + name: test jiter-python + + runs-on: ubuntu-latest + + env: + RUNS_ON: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: set up python + uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - uses: dtolnay/rust-toolchain@stable + + - id: cache-rust + uses: Swatinem/rust-cache@v2 + + - run: cargo install rustfilt coverage-prepare + if: steps.cache-rust.outputs.cache-hit != 'true' + + - run: rustup component add llvm-tools-preview + + - run: make python-install + + - run: pip install -e crates/jiter-python + env: + RUSTFLAGS: '-C instrument-coverage' + + - run: pytest crates/jiter-python/tests + env: + RUST_BACKTRACE: 1 + + - run: coverage-prepare lcov $(python -c 'import jiter.jiter;print(jiter.jiter.__file__)') + + - uses: codecov/codecov-action@v4 + with: + token: ${{ secrets.CODECOV_TOKEN }} + bench: runs-on: ubuntu-latest steps: @@ -504,7 +546,7 @@ jobs: # https://github.com/marketplace/actions/alls-green#why used for branch protection checks check: if: always() - needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint] + needs: [test-linux, test-macos, test-python, bench, fuzz, fuzz-skip, lint] runs-on: ubuntu-latest steps: - name: Decide whether the needed jobs succeeded or failed diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..78ca30ea --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +.DEFAULT_GOAL := all 
+ +.PHONY: format +format: + @cargo fmt --version + cargo fmt + +.PHONY: lint +lint: + @cargo clippy --version + cargo clippy -- -D warnings + cargo doc + +.PHONY: test +test: + cargo test + +.PHONY: python-install +python-install: + pip install maturin + pip install -r crates/jiter-python/tests/requirements.txt + +.PHONY: python-dev +python-dev: + maturin develop -m crates/jiter-python/Cargo.toml + +.PHONY: python-test +python-test: python-dev + pytest crates/jiter-python/tests + +.PHONY: bench +bench: + cargo bench -p jiter -F python + +.PHONY: fuzz +fuzz: + cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_to_serde --release + +.PHONY: fuzz-skip +fuzz-skip: + cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_skip --release + +.PHONY: all +all: format lint test python-test diff --git a/README.md b/README.md index fc01d46e..cfe6464a 100644 --- a/README.md +++ b/README.md @@ -9,13 +9,13 @@ Fast iterable JSON parser. Documentation is available at [docs.rs/jiter](https://docs.rs/jiter). jiter has three interfaces: -* [`JsonValue`] an enum representing JSON data -* [`Jiter`] an iterator over JSON data -* [`python_parse`] which parses a JSON string into a Python object +* `JsonValue` an enum representing JSON data +* `Jiter` an iterator over JSON data +* `PythonParse` which parses a JSON string into a Python object ## JsonValue Example -See [the `JsonValue` docs][JsonValue] for more details. +See [the `JsonValue` docs](https://docs.rs/jiter/latest/jiter/enum.JsonValue.html) for more details. 
```rust use jiter::JsonValue; @@ -54,7 +54,7 @@ Object( ## Jiter Example -To use [Jiter], you need to know what schema you're expecting: +To use [Jiter](https://docs.rs/jiter/latest/jiter/struct.Jiter.html), you need to know what schema you're expecting: ```rust use jiter::{Jiter, NumberInt, Peek}; @@ -69,7 +69,7 @@ fn main() { "+44 2345678" ] }"#; - let mut jiter = Jiter::new(json_data.as_bytes(), true); + let mut jiter = Jiter::new(json_data.as_bytes()); assert_eq!(jiter.next_object().unwrap(), Some("name")); assert_eq!(jiter.next_str().unwrap(), "John Doe"); assert_eq!(jiter.next_key().unwrap(), Some("age")); diff --git a/crates/fuzz/fuzz_targets/compare_skip.rs b/crates/fuzz/fuzz_targets/compare_skip.rs index 9cec2aa2..ecb58d79 100644 --- a/crates/fuzz/fuzz_targets/compare_skip.rs +++ b/crates/fuzz/fuzz_targets/compare_skip.rs @@ -16,12 +16,12 @@ fuzz_target!(|json: String| { let json_data = json.as_bytes(); match JsonValue::parse(json_data, false) { Ok(_) => { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); jiter.next_skip().unwrap(); jiter.finish().unwrap(); } Err(json_error) => { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let jiter_error = match jiter.next_skip() { Ok(_) => jiter.finish().unwrap_err(), Err(e) => e, diff --git a/crates/jiter-python/README.md b/crates/jiter-python/README.md index d125b5c0..e13bd14e 100644 --- a/crates/jiter-python/README.md +++ b/crates/jiter-python/README.md @@ -15,9 +15,10 @@ def from_json( /, *, allow_inf_nan: bool = True, - cache_strings: Literal[True, False, "all", "keys", "none"] = True, - allow_partial: bool = False, + cache_mode: Literal[True, False, "all", "keys", "none"] = "all", + partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False, catch_duplicate_keys: bool = False, + lossless_floats: bool = False, ) -> Any: """ Parse input bytes into a JSON object. 
@@ -26,12 +27,16 @@ def from_json( json_data: The JSON data to parse allow_inf_nan: Whether to allow infinity (`Infinity` an `-Infinity`) and `NaN` values to float fields. Defaults to True. - cache_strings: cache Python strings to improve performance at the cost of some memory usage + cache_mode: cache Python strings to improve performance at the cost of some memory usage - True / 'all' - cache all strings - 'keys' - cache only object keys - False / 'none' - cache nothing - allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays + partial_mode: How to handle incomplete strings: + - False / 'off' - raise an exception if the input is incomplete + - True / 'on' - allow incomplete JSON but discard the last string if it is incomplete + - 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times + lossless_floats: if True, preserve full detail on floats using `LosslessFloat` Returns: Python object built from the JSON input. diff --git a/crates/jiter-python/jiter.pyi b/crates/jiter-python/jiter.pyi index c488de7b..1fe3d9bd 100644 --- a/crates/jiter-python/jiter.pyi +++ b/crates/jiter-python/jiter.pyi @@ -6,8 +6,8 @@ def from_json( /, *, allow_inf_nan: bool = True, - cache_strings: Literal[True, False, "all", "keys", "none"] = "all", - allow_partial: bool = False, + cache_mode: Literal[True, False, "all", "keys", "none"] = "all", + partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False, catch_duplicate_keys: bool = False, lossless_floats: bool = False, ) -> Any: @@ -18,11 +18,14 @@ def from_json( json_data: The JSON data to parse allow_inf_nan: Whether to allow infinity (`Infinity` an `-Infinity`) and `NaN` values to float fields. Defaults to True. 
- cache_strings: cache Python strings to improve performance at the cost of some memory usage + cache_mode: cache Python strings to improve performance at the cost of some memory usage - True / 'all' - cache all strings - 'keys' - cache only object keys - False / 'none' - cache nothing - allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays + partial_mode: How to handle incomplete strings: + - False / 'off' - raise an exception if the input is incomplete + - True / 'on' - allow incomplete JSON but discard the last string if it is incomplete + - 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times lossless_floats: if True, preserve full detail on floats using `LosslessFloat` diff --git a/crates/jiter-python/src/lib.rs b/crates/jiter-python/src/lib.rs index 06879009..7cd08ed6 100644 --- a/crates/jiter-python/src/lib.rs +++ b/crates/jiter-python/src/lib.rs @@ -2,7 +2,7 @@ use std::sync::OnceLock; use pyo3::prelude::*; -use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode}; +use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParse, StringCacheMode}; #[allow(clippy::fn_params_excessive_bools)] #[pyfunction( @@ -11,8 +11,8 @@ use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode}; /, *, allow_inf_nan=true, - cache_strings=StringCacheMode::All, - allow_partial=false, + cache_mode=StringCacheMode::All, + partial_mode=PartialMode::Off, catch_duplicate_keys=false, lossless_floats=false, ) @@ -21,15 +21,15 @@ pub fn from_json<'py>( py: Python<'py>, json_data: &[u8], allow_inf_nan: bool, - cache_strings: StringCacheMode, - allow_partial: bool, + cache_mode: StringCacheMode, + partial_mode: PartialMode, catch_duplicate_keys: bool, lossless_floats: bool, ) -> PyResult> { - let parse_builder = PythonParseBuilder { + let parse_builder = 
PythonParse { allow_inf_nan, - cache_mode: cache_strings, - allow_partial, + cache_mode, + partial_mode, catch_duplicate_keys, lossless_floats, }; diff --git a/crates/jiter-python/tests/test_jiter.py b/crates/jiter-python/tests/test_jiter.py index ce9569c0..ebe72971 100644 --- a/crates/jiter-python/tests/test_jiter.py +++ b/crates/jiter-python/tests/test_jiter.py @@ -17,7 +17,7 @@ def test_python_parse_other_cached(): parsed = jiter.from_json( b'["string", true, false, null, NaN, Infinity, -Infinity]', allow_inf_nan=True, - cache_strings=True, + cache_mode=True, ) assert parsed == ["string", True, False, None, IsFloatNan(), inf, -inf] @@ -25,7 +25,7 @@ def test_python_parse_other_cached(): def test_python_parse_other_no_cache(): parsed = jiter.from_json( b'["string", true, false, null]', - cache_strings=False, + cache_mode=False, ) assert parsed == ["string", True, False, None] @@ -63,56 +63,102 @@ def test_extracted_value_error(): def test_partial_array(): json = b'["string", true, null, 1, "foo' - parsed = jiter.from_json(json, allow_partial=True) + parsed = jiter.from_json(json, partial_mode=True) assert parsed == ["string", True, None, 1] # test that stopping at every points is ok for i in range(1, len(json)): - parsed = jiter.from_json(json[:i], allow_partial=True) + parsed = jiter.from_json(json[:i], partial_mode=True) + assert isinstance(parsed, list) + + +def test_partial_array_trailing_strings(): + json = b'["string", true, null, 1, "foo' + parsed = jiter.from_json(json, partial_mode='trailing-strings') + assert parsed == ["string", True, None, 1, "foo"] + + # test that stopping at every points is ok + for i in range(1, len(json)): + parsed = jiter.from_json(json[:i], partial_mode='trailing-strings') assert isinstance(parsed, list) def test_partial_array_first(): json = b"[" - parsed = jiter.from_json(json, allow_partial=True) + parsed = jiter.from_json(json, partial_mode=True) assert parsed == [] with pytest.raises(ValueError, match="EOF while parsing a 
list at line 1 column 1"): jiter.from_json(json) + with pytest.raises(ValueError, match="EOF while parsing a list at line 1 column 1"): + jiter.from_json(json, partial_mode='off') + def test_partial_object(): json = b'{"a": 1, "b": 2, "c' - parsed = jiter.from_json(json, allow_partial=True) + parsed = jiter.from_json(json, partial_mode=True) assert parsed == {"a": 1, "b": 2} # test that stopping at every points is ok for i in range(1, len(json)): - parsed = jiter.from_json(json, allow_partial=True) + parsed = jiter.from_json(json[:i], partial_mode=True) assert isinstance(parsed, dict) +def test_partial_object_string(): + json = b'{"a": 1, "b": 2, "c": "foo' + parsed = jiter.from_json(json, partial_mode=True) + assert parsed == {"a": 1, "b": 2} + parsed = jiter.from_json(json, partial_mode='on') + assert parsed == {"a": 1, "b": 2} + + # test that stopping at every points is ok + for i in range(1, len(json)): + parsed = jiter.from_json(json[:i], partial_mode=True) + assert isinstance(parsed, dict) + + json = b'{"title": "Pride and Prejudice", "author": "Jane A' + parsed = jiter.from_json(json, partial_mode=True) + assert parsed == {"title": "Pride and Prejudice"} + + +def test_partial_object_string_trailing_strings(): + json = b'{"a": 1, "b": 2, "c": "foo' + parsed = jiter.from_json(json, partial_mode='trailing-strings') + assert parsed == {"a": 1, "b": 2, "c": "foo"} + + # test that stopping at every points is ok + for i in range(1, len(json)): + parsed = jiter.from_json(json[:i], partial_mode='trailing-strings') + assert isinstance(parsed, dict) + + json = b'{"title": "Pride and Prejudice", "author": "Jane A' + parsed = jiter.from_json(json, partial_mode='trailing-strings') + assert parsed == {"title": "Pride and Prejudice", "author": "Jane A"} + + def test_partial_nested(): json = b'{"a": 1, "b": 2, "c": [1, 2, {"d": 1, ' - parsed = jiter.from_json(json, allow_partial=True) + parsed = jiter.from_json(json, partial_mode=True) assert parsed == {"a": 1, "b": 2, "c": [1, 2, {"d": 1}]} # test 
that stopping at every points is ok for i in range(1, len(json)): - parsed = jiter.from_json(json[:i], allow_partial=True) + parsed = jiter.from_json(json[:i], partial_mode=True) assert isinstance(parsed, dict) def test_python_cache_usage_all(): jiter.cache_clear() - parsed = jiter.from_json(b'{"foo": "bar", "spam": 3}', cache_strings="all") + parsed = jiter.from_json(b'{"foo": "bar", "spam": 3}', cache_mode="all") assert parsed == {"foo": "bar", "spam": 3} assert jiter.cache_usage() == 3 def test_python_cache_usage_keys(): jiter.cache_clear() - parsed = jiter.from_json(b'{"foo": "bar", "spam": 3}', cache_strings="keys") + parsed = jiter.from_json(b'{"foo": "bar", "spam": 3}', cache_mode="keys") assert parsed == {"foo": "bar", "spam": 3} assert jiter.cache_usage() == 2 @@ -121,7 +167,7 @@ def test_python_cache_usage_none(): jiter.cache_clear() parsed = jiter.from_json( b'{"foo": "bar", "spam": 3}', - cache_strings="none", + cache_mode="none", ) assert parsed == {"foo": "bar", "spam": 3} assert jiter.cache_usage() == 0 @@ -130,14 +176,14 @@ def test_python_cache_usage_none(): def test_use_tape(): json = ' "foo\\nbar" '.encode() jiter.cache_clear() - parsed = jiter.from_json(json, cache_strings=False) + parsed = jiter.from_json(json, cache_mode=False) assert parsed == "foo\nbar" def test_unicode(): json = '{"💩": "£"}'.encode() jiter.cache_clear() - parsed = jiter.from_json(json, cache_strings=False) + parsed = jiter.from_json(json, cache_mode=False) assert parsed == {"💩": "£"} diff --git a/crates/jiter/README.md b/crates/jiter/README.md index fc01d46e..418e1808 100644 --- a/crates/jiter/README.md +++ b/crates/jiter/README.md @@ -11,7 +11,7 @@ Documentation is available at [docs.rs/jiter](https://docs.rs/jiter). 
jiter has three interfaces: * [`JsonValue`] an enum representing JSON data * [`Jiter`] an iterator over JSON data -* [`python_parse`] which parses a JSON string into a Python object +* [`PythonParse`] which parses a JSON string into a Python object ## JsonValue Example @@ -69,7 +69,7 @@ fn main() { "+44 2345678" ] }"#; - let mut jiter = Jiter::new(json_data.as_bytes(), true); + let mut jiter = Jiter::new(json_data.as_bytes()).with_allow_inf_nan(); assert_eq!(jiter.next_object().unwrap(), Some("name")); assert_eq!(jiter.next_str().unwrap(), "John Doe"); assert_eq!(jiter.next_key().unwrap(), Some("age")); diff --git a/crates/jiter/benches/main.rs b/crates/jiter/benches/main.rs index 7af88a84..ed6b4ed6 100644 --- a/crates/jiter/benches/main.rs +++ b/crates/jiter/benches/main.rs @@ -27,7 +27,7 @@ fn jiter_skip(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); jiter.next_skip().unwrap(); }) } @@ -36,7 +36,7 @@ fn jiter_iter_big(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); jiter.next_array().unwrap(); loop { @@ -73,7 +73,7 @@ fn jiter_iter_pass2(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let string = find_string(&mut jiter); jiter.finish().unwrap(); black_box(string) @@ -84,7 +84,7 @@ fn jiter_iter_string_array(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); jiter.next_array().unwrap(); let i = jiter.known_str().unwrap(); 
// record len instead of allocating the string to simulate something like constructing a PyString @@ -101,7 +101,7 @@ fn jiter_iter_true_array(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let first_peek = jiter.next_array().unwrap().unwrap(); let i = jiter.known_bool(first_peek).unwrap(); black_box(i); @@ -116,7 +116,7 @@ fn jiter_iter_true_object(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); if let Some(first_key) = jiter.next_object().unwrap() { let first_key = first_key.to_string(); let first_value = jiter.next_bool().unwrap(); @@ -134,7 +134,7 @@ fn jiter_iter_ints_array(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let first_peek = jiter.next_array().unwrap().unwrap(); let i = jiter.known_int(first_peek).unwrap(); black_box(i); @@ -149,7 +149,7 @@ fn jiter_iter_floats_array(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let first_peek = jiter.next_array().unwrap().unwrap(); let i = jiter.known_float(first_peek).unwrap(); black_box(i); @@ -164,7 +164,7 @@ fn jiter_string(path: &str, bench: &mut Bencher) { let json = read_file(path); let json_data = black_box(json.as_bytes()); bench.iter(|| { - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); let string = jiter.next_str().unwrap(); black_box(string); jiter.finish().unwrap(); diff --git a/crates/jiter/benches/python.rs 
b/crates/jiter/benches/python.rs index 9820b3b2..ce3c6dd1 100644 --- a/crates/jiter/benches/python.rs +++ b/crates/jiter/benches/python.rs @@ -5,13 +5,13 @@ use std::io::Read; use pyo3::Python; -use jiter::{cache_clear, PythonParseBuilder, StringCacheMode}; +use jiter::{cache_clear, PartialMode, PythonParse, StringCacheMode}; fn python_parse_numeric(bench: &mut Bencher) { Python::with_gil(|py| { cache_clear(py); bench.iter(|| { - PythonParseBuilder::default() + PythonParse::default() .python_parse( py, br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#, @@ -25,7 +25,7 @@ fn python_parse_other(bench: &mut Bencher) { Python::with_gil(|py| { cache_clear(py); bench.iter(|| { - PythonParseBuilder::default() + PythonParse::default() .python_parse(py, br#"["string", true, false, null]"#) .unwrap() }); @@ -41,7 +41,7 @@ fn _python_parse_file(path: &str, bench: &mut Bencher, cache_mode: StringCacheMo Python::with_gil(|py| { cache_clear(py); bench.iter(|| { - PythonParseBuilder { + PythonParse { cache_mode, ..Default::default() } diff --git a/crates/jiter/src/jiter.rs b/crates/jiter/src/jiter.rs index b0adc2bc..fda20dae 100644 --- a/crates/jiter/src/jiter.rs +++ b/crates/jiter/src/jiter.rs @@ -14,6 +14,7 @@ pub struct Jiter<'j> { parser: Parser<'j>, tape: Tape, allow_inf_nan: bool, + allow_partial_strings: bool, } impl Clone for Jiter<'_> { @@ -24,6 +25,7 @@ impl Clone for Jiter<'_> { parser: self.parser.clone(), tape: Tape::default(), allow_inf_nan: self.allow_inf_nan, + allow_partial_strings: self.allow_partial_strings, } } } @@ -34,15 +36,26 @@ impl<'j> Jiter<'j> { /// # Arguments /// - `data`: The JSON data to be parsed. /// - `allow_inf_nan`: Whether to allow `NaN`, `Infinity` and `-Infinity` as numbers. 
- pub fn new(data: &'j [u8], allow_inf_nan: bool) -> Self { + pub fn new(data: &'j [u8]) -> Self { Self { data, parser: Parser::new(data), tape: Tape::default(), - allow_inf_nan, + allow_inf_nan: false, + allow_partial_strings: false, } } + pub fn with_allow_inf_nan(mut self) -> Self { + self.allow_inf_nan = true; + self + } + + pub fn with_allow_partial_strings(mut self) -> Self { + self.allow_partial_strings = true; + self + } + /// Get the current [LinePosition] of the parser. pub fn current_position(&self) -> LinePosition { self.parser.current_position() @@ -186,7 +199,10 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a string, parse it. pub fn known_str(&mut self) -> JiterResult<&str> { - match self.parser.consume_string::(&mut self.tape) { + match self + .parser + .consume_string::(&mut self.tape, self.allow_partial_strings) + { Ok(output) => Ok(output.as_str()), Err(e) => Err(e.into()), } @@ -203,7 +219,9 @@ impl<'j> Jiter<'j> { /// Knowing the next value is a string, parse it and return bytes from the original JSON data. 
pub fn known_bytes(&mut self) -> JiterResult<&[u8]> { - let range = self.parser.consume_string::(&mut self.tape)?; + let range = self + .parser + .consume_string::(&mut self.tape, self.allow_partial_strings)?; Ok(&self.data[range]) } diff --git a/crates/jiter/src/lib.rs b/crates/jiter/src/lib.rs index 4d1c27c9..b9eea501 100644 --- a/crates/jiter/src/lib.rs +++ b/crates/jiter/src/lib.rs @@ -28,4 +28,4 @@ pub use py_lossless_float::LosslessFloat; #[cfg(feature = "python")] pub use py_string_cache::{cache_clear, cache_usage, cached_py_string, pystring_fast_new, StringCacheMode}; #[cfg(feature = "python")] -pub use python::{map_json_error, PythonParseBuilder}; +pub use python::{map_json_error, PartialMode, PythonParse}; diff --git a/crates/jiter/src/parse.rs b/crates/jiter/src/parse.rs index 027bcaf9..4ece726f 100644 --- a/crates/jiter/src/parse.rs +++ b/crates/jiter/src/parse.rs @@ -196,11 +196,15 @@ impl<'j> Parser<'j> { self.consume_ident(NULL_REST) } - pub fn consume_string<'t, D: AbstractStringDecoder<'t, 'j>>(&mut self, tape: &'t mut Tape) -> JsonResult + pub fn consume_string<'t, D: AbstractStringDecoder<'t, 'j>>( + &mut self, + tape: &'t mut Tape, + allow_partial: bool, + ) -> JsonResult where 'j: 't, { - let (output, index) = D::decode(self.data, self.index, tape)?; + let (output, index) = D::decode(self.data, self.index, tape, allow_partial)?; self.index = index; Ok(output) } @@ -220,7 +224,7 @@ impl<'j> Parser<'j> { where 'j: 't, { - let (output, index) = D::decode(self.data, self.index, tape)?; + let (output, index) = D::decode(self.data, self.index, tape, false)?; self.index = index; if let Some(next) = self.eat_whitespace() { if next == b':' { diff --git a/crates/jiter/src/py_lossless_float.rs b/crates/jiter/src/py_lossless_float.rs index 852c752d..39f32af2 100644 --- a/crates/jiter/src/py_lossless_float.rs +++ b/crates/jiter/src/py_lossless_float.rs @@ -34,7 +34,7 @@ impl LosslessFloat { fn __float__(&self) -> PyResult { let bytes = &self.0; - let mut 
jiter = Jiter::new(bytes, true); + let mut jiter = Jiter::new(bytes).with_allow_inf_nan(); let f = jiter .next_float() .map_err(|e| PyValueError::new_err(e.description(&jiter)))?; diff --git a/crates/jiter/src/python.rs b/crates/jiter/src/python.rs index e5d31613..0bb49d1d 100644 --- a/crates/jiter/src/python.rs +++ b/crates/jiter/src/python.rs @@ -1,10 +1,10 @@ use ahash::AHashSet; use std::marker::PhantomData; -use pyo3::exceptions::PyValueError; +use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi; use pyo3::prelude::*; -use pyo3::types::{PyDict, PyList, PyString}; +use pyo3::types::{PyBool, PyDict, PyList, PyString}; use pyo3::ToPyObject; use smallvec::SmallVec; @@ -18,20 +18,20 @@ use crate::{JsonErrorType, LosslessFloat}; #[derive(Default)] #[allow(clippy::struct_excessive_bools)] -pub struct PythonParseBuilder { +pub struct PythonParse { /// Whether to allow `(-)Infinity` and `NaN` values. pub allow_inf_nan: bool, /// Whether to cache strings to avoid constructing new Python objects, pub cache_mode: StringCacheMode, /// Whether to allow partial JSON data. - pub allow_partial: bool, + pub partial_mode: PartialMode, /// Whether to catch duplicate keys in objects. pub catch_duplicate_keys: bool, /// Whether to preserve full detail on floats using [`LosslessFloat`] pub lossless_floats: bool, } -impl PythonParseBuilder { +impl PythonParse { /// Parse a JSON value from a byte slice and return a Python object. 
/// /// # Arguments @@ -50,7 +50,7 @@ impl PythonParseBuilder { py, json_data, self.allow_inf_nan, - self.allow_partial, + self.partial_mode, ) }; } @@ -86,7 +86,7 @@ struct PythonParser<'j, StringCache, KeyCheck, ParseNumber> { tape: Tape, recursion_limit: u8, allow_inf_nan: bool, - allow_partial: bool, + partial_mode: PartialMode, } impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: MaybeParseNumber> @@ -96,7 +96,7 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma py: Python<'py>, json_data: &[u8], allow_inf_nan: bool, - allow_partial: bool, + partial_mode: PartialMode, ) -> JsonResult> { let mut slf = PythonParser { _string_cache: PhantomData::, @@ -106,12 +106,12 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma tape: Tape::default(), recursion_limit: DEFAULT_RECURSION_LIMIT, allow_inf_nan, - allow_partial, + partial_mode, }; let peek = slf.parser.peek()?; let v = slf.py_take_value(py, peek)?; - if !allow_partial { + if !slf.partial_mode.is_active() { slf.parser.finish()?; } Ok(v) @@ -132,7 +132,9 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma Ok(false.to_object(py).into_bound(py)) } Peek::String => { - let s = self.parser.consume_string::(&mut self.tape)?; + let s = self + .parser + .consume_string::(&mut self.tape, self.partial_mode.allow_trailing_str())?; Ok(StringCache::get_value(py, s.as_str(), s.ascii_only()).into_any()) } Peek::Array => { @@ -208,7 +210,7 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma } fn _allow_partial_err(&self, e: &JsonError) -> bool { - if self.allow_partial { + if self.partial_mode.is_active() { matches!( e.error_type, JsonErrorType::EofWhileParsingList @@ -236,6 +238,58 @@ impl<'j, StringCache: StringMaybeCache, KeyCheck: MaybeKeyCheck, ParseNumber: Ma } } +#[derive(Debug, Clone, Copy)] +pub enum PartialMode { + Off, + On, + TrailingStrings, +} + +impl Default for 
PartialMode { + fn default() -> Self { + Self::Off + } +} + +const PARTIAL_ERROR: &str = "Invalid partial mode, should be `'off'`, `'on'`, `'trailing-strings'` or a `bool`"; + +impl<'py> FromPyObject<'py> for PartialMode { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> { + if let Ok(bool_mode) = ob.downcast::<PyBool>() { + Ok(bool_mode.is_true().into()) + } else if let Ok(str_mode) = ob.extract::<&str>() { + match str_mode { + "off" => Ok(Self::Off), + "on" => Ok(Self::On), + "trailing-strings" => Ok(Self::TrailingStrings), + _ => Err(PyValueError::new_err(PARTIAL_ERROR)), + } + } else { + Err(PyTypeError::new_err(PARTIAL_ERROR)) + } + } +} + +impl From<bool> for PartialMode { + fn from(mode: bool) -> Self { + if mode { + Self::On + } else { + Self::Off + } + } +} + +impl PartialMode { + fn is_active(self) -> bool { + !matches!(self, Self::Off) + } + + fn allow_trailing_str(self) -> bool { + matches!(self, Self::TrailingStrings) + } +} + trait MaybeKeyCheck: Default { fn check(&mut self, key: &str, index: usize) -> JsonResult<()>; } diff --git a/crates/jiter/src/simd_aarch64.rs b/crates/jiter/src/simd_aarch64.rs index 9343ddb3..b4d08ff3 100644 --- a/crates/jiter/src/simd_aarch64.rs +++ b/crates/jiter/src/simd_aarch64.rs @@ -196,6 +196,7 @@ pub fn decode_string_chunk( data: &[u8], mut index: usize, mut ascii_only: bool, + allow_partial: bool, ) -> JsonResult<(StringChunk, bool, usize)> { while let Some(byte_chunk) = data.get(index..index + SIMD_STEP) { let byte_vec = load_slice(byte_chunk); @@ -216,7 +217,7 @@ pub fn decode_string_chunk( } } // we got near the end of the string, fall back to the slow path - StringChunk::decode_fallback(data, index, ascii_only) + StringChunk::decode_fallback(data, index, ascii_only, allow_partial) } #[rustfmt::skip] diff --git a/crates/jiter/src/string_decoder.rs b/crates/jiter/src/string_decoder.rs index 3bfd0441..c2d93184 100644 --- a/crates/jiter/src/string_decoder.rs +++ b/crates/jiter/src/string_decoder.rs @@ -13,9 +13,14 @@ pub trait 
AbstractStringDecoder<'t, 'j> where 'j: 't, { - type Output; - - fn decode(data: &'j [u8], index: usize, tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)>; + type Output: std::fmt::Debug; + + fn decode( + data: &'j [u8], + index: usize, + tape: &'t mut Tape, + allow_partial: bool, + ) -> JsonResult<(Self::Output, usize)>; } pub struct StringDecoder; @@ -69,15 +74,22 @@ where { type Output = StringOutput<'t, 'j>; - fn decode(data: &'j [u8], index: usize, tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)> { + fn decode( + data: &'j [u8], + index: usize, + tape: &'t mut Tape, + allow_partial: bool, + ) -> JsonResult<(Self::Output, usize)> { let start = index + 1; - match decode_chunk(data, start, true)? { - (StringChunk::Quote, ascii_only, index) => { + match decode_chunk(data, start, true, allow_partial)? { + (StringChunk::StringEnd, ascii_only, index) => { let s = to_str(&data[start..index], ascii_only, start)?; Ok((StringOutput::Data(s, ascii_only), index + 1)) } - (StringChunk::Backslash, ascii_only, index) => decode_to_tape(data, index, tape, start, ascii_only), + (StringChunk::Backslash, ascii_only, index) => { + decode_to_tape(data, index, tape, start, ascii_only, allow_partial) + } } } } @@ -88,6 +100,7 @@ fn decode_to_tape<'t, 'j>( tape: &'t mut Tape, start: usize, mut ascii_only: bool, + allow_partial: bool, ) -> JsonResult<(StringOutput<'t, 'j>, usize)> { tape.clear(); let mut chunk_start = start; @@ -115,8 +128,8 @@ fn decode_to_tape<'t, 'j>( return json_err!(EofWhileParsingString, index); } - match decode_chunk(data, index, ascii_only)? { - (StringChunk::Quote, ascii_only, new_index) => { + match decode_chunk(data, index, ascii_only, allow_partial)? 
{ + (StringChunk::StringEnd, ascii_only, new_index) => { tape.extend_from_slice(&data[index..new_index]); index = new_index + 1; let s = to_str(tape, ascii_only, start)?; @@ -132,31 +145,41 @@ fn decode_to_tape<'t, 'j>( } #[inline(always)] -pub fn decode_chunk(data: &[u8], index: usize, ascii_only: bool) -> JsonResult<(StringChunk, bool, usize)> { +pub fn decode_chunk( + data: &[u8], + index: usize, + ascii_only: bool, + allow_partial: bool, +) -> JsonResult<(StringChunk, bool, usize)> { // TODO x86_64: use simd #[cfg(target_arch = "aarch64")] { - crate::simd_aarch64::decode_string_chunk(data, index, ascii_only) + crate::simd_aarch64::decode_string_chunk(data, index, ascii_only, allow_partial) } #[cfg(not(target_arch = "aarch64"))] { - StringChunk::decode_fallback(data, index, ascii_only) + StringChunk::decode_fallback(data, index, ascii_only, allow_partial) } } pub(crate) enum StringChunk { - Quote, + StringEnd, Backslash, } impl StringChunk { #[inline(always)] - pub fn decode_fallback(data: &[u8], mut index: usize, mut ascii_only: bool) -> JsonResult<(Self, bool, usize)> { + pub fn decode_fallback( + data: &[u8], + mut index: usize, + mut ascii_only: bool, + allow_partial: bool, + ) -> JsonResult<(Self, bool, usize)> { while let Some(next) = data.get(index) { if !JSON_ASCII[*next as usize] { match &CHAR_TYPE[*next as usize] { - CharType::Quote => return Ok((Self::Quote, ascii_only, index)), + CharType::Quote => return Ok((Self::StringEnd, ascii_only, index)), CharType::Backslash => return Ok((Self::Backslash, ascii_only, index)), CharType::ControlChar => return json_err!(ControlCharacterWhileParsingString, index), CharType::Other => { @@ -166,7 +189,11 @@ impl StringChunk { } index += 1; } - json_err!(EofWhileParsingString, index) + if allow_partial { + Ok((Self::StringEnd, ascii_only, index)) + } else { + json_err!(EofWhileParsingString, index) + } } /// decode an array (generally from SIMD) return the result of the chunk, or none if the non-ascii character @@ 
-181,7 +208,7 @@ impl StringChunk { for u8_char in data { if !JSON_ASCII[u8_char as usize] { return match &CHAR_TYPE[u8_char as usize] { - CharType::Quote => Some(Ok((Self::Quote, ascii_only, *index))), + CharType::Quote => Some(Ok((Self::StringEnd, ascii_only, *index))), CharType::Backslash => Some(Ok((Self::Backslash, ascii_only, *index))), CharType::ControlChar => Some(json_err!(ControlCharacterWhileParsingString, *index)), CharType::Other => { @@ -338,13 +365,18 @@ where { type Output = Range; - fn decode(data: &'j [u8], mut index: usize, _tape: &'t mut Tape) -> JsonResult<(Self::Output, usize)> { + fn decode( + data: &'j [u8], + mut index: usize, + _tape: &'t mut Tape, + allow_partial: bool, + ) -> JsonResult<(Self::Output, usize)> { index += 1; let start = index; loop { - index = match decode_chunk(data, index, true)? { - (StringChunk::Quote, _, index) => { + index = match decode_chunk(data, index, true, allow_partial)? { + (StringChunk::StringEnd, _, index) => { let r = start..index; return Ok((r, index + 1)); } diff --git a/crates/jiter/src/value.rs b/crates/jiter/src/value.rs index d086a495..c835c8bd 100644 --- a/crates/jiter/src/value.rs +++ b/crates/jiter/src/value.rs @@ -168,7 +168,7 @@ fn take_value<'j, 's>( Ok(JsonValue::Null) } Peek::String => { - let s: StringOutput<'_, 'j> = parser.consume_string::(tape)?; + let s: StringOutput<'_, 'j> = parser.consume_string::(tape, false)?; Ok(JsonValue::Str(create_cow(s))) } Peek::Array => { @@ -242,7 +242,7 @@ pub(crate) fn take_value_skip( Peek::False => parser.consume_false(), Peek::Null => parser.consume_null(), Peek::String => { - parser.consume_string::(tape)?; + parser.consume_string::(tape, false)?; Ok(()) } Peek::Array => { diff --git a/crates/jiter/tests/main.rs b/crates/jiter/tests/main.rs index af592b00..5d907b7c 100644 --- a/crates/jiter/tests/main.rs +++ b/crates/jiter/tests/main.rs @@ -95,7 +95,7 @@ macro_rules! 
single_expect_ok_or_error { #[allow(non_snake_case)] #[test] fn [< single_element_ok__ $name >]() { - let mut jiter = Jiter::new($json.as_bytes(), true); + let mut jiter = Jiter::new($json.as_bytes()).with_allow_inf_nan(); let elements = json_vec(&mut jiter, None).unwrap().join(", "); assert_eq!(elements, $expected); jiter.finish().unwrap(); @@ -108,7 +108,7 @@ macro_rules! single_expect_ok_or_error { #[allow(non_snake_case)] #[test] fn [< single_element_xerror__ $name >]() { - let mut jiter = Jiter::new($json.as_bytes(), true); + let mut jiter = Jiter::new($json.as_bytes()).with_allow_inf_nan(); let result = json_vec(&mut jiter, None); let first_value = match result { Ok(v) => v, @@ -239,7 +239,7 @@ single_tests! { fn invalid_string_controls() { let json = "\"123\x08\x0c\n\r\t\""; let b = json.as_bytes(); - let mut jiter = Jiter::new(b, false); + let mut jiter = Jiter::new(b); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -257,7 +257,7 @@ fn invalid_string_controls() { fn json_parse_str() { let json = r#" "foobar" "#; let data = json.as_bytes(); - let mut jiter = Jiter::new(data, false); + let mut jiter = Jiter::new(data); let peek = jiter.peek().unwrap(); assert_eq!(peek, Peek::String); assert_eq!(jiter.current_position(), LinePosition::new(1, 2)); @@ -274,7 +274,7 @@ macro_rules! string_tests { #[test] fn [< string_parsing_ $name >]() { let data = $json.as_bytes(); - let mut jiter = Jiter::new(data, false); + let mut jiter = Jiter::new(data); let str = jiter.next_str().unwrap(); assert_eq!(str, $expected); jiter.finish().unwrap(); @@ -301,7 +301,7 @@ macro_rules! string_test_errors { #[test] fn [< string_parsing_errors_ $name >]() { let data = $json.as_bytes(); - let mut jiter = Jiter::new(data, false); + let mut jiter = Jiter::new(data); match jiter.next_str() { Ok(t) => panic!("unexpectedly valid: {:?} -> {:?}", $json, t), Err(e) => { @@ -340,7 +340,7 @@ string_test_errors! 
{ fn invalid_unicode_code() { let json = vec![34, 92, 34, 206, 44, 163, 34]; // dbg!(json.iter().map(|b| *b as char).collect::>()); - let mut jiter = Jiter::new(&json, false); + let mut jiter = Jiter::new(&json); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -354,7 +354,7 @@ fn invalid_unicode_code() { fn invalid_control() { let json = vec![34, 206, 34]; // dbg!(json.iter().map(|b| *b as char).collect::>()); - let mut jiter = Jiter::new(&json, false); + let mut jiter = Jiter::new(&json); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -412,7 +412,7 @@ fn utf8_range_long() { #[test] fn nan_disallowed() { let json = r#"[NaN]"#; - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::NaN); let e = jiter.next_number().unwrap_err(); assert_eq!( @@ -426,7 +426,7 @@ fn nan_disallowed() { #[test] fn inf_disallowed() { let json = r#"[Infinity]"#; - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::Infinity); let e = jiter.next_number().unwrap_err(); assert_eq!( @@ -440,7 +440,7 @@ fn inf_disallowed() { #[test] fn inf_neg_disallowed() { let json = r#"[-Infinity]"#; - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::Minus); let e = jiter.next_number().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); @@ -451,7 +451,7 @@ fn inf_neg_disallowed() { #[test] fn num_after() { let json = r#"2:"#; // `:` is 58, directly after 9 - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); let num = jiter.next_number().unwrap(); assert_eq!(num, NumberAny::Int(NumberInt::Int(2))); let e = jiter.finish().unwrap_err(); @@ -466,7 +466,7 @@ fn num_after() { 
#[test] fn num_before() { let json = r#"2/"#; // `/` is 47, directly before 0 - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); let num = jiter.next_number().unwrap(); assert_eq!(num, NumberAny::Int(NumberInt::Int(2))); let e = jiter.finish().unwrap_err(); @@ -481,7 +481,7 @@ fn num_before() { #[test] fn nan_disallowed_wrong_type() { let json = r#"[NaN]"#; - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); assert_eq!(jiter.next_array().unwrap().unwrap(), Peek::NaN); let e = jiter.next_str().unwrap_err(); assert_eq!( @@ -517,7 +517,7 @@ fn value_disallow_nan() { #[test] fn key_str() { let json = r#"{"foo": "bar"}"#; - let mut jiter = Jiter::new(json.as_bytes(), false); + let mut jiter = Jiter::new(json.as_bytes()); assert_eq!(jiter.next_object().unwrap().unwrap(), "foo"); assert_eq!(jiter.next_str().unwrap(), "bar"); assert!(jiter.next_key().unwrap().is_none()); @@ -527,7 +527,7 @@ fn key_str() { #[test] fn key_bytes() { let json = r#"{"foo": "bar"}"#.as_bytes(); - let mut jiter = Jiter::new(json, false); + let mut jiter = Jiter::new(json); assert_eq!(jiter.next_object_bytes().unwrap().unwrap(), b"foo"); assert_eq!(jiter.next_bytes().unwrap(), *b"bar"); assert!(jiter.next_key().unwrap().is_none()); @@ -734,7 +734,7 @@ fn pass1_to_value() { fn pass1_skip() { let json = read_file("./benches/pass1.json"); let json_data = json.as_bytes(); - let mut jiter = Jiter::new(json_data, false); + let mut jiter = Jiter::new(json_data); jiter.next_skip().unwrap(); jiter.finish().unwrap(); } @@ -754,7 +754,7 @@ fn escaped_string() { #[test] fn jiter_object() { - let mut jiter = Jiter::new(br#"{"foo": "bar", "spam": [ 1, -2, "x"]}"#, false); + let mut jiter = Jiter::new(br#"{"foo": "bar", "spam": [ 1, -2, "x"]}"#); assert_eq!(jiter.next_object().unwrap(), Some("foo")); assert_eq!(jiter.next_str().unwrap(), "bar"); assert_eq!(jiter.next_key().unwrap(), Some("spam")); @@ -771,7 +771,7 
@@ fn jiter_object() { #[test] fn jiter_inf() { - let mut jiter = Jiter::new(b"[Infinity, -Infinity, NaN]", true); + let mut jiter = Jiter::new(b"[Infinity, -Infinity, NaN]").with_allow_inf_nan(); assert_eq!(jiter.next_array().unwrap(), Some(Peek::Infinity)); assert_eq!(jiter.next_float().unwrap(), f64::INFINITY); assert_eq!(jiter.array_step().unwrap(), Some(Peek::Minus)); @@ -784,7 +784,7 @@ fn jiter_inf() { #[test] fn jiter_bool() { - let mut jiter = Jiter::new(b"[true, false, null]", false); + let mut jiter = Jiter::new(b"[true, false, null]"); assert_eq!(jiter.next_array().unwrap(), Some(Peek::True)); assert_eq!(jiter.next_bool().unwrap(), true); assert_eq!(jiter.array_step().unwrap(), Some(Peek::False)); @@ -797,7 +797,7 @@ fn jiter_bool() { #[test] fn jiter_bytes() { - let mut jiter = Jiter::new(br#"{"foo": "bar", "new-line": "\\n"}"#, false); + let mut jiter = Jiter::new(br#"{"foo": "bar", "new-line": "\\n"}"#); assert_eq!(jiter.next_object_bytes().unwrap().unwrap(), b"foo"); assert_eq!(jiter.next_bytes().unwrap(), b"bar"); assert_eq!(jiter.next_key_bytes().unwrap().unwrap(), b"new-line"); @@ -808,7 +808,7 @@ fn jiter_bytes() { #[test] fn jiter_number() { - let mut jiter = Jiter::new(br#" [1, 2.2, 3, 4.1, 5.67]"#, false); + let mut jiter = Jiter::new(br#" [1, 2.2, 3, 4.1, 5.67]"#); assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert_eq!(jiter.array_step().unwrap().unwrap().into_inner(), b'2'); @@ -830,7 +830,7 @@ fn jiter_number() { #[test] fn jiter_bytes_u_escape() { - let mut jiter = Jiter::new(br#"{"foo": "xx \u00a3"}"#, false); + let mut jiter = Jiter::new(br#"{"foo": "xx \u00a3"}"#); assert_eq!(jiter.next_object_bytes().unwrap().unwrap(), b"foo"); assert_eq!(jiter.next_bytes().unwrap(), b"xx \\u00a3"); @@ -841,14 +841,14 @@ fn jiter_bytes_u_escape() { #[test] fn jiter_empty_array() { - let mut jiter = Jiter::new(b"[]", false); + let mut jiter = Jiter::new(b"[]"); 
assert_eq!(jiter.next_array().unwrap(), None); jiter.finish().unwrap(); } #[test] fn jiter_trailing_bracket() { - let mut jiter = Jiter::new(b"[1]]", false); + let mut jiter = Jiter::new(b"[1]]"); assert_eq!(jiter.next_array().unwrap().unwrap().into_inner(), b'1'); assert_eq!(jiter.next_int().unwrap(), NumberInt::Int(1)); assert!(jiter.array_step().unwrap().is_none()); @@ -862,7 +862,7 @@ fn jiter_trailing_bracket() { #[test] fn jiter_wrong_type() { - let mut jiter = Jiter::new(b" 123", false); + let mut jiter = Jiter::new(b" 123"); let e = jiter.next_str().unwrap_err(); assert_eq!( e.error_type, @@ -885,7 +885,7 @@ fn test_crazy_massive_int() { let mut s = "5".to_string(); s.push_str(&"0".repeat(500)); s.push_str("E-6666"); - let mut jiter = Jiter::new(s.as_bytes(), false); + let mut jiter = Jiter::new(s.as_bytes()); assert_eq!(jiter.next_float().unwrap(), 0.0); jiter.finish().unwrap(); } @@ -950,7 +950,7 @@ macro_rules! number_bytes { paste::item! { #[test] fn [< $name >]() { - let mut jiter = Jiter::new($json, false); + let mut jiter = Jiter::new($json); let bytes = jiter.next_number_bytes().unwrap(); assert_eq!(bytes, $expected); } @@ -1071,7 +1071,7 @@ fn readme_jiter() { "+44 2345678" ] }"#; - let mut jiter = Jiter::new(json_data.as_bytes(), false); + let mut jiter = Jiter::new(json_data.as_bytes()); assert_eq!(jiter.next_object().unwrap(), Some("name")); assert_eq!(jiter.next_str().unwrap(), "John Doe"); assert_eq!(jiter.next_key().unwrap(), Some("age")); @@ -1094,7 +1094,7 @@ fn readme_jiter() { #[test] fn jiter_clone() { let json = r#"[1, 2]"#; - let mut jiter1 = Jiter::new(json.as_bytes(), false); + let mut jiter1 = Jiter::new(json.as_bytes()); assert_eq!(jiter1.next_array().unwrap().unwrap().into_inner(), b'1'); let n = jiter1.next_number().unwrap(); assert_eq!(n, NumberAny::Int(NumberInt::Int(1))); @@ -1118,7 +1118,7 @@ fn jiter_clone() { #[test] fn jiter_invalid_value() { - let mut jiter = Jiter::new(b" bar", false); + let mut jiter = Jiter::new(b" 
bar"); let e = jiter.next_value().unwrap_err(); assert_eq!( e.error_type, @@ -1132,7 +1132,7 @@ fn jiter_invalid_value() { fn jiter_wrong_types() { macro_rules! expect_wrong_type_inner { ($actual:path, $input:expr, $method: ident, $expected:path) => { - let mut jiter = Jiter::new($input, false); + let mut jiter = Jiter::new($input); let result = jiter.$method(); if $actual == $expected || matches!(($actual, $expected), (JsonType::Int, JsonType::Float)) { // Type matches, or int input to float @@ -1182,7 +1182,7 @@ fn peek_debug() { #[test] fn jiter_invalid_numbers() { - let mut jiter = Jiter::new(b" -a", false); + let mut jiter = Jiter::new(b" -a"); let peek = jiter.peek().unwrap(); let e = jiter.known_int(peek).unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); @@ -1196,7 +1196,7 @@ fn jiter_invalid_numbers() { #[test] fn jiter_invalid_numbers_expected_some_value() { - let mut jiter = Jiter::new(b" bar", false); + let mut jiter = Jiter::new(b" bar"); let peek = jiter.peek().unwrap(); let e = jiter.known_int(peek).unwrap_err(); assert_eq!( @@ -1280,7 +1280,7 @@ fn test_into_static() { #[test] fn jiter_next_value_borrowed() { - let mut jiter = Jiter::new(br#" "v" "#, false); + let mut jiter = Jiter::new(br#" "v" "#); let v = jiter.next_value().unwrap(); let s = match v { JsonValue::Str(s) => s, @@ -1292,7 +1292,7 @@ fn jiter_next_value_borrowed() { #[test] fn jiter_next_value_owned() { - let mut jiter = Jiter::new(br#" "v" "#, false); + let mut jiter = Jiter::new(br#" "v" "#); let v = jiter.next_value_owned().unwrap(); let s = match v { JsonValue::Str(s) => s, @@ -1366,7 +1366,7 @@ fn test_number_int_try_from_bytes() { #[test] fn jiter_skip_whole_object() { - let mut jiter = Jiter::new(br#"{"x": 1}"#, false); + let mut jiter = Jiter::new(br#"{"x": 1}"#); jiter.next_skip().unwrap(); jiter.finish().unwrap(); } @@ -1383,7 +1383,6 @@ fn jiter_skip_in_object() { "is_object": {"x": 1, "y": ["2"], "z": {}}, "last": 123 } "#, - 
false, ); assert_eq!(jiter.next_object(), Ok(Some("is_bool"))); @@ -1433,8 +1432,8 @@ fn jiter_skip_in_array() { "\"", "last item" ] "#, - true, - ); + ) + .with_allow_inf_nan(); assert_eq!(jiter.next_array(), Ok(Some(Peek::True))); jiter.known_skip(Peek::True).unwrap(); // true @@ -1482,14 +1481,14 @@ fn jiter_skip_in_array() { #[test] fn jiter_skip_backslash_strings() { - let mut jiter = Jiter::new(br#" ["\"", "\n", "\t", "\u00a3", "\\"] "#, false); + let mut jiter = Jiter::new(br#" ["\"", "\n", "\t", "\u00a3", "\\"] "#); jiter.next_skip().unwrap(); jiter.finish().unwrap(); } #[test] fn jiter_skip_invalid_ident() { - let mut jiter = Jiter::new(br#"trUe"#, true); + let mut jiter = Jiter::new(br#"trUe"#).with_allow_inf_nan(); let e = jiter.next_skip().unwrap_err(); assert_eq!( e.error_type, @@ -1499,7 +1498,7 @@ fn jiter_skip_invalid_ident() { #[test] fn jiter_skip_invalid_string() { - let mut jiter = Jiter::new(br#" "foo "#, true); + let mut jiter = Jiter::new(br#" "foo "#).with_allow_inf_nan(); let e = jiter.next_skip().unwrap_err(); assert_eq!( e.error_type, @@ -1509,21 +1508,21 @@ fn jiter_skip_invalid_string() { #[test] fn jiter_skip_invalid_int() { - let mut jiter = Jiter::new(br#"01"#, false); + let mut jiter = Jiter::new(br#"01"#); let e = jiter.next_skip().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::InvalidNumber)); } #[test] fn jiter_skip_invalid_object() { - let mut jiter = Jiter::new(br#"{{"#, false); + let mut jiter = Jiter::new(br#"{{"#); let e = jiter.next_skip().unwrap_err(); assert_eq!(e.error_type, JiterErrorType::JsonError(JsonErrorType::KeyMustBeAString)); } #[test] fn jiter_skip_invalid_string_u() { - let mut jiter = Jiter::new(br#" "\uddBd" "#, false); + let mut jiter = Jiter::new(br#" "\uddBd" "#); let e = jiter.next_skip().unwrap_err(); assert_eq!( e.error_type, @@ -1533,7 +1532,7 @@ fn jiter_skip_invalid_string_u() { #[test] fn jiter_skip_invalid_nan() { - let mut jiter = Jiter::new(b"NaN", false); + let 
mut jiter = Jiter::new(b"NaN"); let e = jiter.next_skip().unwrap_err(); assert_eq!( e.error_type, @@ -1544,7 +1543,7 @@ fn jiter_skip_invalid_nan() { #[test] fn jiter_skip_invalid_string_high() { let json = vec![34, 92, 34, 206, 44, 163, 34]; - let mut jiter = Jiter::new(&json, false); + let mut jiter = Jiter::new(&json); // NOTE this would raise an error with next_value etc, but next_skip does not check UTF-8 jiter.next_skip().unwrap(); jiter.finish().unwrap(); @@ -1552,7 +1551,7 @@ fn jiter_skip_invalid_string_high() { #[test] fn jiter_skip_invalid_long_float() { - let mut jiter = Jiter::new(br#"2121515572557277572557277e"#, false); + let mut jiter = Jiter::new(br#"2121515572557277572557277e"#); let e = jiter.next_skip().unwrap_err(); assert_eq!( e.error_type, @@ -1563,5 +1562,17 @@ fn jiter_skip_invalid_long_float() { #[test] fn jiter_value_invalid_long_float() { let e = JsonValue::parse(br#"2121515572557277572557277e"#, false).unwrap_err(); - assert_eq!(e.error_type, JsonErrorType::EofWhileParsingValue,); + assert_eq!(e.error_type, JsonErrorType::EofWhileParsingValue); +} + +#[test] +fn jiter_partial_string() { + let mut jiter = Jiter::new(br#"["foo"#).with_allow_partial_strings(); + assert_eq!(jiter.next_array().unwrap(), Some(Peek::String)); + assert_eq!(jiter.next_str().unwrap(), "foo"); + let e = jiter.array_step().unwrap_err(); + assert_eq!( + e.error_type, + JiterErrorType::JsonError(JsonErrorType::EofWhileParsingList) + ); }