Skip to content

Commit

Permalink
Add an ArrayDeserializer to read a JSON array as a stream
Browse files Browse the repository at this point in the history
This mimics the StreamDeserializer API and implements issue serde-rs#404.
  • Loading branch information
Yorhel committed Mar 19, 2019
1 parent e6b02d1 commit 2094291
Show file tree
Hide file tree
Showing 2 changed files with 243 additions and 0 deletions.
124 changes: 124 additions & 0 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,19 @@ impl<'de, R: Read<'de>> Deserializer<R> {
}
}

/// Turn this deserializer into an iterator over the elements of a
/// top-level JSON array, deserializing each element as a `T`.
///
/// Nothing is read from the input here; parsing happens lazily as the
/// returned `ArrayDeserializer` is advanced.
pub fn into_array_iter<T>(self) -> ArrayDeserializer<'de, R, T>
where
    T: de::Deserialize<'de>,
{
    ArrayDeserializer {
        lifetime: PhantomData,
        output: PhantomData,
        // The opening '[' has not been consumed yet.
        started: false,
        de: self,
    }
}

/// Parse arbitrarily deep JSON structures without any consideration for
/// overflowing the stack.
///
Expand Down Expand Up @@ -2149,6 +2162,117 @@ where
}
}



//////////////////////////////////////////////////////////////////////////////

/// Iterator that deserializes the elements of a single top-level JSON array
/// one at a time.
///
/// An array deserializer can be created from any JSON deserializer using the
/// `Deserializer::into_array_iter` method.
///
/// The top-level data must be a JSON array, but each array element can be any
/// JSON value that deserializes into `T`. Only one element is materialized per
/// call to `next`, which makes this preferable over deserializing into a
/// container type such as `Vec` when the complete array is too large to fit in
/// memory.
///
/// Each call to `next` yields `Some(Ok(value))` for an element, `Some(Err(_))`
/// when the input is malformed or truncated, and `None` once the closing `]`
/// has been consumed and nothing but whitespace follows it. Trailing
/// non-whitespace data after the `]` is reported as an error.
///
/// NOTE(review): the iterator does not record that it has finished or failed,
/// so callers should stop at the first `None` or `Err`; polling it further
/// looks like it can produce additional errors rather than `None`.
///
/// ```edition2018
/// use serde_json::{Deserializer, Value};
///
/// fn main() {
///     let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
///
///     let iter = Deserializer::from_str(data).into_array_iter::<Value>();
///
///     for value in iter {
///         println!("{}", value.unwrap());
///     }
/// }
/// ```
pub struct ArrayDeserializer<'de, R, T> {
// Underlying JSON deserializer that every element is read from.
de: Deserializer<R>,
started: bool, // True if we have consumed the first '['
// Marker for the element type produced by the iterator; no `T` is stored.
output: PhantomData<T>,
// Marker tying the iterator to the `'de` lifetime of the input data.
lifetime: PhantomData<&'de ()>,
}

impl<'de, R, T> ArrayDeserializer<'de, R, T>
where
    R: read::Read<'de>,
    T: de::Deserialize<'de>,
{
    /// Build a JSON array deserializer directly on top of one of the
    /// serde_json input sources.
    ///
    /// Going through an existing `Deserializer` is usually more convenient:
    ///
    ///   - Deserializer::from_str(...).into_array_iter()
    ///   - Deserializer::from_slice(...).into_array_iter()
    ///   - Deserializer::from_reader(...).into_array_iter()
    pub fn new(read: R) -> Self {
        ArrayDeserializer {
            lifetime: PhantomData,
            output: PhantomData,
            // The opening '[' has not been consumed yet.
            started: false,
            de: Deserializer::new(read),
        }
    }

    /// Finish the array: consume the pending character (the closing `]` at
    /// every call site) and check the remainder of the input with
    /// `Deserializer::end`.
    ///
    /// Returns `None` when that check succeeds, otherwise the error it
    /// reported wrapped as an iterator item.
    fn end(&mut self) -> Option<Result<T>> {
        self.de.eat_char();
        self.de.end().err().map(Err)
    }

    /// Deserialize the next element as a `T`; the outcome, success or
    /// failure, is always handed back as an iterator item.
    fn next_value(&mut self) -> Option<Result<T>> {
        Some(de::Deserialize::deserialize(&mut self.de))
    }
}

impl<'de, R, T> Iterator for ArrayDeserializer<'de, R, T>
where
    R: Read<'de>,
    T: de::Deserialize<'de>,
{
    type Item = Result<T>;

    /// Advance to the next array element.
    ///
    /// The first call expects the opening `[`; every later call expects
    /// either a `,` followed by another element or the closing `]`.
    ///
    /// Returns `Some(Ok(value))` for each element, `Some(Err(_))` when the
    /// input is malformed or ends prematurely, and `None` once the closing
    /// `]` has been consumed and the rest of the input passed the
    /// end-of-input check.
    ///
    /// NOTE(review): no "finished" or "failed" state is recorded. Calling
    /// `next` again after `None` reaches the end-of-input arm below and
    /// yields an error instead of `None`, and an error that does not consume
    /// input is reported again on every call. Callers should stop at the
    /// first `None` or `Err`.
    fn next(&mut self) -> Option<Result<T>> {
        match self.de.parse_whitespace() {
            Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
            Ok(Some(b'[')) if !self.started => {
                self.started = true;
                self.de.eat_char();

                // We have to peek at the next character here to handle an empty array.
                match self.de.parse_whitespace() {
                    Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
                    Ok(Some(b']')) => self.end(),
                    Ok(Some(_)) => self.next_value(),
                    Err(e) => Some(Err(e)),
                }
            }
            Ok(Some(b']')) if self.started => self.end(),
            Ok(Some(b',')) if self.started => {
                self.de.eat_char();

                match self.de.parse_whitespace() {
                    Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
                    // A ']' directly after ',' means the array ended with a
                    // dangling separator, which JSON does not allow.
                    Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
                    Ok(Some(_)) => self.next_value(),
                    Err(e) => Some(Err(e)),
                }
            }
            // Inside the array, anything other than ',' or ']' after an
            // element is a missing separator (e.g. `[1 2]`), not a missing
            // value, so report it the way regular sequence parsing does.
            Ok(Some(_)) if self.started => {
                Some(Err(self.de.peek_error(ErrorCode::ExpectedListCommaOrEnd)))
            }
            // The array has not been opened and the input does not start
            // with '['.
            Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
            Err(e) => Some(Err(e)),
        }
    }
}

//////////////////////////////////////////////////////////////////////////////

fn from_trait<'de, R, T>(read: R) -> Result<T>
Expand Down
119 changes: 119 additions & 0 deletions tests/array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#![cfg(not(feature = "preserve_order"))]

extern crate serde;

#[macro_use]
extern crate serde_json;

use serde_json::{Deserializer, Value};

// Runs the same test body against an array iterator built from each of the
// three input sources: `from_str`, `from_slice` and `from_reader`.
//
// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
#[cfg_attr(rustfmt, rustfmt_skip)]
macro_rules! test_stream {
    ($input:expr, $elem:ty, |$iter:ident| $body:block) => {
        // Input borrowed as a string slice.
        {
            let mut $iter = Deserializer::from_str($input).into_array_iter::<$elem>();
            $body
        }
        // Input borrowed as a byte slice.
        {
            let mut $iter = Deserializer::from_slice($input.as_bytes()).into_array_iter::<$elem>();
            $body
        }
        // Input pulled through an `io::Read` implementation.
        {
            let mut reader = $input.as_bytes();
            let mut $iter = Deserializer::from_reader(&mut reader).into_array_iter::<$elem>();
            $body
        }
    };
}

// An empty array yields no elements at all.
#[test]
fn test_json_array_empty() {
    let input = "[]";

    test_stream!(input, Value, |elements| {
        let first = elements.next();
        assert!(first.is_none());
    });
}

// Whitespace before, between and after the array tokens is skipped.
#[test]
fn test_json_array_whitespace() {
    let input = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(input, Value, |elements| {
        let first = elements.next().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second = elements.next().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        let after_last = elements.next();
        assert!(after_last.is_none());
    });
}

// Complete elements are still produced before truncated input is reported
// as an EOF error.
#[test]
fn test_json_array_truncated() {
    let input = "[{\"x\":40},{\"x\":";

    test_stream!(input, Value, |elements| {
        let complete = elements.next().unwrap().unwrap();
        assert_eq!(complete["x"], 40);

        let failure = elements.next().unwrap().unwrap_err();
        assert!(failure.is_eof());
    });
}

// Every kind of JSON value is accepted as an array element.
#[test]
fn test_json_array_primitive() {
    let input = "[{}, true, 1, [], 1.0, \"hey\", null]";

    test_stream!(input, Value, |elements| {
        let object = elements.next().unwrap().unwrap();
        assert_eq!(object, json!({}));

        let boolean = elements.next().unwrap().unwrap();
        assert_eq!(boolean, true);

        let integer = elements.next().unwrap().unwrap();
        assert_eq!(integer, 1);

        let array = elements.next().unwrap().unwrap();
        assert_eq!(array, json!([]));

        let float = elements.next().unwrap().unwrap();
        assert_eq!(float, 1.0);

        let string = elements.next().unwrap().unwrap();
        assert_eq!(string, "hey");

        let null = elements.next().unwrap().unwrap();
        assert_eq!(null, Value::Null);

        let after_last = elements.next();
        assert!(after_last.is_none());
    });
}

// Non-whitespace data after the closing bracket is rejected.
#[test]
fn test_json_array_tailing_data() {
    let input = "[]e";

    test_stream!(input, Value, |elements| {
        let failure = elements.next().unwrap().unwrap_err();
        let message = failure.to_string();
        assert_eq!(message, "trailing characters at line 1 column 3");
    });
}

// A comma directly before the closing bracket is rejected after the valid
// elements have been produced.
#[test]
fn test_json_array_tailing_comma() {
    let input = "[true,]";

    test_stream!(input, Value, |elements| {
        let first = elements.next().unwrap().unwrap();
        assert_eq!(first, true);

        let failure = elements.next().unwrap().unwrap_err();
        let message = failure.to_string();
        assert_eq!(message, "trailing comma at line 1 column 7");
    });
}

// Completely empty input is an error, not an empty array.
#[test]
fn test_json_array_eof() {
    let input = "";

    test_stream!(input, Value, |elements| {
        let failure = elements.next().unwrap().unwrap_err();
        let message = failure.to_string();
        assert_eq!(message, "EOF while parsing a value at line 1 column 0");
    });
}

0 comments on commit 2094291

Please sign in to comment.