Skip to content
This repository was archived by the owner on Mar 25, 2024. It is now read-only.

Yaml 1.1 octal parsing quirk #225

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 63 additions & 17 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ use yaml_rust::scanner::{Marker, TScalarStyle, TokenType};
/// ```
pub struct Deserializer<'a> {
input: Input<'a>,
quirks: DeserializingQuirks,
}

/// Handling of nonstandard yaml features
#[derive(Default, Clone)]
pub struct DeserializingQuirks {
/// Support yaml 1.1 octal literal
///
/// I.e 0123 should be deserialized as string "0123" in yaml 1.2,
/// but some yaml parsers deserialize it as number 83 per yaml 1.1 spec
pub old_octals: bool,
}

enum Input<'a> {
Expand All @@ -67,16 +78,35 @@ enum Input<'a> {
}

impl<'a> Deserializer<'a> {
/// Creates a YAML deserializer from a `&str` and set quirks.
pub fn from_str_with_quirks(s: &'a str, quirks: DeserializingQuirks) -> Self {
let input = Input::Str(s);
Deserializer { input, quirks }
}

/// Creates a YAML deserializer from a `&str`.
pub fn from_str(s: &'a str) -> Self {
let input = Input::Str(s);
Deserializer { input }
Self::from_str_with_quirks(s, Default::default())
}

/// Creates a YAML deserializer from a `&[u8]` and set quirks.
pub fn from_slice_with_quirks(v: &'a [u8], quirks: DeserializingQuirks) -> Self {
let input = Input::Slice(v);
Deserializer { input, quirks }
}

/// Creates a YAML deserializer from a `&[u8]`.
pub fn from_slice(v: &'a [u8]) -> Self {
let input = Input::Slice(v);
Deserializer { input }
Self::from_slice_with_quirks(v, Default::default())
}

/// Creates a YAML deserializer from an `io::Read` and set quirks.
pub fn from_reader_with_quirks<R>(rdr: R, quirks: DeserializingQuirks) -> Self
where
R: io::Read + 'a,
{
let input = Input::Read(Box::new(rdr));
Deserializer { input, quirks }
}

/// Creates a YAML deserializer from an `io::Read`.
Expand All @@ -88,8 +118,7 @@ impl<'a> Deserializer<'a> {
where
R: io::Read + 'a,
{
let input = Input::Read(Box::new(rdr));
Deserializer { input }
Self::from_reader_with_quirks(rdr, Default::default())
}

fn de<T>(self, f: impl FnOnce(&mut DeserializerFromEvents) -> Result<T>) -> Result<T> {
Expand All @@ -101,6 +130,7 @@ impl<'a> Deserializer<'a> {
pos: &mut pos,
path: Path::Root,
remaining_depth: 128,
quirks: self.quirks.clone(),
})?;
multidoc.pos.store(pos, Ordering::Relaxed);
return Ok(t);
Expand All @@ -117,6 +147,7 @@ impl<'a> Deserializer<'a> {
pos: &mut pos,
path: Path::Root,
remaining_depth: 128,
quirks: self.quirks.clone(),
})?;
if pos == loader.events.len() {
Ok(t)
Expand Down Expand Up @@ -174,6 +205,7 @@ impl<'de> Iterator for Deserializer<'de> {
return if pos < multidoc.loader.events.len() {
Some(Deserializer {
input: Input::Multidoc(Arc::clone(multidoc)),
quirks: self.quirks.clone(),
})
} else {
None
Expand All @@ -182,6 +214,7 @@ impl<'de> Iterator for Deserializer<'de> {
Input::Fail(err) => {
return Some(Deserializer {
input: Input::Fail(Arc::clone(err)),
quirks: self.quirks.clone(),
});
}
_ => {}
Expand All @@ -201,6 +234,7 @@ impl<'de> Iterator for Deserializer<'de> {
} else {
Some(Deserializer {
input: Input::Multidoc(multidoc),
quirks: self.quirks.clone(),
})
}
}
Expand All @@ -209,6 +243,7 @@ impl<'de> Iterator for Deserializer<'de> {
self.input = Input::Fail(Arc::clone(&fail));
Some(Deserializer {
input: Input::Fail(fail),
quirks: self.quirks.clone(),
})
}
}
Expand Down Expand Up @@ -503,6 +538,7 @@ struct DeserializerFromEvents<'a> {
pos: &'a mut usize,
path: Path<'a>,
remaining_depth: u8,
quirks: DeserializingQuirks,
}

impl<'a> DeserializerFromEvents<'a> {
Expand Down Expand Up @@ -534,6 +570,7 @@ impl<'a> DeserializerFromEvents<'a> {
pos,
path: Path::Alias { parent: &self.path },
remaining_depth: self.remaining_depth,
quirks: self.quirks.clone(),
})
}
None => panic!("unresolved alias: {}", *pos),
Expand Down Expand Up @@ -678,6 +715,7 @@ fn visit_scalar<'de, V>(
style: TScalarStyle,
tag: &Option<TokenType>,
visitor: V,
quirks: &DeserializingQuirks,
) -> Result<V::Value>
where
V: Visitor<'de>,
Expand Down Expand Up @@ -707,7 +745,7 @@ where
visitor.visit_str(v)
}
} else if style == TScalarStyle::Plain {
visit_untagged_str(visitor, v)
visit_untagged_str(visitor, quirks, v)
} else {
visitor.visit_str(v)
}
Expand Down Expand Up @@ -737,6 +775,7 @@ impl<'de, 'a, 'r> de::SeqAccess<'de> for SeqAccess<'a, 'r> {
index: self.len,
},
remaining_depth: self.de.remaining_depth,
quirks: self.de.quirks.clone(),
};
self.len += 1;
seed.deserialize(&mut element_de).map(Some)
Expand Down Expand Up @@ -792,6 +831,7 @@ impl<'de, 'a, 'r> de::MapAccess<'de> for MapAccess<'a, 'r> {
}
},
remaining_depth: self.de.remaining_depth,
quirks: self.de.quirks.clone(),
};
seed.deserialize(&mut value_de)
}
Expand Down Expand Up @@ -850,6 +890,7 @@ impl<'de, 'a, 'r> de::EnumAccess<'de> for EnumAccess<'a, 'r> {
key: variant,
},
remaining_depth: self.de.remaining_depth,
quirks: self.de.quirks.clone(),
};
Ok((ret, variant_visitor))
}
Expand Down Expand Up @@ -938,7 +979,7 @@ impl<'de, 'a, 'r> de::VariantAccess<'de> for UnitVariantAccess<'a, 'r> {
}
}

fn visit_untagged_str<'de, V>(visitor: V, v: &str) -> Result<V::Value>
fn visit_untagged_str<'de, V>(visitor: V, quirks: &DeserializingQuirks, v: &str) -> Result<V::Value>
where
V: Visitor<'de>,
{
Expand Down Expand Up @@ -992,6 +1033,11 @@ where
// with leading zero(s) followed by numeric characters this is in fact a
// string according to the YAML 1.2 spec.
// https://yaml.org/spec/1.2/spec.html#id2761292
if quirks.old_octals {
if let Ok(n) = u64::from_str_radix(&v[1..], 8) {
return visitor.visit_u64(n);
}
}
return visitor.visit_str(v);
}
if let Ok(n) = v.parse() {
Expand Down Expand Up @@ -1032,7 +1078,7 @@ fn trim_start_matches(s: &str, pat: char) -> &str {
s.trim_left_matches(pat)
}

fn invalid_type(event: &Event, exp: &dyn Expected) -> Error {
fn invalid_type(event: &Event, exp: &dyn Expected, quirks: &DeserializingQuirks) -> Error {
enum Void {}

struct InvalidType<'a> {
Expand All @@ -1051,7 +1097,7 @@ fn invalid_type(event: &Event, exp: &dyn Expected) -> Error {
Event::Alias(_) => unreachable!(),
Event::Scalar(v, style, tag) => {
let get_type = InvalidType { exp };
match visit_scalar(v, *style, tag, get_type) {
match visit_scalar(v, *style, tag, get_type, quirks) {
Ok(void) => match void {},
Err(invalid_type) => invalid_type,
}
Expand All @@ -1071,8 +1117,8 @@ impl<'a> DeserializerFromEvents<'a> {
let (next, marker) = self.next()?;
match next {
Event::Alias(mut pos) => self.jump(&mut pos)?.deserialize_scalar(visitor),
Event::Scalar(v, style, tag) => visit_scalar(v, *style, tag, visitor),
other => Err(invalid_type(other, &visitor)),
Event::Scalar(v, style, tag) => visit_scalar(v, *style, tag, visitor, &self.quirks),
other => Err(invalid_type(other, &visitor, &self.quirks)),
}
.map_err(|err| error::fix_marker(err, marker, self.path))
}
Expand All @@ -1088,7 +1134,7 @@ impl<'de, 'a, 'r> de::Deserializer<'de> for &'r mut DeserializerFromEvents<'a> {
let (next, marker) = self.next()?;
match next {
Event::Alias(mut pos) => self.jump(&mut pos)?.deserialize_any(visitor),
Event::Scalar(v, style, tag) => visit_scalar(v, *style, tag, visitor),
Event::Scalar(v, style, tag) => visit_scalar(v, *style, tag, visitor, &self.quirks),
Event::SequenceStart => self.visit_sequence(visitor),
Event::MappingStart => self.visit_mapping(visitor),
Event::SequenceEnd => panic!("unexpected end of sequence"),
Expand Down Expand Up @@ -1205,7 +1251,7 @@ impl<'de, 'a, 'r> de::Deserializer<'de> for &'r mut DeserializerFromEvents<'a> {
match next {
Event::Scalar(v, _, _) => visitor.visit_str(v),
Event::Alias(mut pos) => self.jump(&mut pos)?.deserialize_str(visitor),
other => Err(invalid_type(other, &visitor)),
other => Err(invalid_type(other, &visitor, &self.quirks)),
}
.map_err(|err: Error| error::fix_marker(err, marker, self.path))
}
Expand Down Expand Up @@ -1300,7 +1346,7 @@ impl<'de, 'a, 'r> de::Deserializer<'de> for &'r mut DeserializerFromEvents<'a> {
match next {
Event::Alias(mut pos) => self.jump(&mut pos)?.deserialize_seq(visitor),
Event::SequenceStart => self.visit_sequence(visitor),
other => Err(invalid_type(other, &visitor)),
other => Err(invalid_type(other, &visitor, &self.quirks)),
}
.map_err(|err| error::fix_marker(err, marker, self.path))
}
Expand Down Expand Up @@ -1332,7 +1378,7 @@ impl<'de, 'a, 'r> de::Deserializer<'de> for &'r mut DeserializerFromEvents<'a> {
match next {
Event::Alias(mut pos) => self.jump(&mut pos)?.deserialize_map(visitor),
Event::MappingStart => self.visit_mapping(visitor),
other => Err(invalid_type(other, &visitor)),
other => Err(invalid_type(other, &visitor, &self.quirks)),
}
.map_err(|err| error::fix_marker(err, marker, self.path))
}
Expand All @@ -1353,7 +1399,7 @@ impl<'de, 'a, 'r> de::Deserializer<'de> for &'r mut DeserializerFromEvents<'a> {
.deserialize_struct(name, fields, visitor),
Event::SequenceStart => self.visit_sequence(visitor),
Event::MappingStart => self.visit_mapping(visitor),
other => Err(invalid_type(other, &visitor)),
other => Err(invalid_type(other, &visitor, &self.quirks)),
}
.map_err(|err| error::fix_marker(err, marker, self.path))
}
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
clippy::must_use_candidate,
)]

pub use crate::de::{from_reader, from_slice, from_str, Deserializer};
pub use crate::de::{from_reader, from_slice, from_str, Deserializer, DeserializingQuirks};
pub use crate::error::{Error, Location, Result};
pub use crate::ser::{to_string, to_vec, to_writer, Serializer};
pub use crate::value::{from_value, to_value, Index, Number, Sequence, Value};
Expand Down
10 changes: 10 additions & 0 deletions tests/test_de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,16 @@ fn test_numbers() {
}
}

#[test]
fn test_quirk_old_octals() {
use serde::Deserialize;
let de = serde_yaml::Deserializer::from_str_with_quirks("0777", serde_yaml::DeserializingQuirks {
old_octals: true,
});
let value = u64::deserialize(de).unwrap();
assert_eq!(value, 0o777);
}

#[test]
fn test_stateful() {
struct Seed(i64);
Expand Down