Skip to content
This repository has been archived by the owner on May 30, 2022. It is now read-only.

Fix of erroneously accepted input when deserializing scalars #9

Merged
merged 2 commits into from
May 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@

## Unreleased

### Bug Fixes

- [#9]: Deserialization erroneously was successful in some cases where error is expected.
This broke deserialization of untagged enums which rely on error if variant cannot be parsed

### Misc Changes

- [#8]: Changes in the error type `DeError`:
Expand All @@ -26,7 +31,12 @@
|`DeError::EndOfAttributes`|Renamed to `DeError::KeyNotFound`
|`DeError::ExpectedStart`|Added

### New Tests

- [#9]: Added tests for incorrect nested tags in input

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9

## 0.23.0 -- 2022-05-08

Expand Down
218 changes: 209 additions & 9 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

use crate::{
de::escape::EscapedDeserializer,
de::{DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::IterState,
events::BytesStart,
events::{BytesCData, BytesStart},
reader::Decoder,
};
use serde::de::{self, DeserializeSeed, IntoDeserializer};
use serde::de::{self, DeserializeSeed, IntoDeserializer, Visitor};
use serde::serde_if_integer128;
use std::borrow::Cow;
use std::ops::Range;

Expand All @@ -25,11 +27,9 @@ enum ValueSource {
/// [`next_key_seed()`]: de::MapAccess::next_key_seed
/// [`next_value_seed()`]: de::MapAccess::next_value_seed
Unknown,
/// `next_key_seed` checked the attributes list and find it is not exhausted yet.
/// Next call to the `next_value_seed` will deserialize type from the attribute value
/// Next value should be deserialized from an attribute value; value is located
/// at specified span.
Attribute(Range<usize>),
/// The same as `Text`
Nested,
/// Value should be deserialized from the text content of the XML node, which
/// represented or by an ordinary text node, or by a CDATA node:
///
Expand All @@ -43,6 +43,96 @@ enum ValueSource {
/// </any-tag>
/// ```
Text,
/// Next value should be deserialized from an element with an any name.
/// Corresponding tag name will always be associated with a field with name
/// [`INNER_VALUE`].
///
/// That state is set when call to [`peek()`] returns a [`Start`] event
/// _and_ struct has a field with a special name [`INNER_VALUE`].
///
/// When in this state, next event, returned by [`next()`], will be a [`Start`],
/// which represents both a key, and a value. Value would be deserialized from
/// the whole element and how is will be done determined by the value deserializer.
/// The [`MapAccess`] do not consume any events in that state.
///
/// Because in that state any encountered `<tag>` is mapped to the [`INNER_VALUE`]
/// field, it is possible to use tag name as an enum discriminator, so `enum`s
/// can be deserialized from that XMLs:
///
/// ```xml
/// <any-tag>
/// <variant1>...</variant1>
/// <!-- ~~~~~~~~ - this data will determine that this is Enum::variant1 -->
/// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
/// </any-tag>
/// ```
/// ```xml
/// <any-tag>
/// <variant2>...</variant2>
/// <!-- ~~~~~~~~ - this data will determine that this is Enum::variant2 -->
/// <!--^^^^^^^^^^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
/// </any-tag>
/// ```
///
/// both can be deserialized into
///
/// ```ignore
/// enum Enum {
/// variant1,
/// variant2,
/// }
/// struct AnyName {
/// #[serde(rename = "$value")]
/// field: Enum,
/// }
/// ```
///
/// That is possible, because value deserializer have access to the full content
/// of a `<variant1>...</variant1>` or `<variant2>...</variant2>` node, including
/// the tag name.
///
/// Currently, processing of that enum variant is fully equivalent to the
/// processing of a [`Text`] variant. Split of variants made for clarity.
///
/// [`Start`]: DeEvent::Start
/// [`peek()`]: Deserializer::peek()
/// [`next()`]: Deserializer::next()
/// [`Text`]: Self::Text
Content,
/// Next value should be deserialized from an element with a dedicated name.
///
/// That state is set when call to [`peek()`] returns a [`Start`] event, which
/// [`name()`] represents a field name. That name will be deserialized as a key.
///
/// When in this state, next event, returned by [`next()`], will be a [`Start`],
/// which represents both a key, and a value. Value would be deserialized from
/// the whole element and how is will be done determined by the value deserializer.
/// The [`MapAccess`] do not consume any events in that state.
///
/// An illustration below shows, what data is used to deserialize key and value:
/// ```xml
/// <any-tag>
/// <key>...</key>
/// <!-- ~~~ - this data will be used to deserialize a map key -->
/// <!--^^^^^^^^^^^^^^ - this data will be used to deserialize a map value -->
/// </any-tag>
/// ```
///
/// Although value deserializer will have access to the full content of a `<key>`
/// node (including the tag name), it will not get much benefits from that,
/// because tag name will always be fixed for a given map field (equal to a
/// field name). So, if the field type is an `enum`, it cannot select its
/// variant based on the tag name. If that is needed, then [`Content`] variant
/// of this enum should be used. Such usage is enabled by annotating a struct
/// field as "content" field, which implemented as given the field a special
/// [`INNER_VALUE`] name.
///
/// [`Start`]: DeEvent::Start
/// [`peek()`]: Deserializer::peek()
/// [`next()`]: Deserializer::next()
/// [`name()`]: BytesStart::name()
/// [`Content`]: Self::Content
Nested,
}

/// A deserializer for `Attributes`
Expand Down Expand Up @@ -141,7 +231,7 @@ where
// TODO: This should be handled by #[serde(flatten)]
// See https://github.com/serde-rs/serde/issues/1905
DeEvent::Start(_) if has_value_field => {
self.source = ValueSource::Text;
self.source = ValueSource::Content;
seed.deserialize(INNER_VALUE.into_deserializer()).map(Some)
}
DeEvent::Start(e) => {
Expand Down Expand Up @@ -189,8 +279,118 @@ where
true,
))
}
ValueSource::Nested | ValueSource::Text => seed.deserialize(&mut *self.de),
// This arm processes the following XML shape:
// <any-tag>
// text value
// </any-tag>
// The whole map represented by an `<any-tag>` element, the map key
// is implicit and equals to the `INNER_VALUE` constant, and the value
// is a `Text` or a `CData` event (the value deserializer will see one
// of that events)
ValueSource::Text => seed.deserialize(MapValueDeserializer { map: self }),
// This arm processes the following XML shape:
// <any-tag>
// <any>...</any>
// </any-tag>
// The whole map represented by an `<any-tag>` element, the map key
// is implicit and equals to the `INNER_VALUE` constant, and the value
// is a `Start` event (the value deserializer will see that event)
ValueSource::Content => seed.deserialize(MapValueDeserializer { map: self }),
// This arm processes the following XML shape:
// <any-tag>
// <tag>...</tag>
// </any-tag>
// The whole map represented by an `<any-tag>` element, the map key
// is a `tag`, and the value is a `Start` event (the value deserializer
// will see that event)
ValueSource::Nested => seed.deserialize(&mut *self.de),
ValueSource::Unknown => Err(DeError::KeyNotRead),
}
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

macro_rules! forward {
(
$deserialize:ident
$(
($($name:ident : $type:ty),*)
)?
) => {
#[inline]
fn $deserialize<V: Visitor<'de>>(
self,
$($($name: $type,)*)?
visitor: V
) -> Result<V::Value, Self::Error> {
self.map.de.$deserialize($($($name,)*)? visitor)
}
};
}

/// A deserializer for a value of map or struct. That deserializer slightly
/// differently processes events for a primitive types and sequences than
/// a [`Deserializer`].
struct MapValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Access to the map that created this deserializer. Gives access to the
/// context, such as list of fields, that current map known about.
map: &'m mut MapAccess<'de, 'a, R>,
}

impl<'de, 'a, 'm, R> MapValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Returns a text event, used inside [`deserialize_primitives!()`]
#[inline]
fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
self.map.de.next_text_impl(unescape, false)
}

/// Returns a decoder, used inside [`deserialize_primitives!()`]
#[inline]
fn decoder(&self) -> Decoder {
self.map.de.reader.decoder()
}
}

impl<'de, 'a, 'm, R> de::Deserializer<'de> for MapValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
type Error = DeError;

deserialize_primitives!(mut);

forward!(deserialize_option);
forward!(deserialize_unit);
forward!(deserialize_unit_struct(name: &'static str));
forward!(deserialize_newtype_struct(name: &'static str));

forward!(deserialize_seq);
forward!(deserialize_tuple(len: usize));
forward!(deserialize_tuple_struct(name: &'static str, len: usize));

forward!(deserialize_map);
forward!(deserialize_struct(
name: &'static str,
fields: &'static [&'static str]
));

forward!(deserialize_enum(
name: &'static str,
variants: &'static [&'static str]
));

forward!(deserialize_any);
forward!(deserialize_ignored_any);

#[inline]
fn is_human_readable(&self) -> bool {
self.map.de.is_human_readable()
}
}
Loading