Skip to content

Commit

Permalink
Add functions for attribute value normalization
Browse files Browse the repository at this point in the history
closes tafia#371
  • Loading branch information
dralley committed Nov 12, 2023
1 parent bbc7bda commit 5817baf
Show file tree
Hide file tree
Showing 7 changed files with 533 additions and 14 deletions.
11 changes: 10 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ configuration is serializable.
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
configuration. Previous builder methods on `Reader` / `NsReader` was replaced by
direct access to fields of config using `reader.config_mut().<...>`.
- [#379]: Improved compliance with the XML attribute value normalization process by
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
which ought to be used in place of `Attribute::unescape_value()` and
`Attribute::unescape_value_with()`

### Bug Fixes

Expand All @@ -34,12 +38,13 @@ configuration is serializable.
- `Error::UnexpectedEof` replaced by `SyntaxError` in some cases
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`
- [#379]: Added tests for attribute value normalization

[#379]: https://github.com/tafia/quick-xml/pull/379
[#513]: https://github.com/tafia/quick-xml/issues/513
[#675]: https://github.com/tafia/quick-xml/pull/675
[#677]: https://github.com/tafia/quick-xml/pull/677


## 0.31.0 -- 2023-10-22

MSRV bumped to 1.56! Crate now uses Rust 2021 edition.
Expand Down Expand Up @@ -210,6 +215,10 @@ serde >= 1.0.181
- [#565]: Fix compilation error when build with serde <1.0.139


### New Tests

- [#379]: Added tests for attribute value normalization

[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
[#490]: https://github.com/tafia/quick-xml/pull/490
[#510]: https://github.com/tafia/quick-xml/issues/510
Expand Down
12 changes: 4 additions & 8 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
("players.xml", PLAYERS),
];

// TODO: use fully normalized attribute values
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
let mut r = Reader::from_str(doc);
loop {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -67,15 +66,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut buf = Vec::new();
loop {
match criterion::black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -93,15 +91,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
let mut r = NsReader::from_str(doc);
loop {
match criterion::black_box(r.read_resolved_event()?) {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand All @@ -120,7 +117,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
let mut r = NsReader::from_reader(doc);
let mut buf = Vec::new();
Expand All @@ -129,7 +125,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
72 changes: 72 additions & 0 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use std::borrow::Cow;

use criterion::{self, criterion_group, criterion_main, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::escape::{escape, unescape};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::reader::{NsReader, Reader};
Expand Down Expand Up @@ -242,6 +245,74 @@ fn attributes(c: &mut Criterion) {
assert_eq!(count, 150);
})
});

group.finish();
}

/// Benchmarks normalizing attribute values
fn attribute_value_normalization(c: &mut Criterion) {
let mut group = c.benchmark_group("attribute_value_normalization");

group.bench_function("noop_short", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"foobar"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("noop_long", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("replacement_chars", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("char_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&#38;&#60;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.bench_function("entity_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.finish();
}

Expand Down Expand Up @@ -354,6 +425,7 @@ criterion_group!(
read_resolved_event_into,
one_event,
attributes,
attribute_value_normalization,
escaping,
unescaping,
);
Expand Down
1 change: 1 addition & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ impl From<EscapeError> for Error {
}

impl From<AttrError> for Error {
/// Creates a new `Error::InvalidAttr` from the given error
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
Expand Down
Loading

0 comments on commit 5817baf

Please sign in to comment.