diff --git a/compare/Cargo.toml b/compare/Cargo.toml index c7f87163..052ce033 100644 --- a/compare/Cargo.toml +++ b/compare/Cargo.toml @@ -8,16 +8,20 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dev-dependencies] -criterion = { version = "0.5", features = ["html_reports"] } -maybe_xml = "0.10.1" +criterion = { version = "0.6", features = ["html_reports"] } +markup5ever = "0.16" +# maybe_xml 0.11 regressed performance by x2, and because this was the fastest +# XML parser, we keep benchmarking version 0.10 as well +maybe_xml_0_10 = { version = "0.10", package = "maybe_xml" } +maybe_xml = "0.11" quick-xml = { path = "..", features = ["serialize"] } rapid-xml = "0.2" rusty_xml = { version = "0.3", package = "RustyXML" } +serde-xml-rs = "0.8" xml_oxide = "0.3" xml-rs = "0.8" -xml5ever = "0.17" +xml5ever = "0.22" xmlparser = "0.13" -serde-xml-rs = "0.6" # Do not use "derive" feature, because it slowdown compilation # See https://github.com/serde-rs/serde/pull/2588 serde = "1.0" @@ -25,5 +29,9 @@ serde_derive = "1.0" pretty_assertions = "1.4" [[bench]] -name = "bench" +name = "low-level" +harness = false + +[[bench]] +name = "serde" harness = false diff --git a/compare/README.md b/compare/README.md new file mode 100644 index 00000000..d1a1aed3 --- /dev/null +++ b/compare/README.md @@ -0,0 +1,11 @@ +# XML libraries benchmark suite + +Standalone project to benchmark different implementations of XML parser. To run benchmark +(assuming we are in `quick_xml` checkout directory): + +``` +cd compare +cargo bench +``` + +The results can be observed in HTML at `./compare/target/criterion/report/index.html`. 
diff --git a/compare/benches/bench.rs b/compare/benches/low-level.rs similarity index 69% rename from compare/benches/bench.rs rename to compare/benches/low-level.rs index a091ff06..bd6175bb 100644 --- a/compare/benches/bench.rs +++ b/compare/benches/low-level.rs @@ -2,8 +2,7 @@ use criterion::{self, criterion_group, criterion_main, BenchmarkId, Criterion, T use pretty_assertions::assert_eq; use quick_xml::events::Event; use quick_xml::reader::Reader; -use serde::Deserialize; -use serde_xml_rs; +use std::hint::black_box; use xml::reader::{EventReader, XmlEvent}; static RPM_PRIMARY: &str = include_str!("../../tests/documents/rpm_primary.xml"); @@ -60,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) { b.iter(|| { let mut reader = Reader::from_str(input); reader.config_mut().check_end_names = false; - let mut count = criterion::black_box(0); + let mut count = black_box(0); loop { match reader.read_event() { Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1, @@ -80,7 +79,7 @@ fn low_level_comparison(c: &mut Criterion) { b.iter(|| { let mut reader = Reader::from_reader(input.as_bytes()); reader.config_mut().check_end_names = false; - let mut count = criterion::black_box(0); + let mut count = black_box(0); let mut buf = Vec::new(); loop { match reader.read_event_into(&mut buf) { @@ -96,7 +95,29 @@ fn low_level_comparison(c: &mut Criterion) { ); group.bench_with_input( - BenchmarkId::new("maybe_xml", filename), + BenchmarkId::new("maybe_xml:0.10", filename), + *data, + |b, input| { + use maybe_xml_0_10::token::Ty; + use maybe_xml_0_10::Reader; + + b.iter(|| { + let reader = Reader::from_str(input); + + let mut count = black_box(0); + for token in reader.into_iter() { + match token.ty() { + Ty::StartTag(_) | Ty::EmptyElementTag(_) => count += 1, + _ => (), + } + } + assert_eq!(count, total_tags, "Overall tag count in {}", filename); + }) + }, + ); + + group.bench_with_input( + BenchmarkId::new("maybe_xml:0.11", filename), *data, |b, input| { use 
maybe_xml::token::Ty; @@ -105,7 +126,7 @@ fn low_level_comparison(c: &mut Criterion) { b.iter(|| { let reader = Reader::from_str(input); - let mut count = criterion::black_box(0); + let mut count = black_box(0); for token in reader.into_iter() { match token.ty() { Ty::StartTag(_) | Ty::EmptyElementTag(_) => count += 1, @@ -124,7 +145,7 @@ fn low_level_comparison(c: &mut Criterion) { // b.iter(|| { // let mut r = Parser::new(input.as_bytes()); - // let mut count = criterion::black_box(0); + // let mut count = black_box(0); // loop { // // Makes no progress if error is returned, so need unwrap() // match r.next().unwrap().code() { @@ -147,7 +168,7 @@ fn low_level_comparison(c: &mut Criterion) { use xmlparser::{Token, Tokenizer}; b.iter(|| { - let mut count = criterion::black_box(0); + let mut count = black_box(0); for token in Tokenizer::from(input) { match token { Ok(Token::ElementStart { .. }) => count += 1, @@ -166,7 +187,7 @@ fn low_level_comparison(c: &mut Criterion) { let mut r = Parser::new(); r.feed_str(input); - let mut count = criterion::black_box(0); + let mut count = black_box(0); for event in r { match event.unwrap() { Event::ElementStart(_) => count += 1, @@ -187,7 +208,7 @@ fn low_level_comparison(c: &mut Criterion) { b.iter(|| { let mut r = Parser::from_reader(input.as_bytes()); - let mut count = criterion::black_box(0); + let mut count = black_box(0); loop { // Makes no progress if error is returned, so need unwrap() match r.read_event().unwrap() { @@ -202,38 +223,51 @@ fn low_level_comparison(c: &mut Criterion) { ); group.bench_with_input(BenchmarkId::new("xml5ever", filename), *data, |b, input| { - use xml5ever::buffer_queue::BufferQueue; - use xml5ever::tokenizer::{TagKind, Token, TokenSink, XmlTokenizer}; + use markup5ever::buffer_queue::BufferQueue; + use std::cell::Cell; + use xml5ever::tokenizer::{ProcessResult, TagKind, Token, TokenSink, XmlTokenizer}; - struct Sink(usize); + struct Sink(Cell); impl TokenSink for Sink { - fn 
process_token(&mut self, token: Token) { + type Handle = (); + + fn process_token(&self, token: Token) -> ProcessResult { match token { - Token::TagToken(tag) if tag.kind == TagKind::StartTag => self.0 += 1, - Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => self.0 += 1, + Token::TagToken(tag) if tag.kind == TagKind::StartTag => { + self.0.set(self.0.get() + 1); + } + Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => { + self.0.set(self.0.get() + 1); + } _ => (), } + ProcessResult::Continue } } // Copied from xml5ever benchmarks - // https://github.com/servo/html5ever/blob/429f23943b24f739b78f4d703620d7b1b526475b/xml5ever/benches/xml5ever.rs + // https://github.com/servo/html5ever/blob/a7c9d989b9b3426288a4ed362fb4c4671b2dd8c2/xml5ever/benches/xml5ever.rs#L57-L68 b.iter(|| { - let sink = criterion::black_box(Sink(0)); - let mut tok = XmlTokenizer::new(sink, Default::default()); - let mut buffer = BufferQueue::new(); + let sink = black_box(Sink(Cell::new(0))); + let tok = XmlTokenizer::new(sink, Default::default()); + let buffer = BufferQueue::default(); buffer.push_back(input.into()); - let _ = tok.feed(&mut buffer); + let _ = tok.feed(&buffer); tok.end(); - assert_eq!(tok.sink.0, total_tags, "Overall tag count in {}", filename); + assert_eq!( + tok.sink.0.into_inner(), + total_tags, + "Overall tag count in {}", + filename + ); }) }); group.bench_with_input(BenchmarkId::new("xml_rs", filename), *data, |b, input| { b.iter(|| { let r = EventReader::new(input.as_bytes()); - let mut count = criterion::black_box(0); + let mut count = black_box(0); for e in r { if let Ok(XmlEvent::StartElement { .. 
}) = e { count += 1; @@ -247,96 +281,5 @@ fn low_level_comparison(c: &mut Criterion) { group.finish(); } -/// Runs benchmarks for several XML libraries using serde deserialization -#[allow(dead_code)] // We do not use structs -fn serde_comparison(c: &mut Criterion) { - let mut group = c.benchmark_group("serde"); - - #[derive(Debug, Deserialize)] - struct Rss { - channel: Channel, - } - - #[derive(Debug, Deserialize)] - struct Channel { - title: String, - #[serde(rename = "item", default = "Vec::new")] - items: Vec>, - } - - #[derive(Debug, Deserialize)] - struct Item { - title: String, - link: String, - #[serde(rename = "pubDate")] - pub_date: String, - enclosure: Option, - } - - group.throughput(Throughput::Bytes(SAMPLE_RSS.len() as u64)); - - group.bench_with_input( - BenchmarkId::new("quick_xml", "sample_rss.xml"), - SAMPLE_RSS, - |b, input| { - #[derive(Debug, Deserialize)] - struct Enclosure { - #[serde(rename = "@url")] - url: String, - - #[serde(rename = "@length")] - length: String, - - #[serde(rename = "@type")] - typ: String, - } - - b.iter(|| { - let rss: Rss = - criterion::black_box(quick_xml::de::from_str(input).unwrap()); - assert_eq!(rss.channel.items.len(), 99); - }) - }, - ); - - /* NOTE: Most parts of deserializer are not implemented yet, so benchmark failed - group.bench_with_input(BenchmarkId::new("rapid-xml", "sample_rss.xml"), SAMPLE_RSS, |b, input| { - use rapid_xml::de::Deserializer; - use rapid_xml::parser::Parser; - - b.iter(|| { - let mut r = Parser::new(input.as_bytes()); - let mut de = Deserializer::new(&mut r).unwrap(); - let rss = criterion::black_box(Rss::deserialize(&mut de).unwrap()); - assert_eq!(rss.channel.items.len(), 99); - }); - });*/ - - group.bench_with_input( - BenchmarkId::new("xml_rs", "sample_rss.xml"), - SAMPLE_RSS, - |b, input| { - // serde_xml_rs supports @-notation for attributes, but applies it only - // for serialization - #[derive(Debug, Deserialize)] - struct Enclosure { - url: String, - length: String, - - 
#[serde(rename = "type")] - typ: String, - } - - b.iter(|| { - let rss: Rss = - criterion::black_box(serde_xml_rs::from_str(input).unwrap()); - assert_eq!(rss.channel.items.len(), 99); - }) - }, - ); - - group.finish(); -} - -criterion_group!(benches, low_level_comparison, serde_comparison); +criterion_group!(benches, low_level_comparison); criterion_main!(benches); diff --git a/compare/benches/serde.rs b/compare/benches/serde.rs new file mode 100644 index 00000000..fb0399d0 --- /dev/null +++ b/compare/benches/serde.rs @@ -0,0 +1,89 @@ +use criterion::{self, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput}; +use pretty_assertions::assert_eq; +use serde::Deserialize; +use serde_xml_rs; +use std::hint::black_box; + +static SAMPLE_RSS: &str = include_str!("../../tests/documents/sample_rss.xml"); + +/// Runs benchmarks for several XML libraries using serde deserialization +#[allow(dead_code)] // We do not use structs +fn serde_comparison(c: &mut Criterion) { + let mut group = c.benchmark_group("serde"); + + #[derive(Debug, Deserialize)] + struct Rss { + channel: Channel, + } + + #[derive(Debug, Deserialize)] + struct Channel { + title: String, + #[serde(rename = "item", default = "Vec::new")] + items: Vec>, + } + + #[derive(Debug, Deserialize)] + struct Item { + title: String, + link: String, + #[serde(rename = "pubDate")] + pub_date: String, + enclosure: Option, + } + + #[derive(Debug, Deserialize)] + struct Enclosure { + #[serde(rename = "@url")] + url: String, + + #[serde(rename = "@length")] + length: String, + + #[serde(rename = "@type")] + typ: String, + } + + group.throughput(Throughput::Bytes(SAMPLE_RSS.len() as u64)); + + group.bench_with_input( + BenchmarkId::new("quick_xml", "sample_rss.xml"), + SAMPLE_RSS, + |b, input| { + + b.iter(|| { + let rss: Rss = black_box(quick_xml::de::from_str(input).unwrap()); + assert_eq!(rss.channel.items.len(), 99); + }) + }, + ); + + /* NOTE: Most parts of deserializer are not implemented yet, so benchmark 
failed + group.bench_with_input(BenchmarkId::new("rapid-xml", "sample_rss.xml"), SAMPLE_RSS, |b, input| { + use rapid_xml::de::Deserializer; + use rapid_xml::parser::Parser; + + b.iter(|| { + let mut r = Parser::new(input.as_bytes()); + let mut de = Deserializer::new(&mut r).unwrap(); + let rss = black_box(Rss::deserialize(&mut de).unwrap()); + assert_eq!(rss.channel.items.len(), 99); + }); + });*/ + + group.bench_with_input( + BenchmarkId::new("xml_rs", "sample_rss.xml"), + SAMPLE_RSS, + |b, input| { + b.iter(|| { + let rss: Rss = black_box(serde_xml_rs::from_str(input).unwrap()); + assert_eq!(rss.channel.items.len(), 99); + }) + }, + ); + + group.finish(); +} + +criterion_group!(benches, serde_comparison); +criterion_main!(benches); diff --git a/src/de/map.rs b/src/de/map.rs index c5540459..bbc032c6 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -220,8 +220,8 @@ where /// Used to map elements with `xsi:nil` attribute set to true to `None` in optional contexts. /// /// We need to handle two attributes: - /// - on parent element: - /// - on this element: + /// - on parent element: `` + /// - on this element: `` /// /// We check parent element too because `xsi:nil` affects only nested elements of the /// tag where it is defined. We can map structure with fields mapped to attributes to