Skip to content

Commit d6dfeed

Browse files
authored
Merge pull request #872 from Mingun/update-compare
Update `compare` project to latest versions
2 parents 6017acf + 15a3e86 commit d6dfeed

File tree

5 files changed

+173
-122
lines changed

5 files changed

+173
-122
lines changed

compare/Cargo.toml

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,30 @@ edition = "2021"
88
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
99

1010
[dev-dependencies]
11-
criterion = { version = "0.5", features = ["html_reports"] }
12-
maybe_xml = "0.10.1"
11+
criterion = { version = "0.6", features = ["html_reports"] }
12+
markup5ever = "0.16"
13+
# maybe_xml 0.11 regressed performance by 2x, and because this was the fastest
14+
# XML parser, we keep benchmarking version 0.10 as well
15+
maybe_xml_0_10 = { version = "0.10", package = "maybe_xml" }
16+
maybe_xml = "0.11"
1317
quick-xml = { path = "..", features = ["serialize"] }
1418
rapid-xml = "0.2"
1519
rusty_xml = { version = "0.3", package = "RustyXML" }
20+
serde-xml-rs = "0.8"
1621
xml_oxide = "0.3"
1722
xml-rs = "0.8"
18-
xml5ever = "0.17"
23+
xml5ever = "0.22"
1924
xmlparser = "0.13"
20-
serde-xml-rs = "0.6"
2125
# Do not use the "derive" feature, because it slows down compilation
2226
# See https://github.com/serde-rs/serde/pull/2588
2327
serde = "1.0"
2428
serde_derive = "1.0"
2529
pretty_assertions = "1.4"
2630

2731
[[bench]]
28-
name = "bench"
32+
name = "low-level"
33+
harness = false
34+
35+
[[bench]]
36+
name = "serde"
2937
harness = false

compare/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# XML libraries benchmark suite
2+
3+
Standalone project to benchmark different XML parser implementations. To run the benchmarks
4+
(assuming we are in `quick_xml` checkout directory):
5+
6+
```
7+
cd compare
8+
cargo bench
9+
```
10+
11+
The results can be observed in HTML at `./compare/target/criterion/report/index.html`.

compare/benches/bench.rs renamed to compare/benches/low-level.rs

Lines changed: 58 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@ use criterion::{self, criterion_group, criterion_main, BenchmarkId, Criterion, T
22
use pretty_assertions::assert_eq;
33
use quick_xml::events::Event;
44
use quick_xml::reader::Reader;
5-
use serde::Deserialize;
6-
use serde_xml_rs;
5+
use std::hint::black_box;
76
use xml::reader::{EventReader, XmlEvent};
87

98
static RPM_PRIMARY: &str = include_str!("../../tests/documents/rpm_primary.xml");
@@ -60,7 +59,7 @@ fn low_level_comparison(c: &mut Criterion) {
6059
b.iter(|| {
6160
let mut reader = Reader::from_str(input);
6261
reader.config_mut().check_end_names = false;
63-
let mut count = criterion::black_box(0);
62+
let mut count = black_box(0);
6463
loop {
6564
match reader.read_event() {
6665
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
@@ -80,7 +79,7 @@ fn low_level_comparison(c: &mut Criterion) {
8079
b.iter(|| {
8180
let mut reader = Reader::from_reader(input.as_bytes());
8281
reader.config_mut().check_end_names = false;
83-
let mut count = criterion::black_box(0);
82+
let mut count = black_box(0);
8483
let mut buf = Vec::new();
8584
loop {
8685
match reader.read_event_into(&mut buf) {
@@ -96,7 +95,29 @@ fn low_level_comparison(c: &mut Criterion) {
9695
);
9796

9897
group.bench_with_input(
99-
BenchmarkId::new("maybe_xml", filename),
98+
BenchmarkId::new("maybe_xml:0.10", filename),
99+
*data,
100+
|b, input| {
101+
use maybe_xml_0_10::token::Ty;
102+
use maybe_xml_0_10::Reader;
103+
104+
b.iter(|| {
105+
let reader = Reader::from_str(input);
106+
107+
let mut count = black_box(0);
108+
for token in reader.into_iter() {
109+
match token.ty() {
110+
Ty::StartTag(_) | Ty::EmptyElementTag(_) => count += 1,
111+
_ => (),
112+
}
113+
}
114+
assert_eq!(count, total_tags, "Overall tag count in {}", filename);
115+
})
116+
},
117+
);
118+
119+
group.bench_with_input(
120+
BenchmarkId::new("maybe_xml:0.11", filename),
100121
*data,
101122
|b, input| {
102123
use maybe_xml::token::Ty;
@@ -105,7 +126,7 @@ fn low_level_comparison(c: &mut Criterion) {
105126
b.iter(|| {
106127
let reader = Reader::from_str(input);
107128

108-
let mut count = criterion::black_box(0);
129+
let mut count = black_box(0);
109130
for token in reader.into_iter() {
110131
match token.ty() {
111132
Ty::StartTag(_) | Ty::EmptyElementTag(_) => count += 1,
@@ -124,7 +145,7 @@ fn low_level_comparison(c: &mut Criterion) {
124145
// b.iter(|| {
125146
// let mut r = Parser::new(input.as_bytes());
126147

127-
// let mut count = criterion::black_box(0);
148+
// let mut count = black_box(0);
128149
// loop {
129150
// // Makes no progress if error is returned, so need unwrap()
130151
// match r.next().unwrap().code() {
@@ -147,7 +168,7 @@ fn low_level_comparison(c: &mut Criterion) {
147168
use xmlparser::{Token, Tokenizer};
148169

149170
b.iter(|| {
150-
let mut count = criterion::black_box(0);
171+
let mut count = black_box(0);
151172
for token in Tokenizer::from(input) {
152173
match token {
153174
Ok(Token::ElementStart { .. }) => count += 1,
@@ -166,7 +187,7 @@ fn low_level_comparison(c: &mut Criterion) {
166187
let mut r = Parser::new();
167188
r.feed_str(input);
168189

169-
let mut count = criterion::black_box(0);
190+
let mut count = black_box(0);
170191
for event in r {
171192
match event.unwrap() {
172193
Event::ElementStart(_) => count += 1,
@@ -187,7 +208,7 @@ fn low_level_comparison(c: &mut Criterion) {
187208
b.iter(|| {
188209
let mut r = Parser::from_reader(input.as_bytes());
189210

190-
let mut count = criterion::black_box(0);
211+
let mut count = black_box(0);
191212
loop {
192213
// Makes no progress if error is returned, so need unwrap()
193214
match r.read_event().unwrap() {
@@ -202,38 +223,51 @@ fn low_level_comparison(c: &mut Criterion) {
202223
);
203224

204225
group.bench_with_input(BenchmarkId::new("xml5ever", filename), *data, |b, input| {
205-
use xml5ever::buffer_queue::BufferQueue;
206-
use xml5ever::tokenizer::{TagKind, Token, TokenSink, XmlTokenizer};
226+
use markup5ever::buffer_queue::BufferQueue;
227+
use std::cell::Cell;
228+
use xml5ever::tokenizer::{ProcessResult, TagKind, Token, TokenSink, XmlTokenizer};
207229

208-
struct Sink(usize);
230+
struct Sink(Cell<usize>);
209231
impl TokenSink for Sink {
210-
fn process_token(&mut self, token: Token) {
232+
type Handle = ();
233+
234+
fn process_token(&self, token: Token) -> ProcessResult<Self::Handle> {
211235
match token {
212-
Token::TagToken(tag) if tag.kind == TagKind::StartTag => self.0 += 1,
213-
Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => self.0 += 1,
236+
Token::TagToken(tag) if tag.kind == TagKind::StartTag => {
237+
self.0.set(self.0.get() + 1);
238+
}
239+
Token::TagToken(tag) if tag.kind == TagKind::EmptyTag => {
240+
self.0.set(self.0.get() + 1);
241+
}
214242
_ => (),
215243
}
244+
ProcessResult::Continue
216245
}
217246
}
218247

219248
// Copied from xml5ever benchmarks
220-
// https://github.com/servo/html5ever/blob/429f23943b24f739b78f4d703620d7b1b526475b/xml5ever/benches/xml5ever.rs
249+
// https://github.com/servo/html5ever/blob/a7c9d989b9b3426288a4ed362fb4c4671b2dd8c2/xml5ever/benches/xml5ever.rs#L57-L68
221250
b.iter(|| {
222-
let sink = criterion::black_box(Sink(0));
223-
let mut tok = XmlTokenizer::new(sink, Default::default());
224-
let mut buffer = BufferQueue::new();
251+
let sink = black_box(Sink(Cell::new(0)));
252+
let tok = XmlTokenizer::new(sink, Default::default());
253+
let buffer = BufferQueue::default();
225254
buffer.push_back(input.into());
226-
let _ = tok.feed(&mut buffer);
255+
let _ = tok.feed(&buffer);
227256
tok.end();
228257

229-
assert_eq!(tok.sink.0, total_tags, "Overall tag count in {}", filename);
258+
assert_eq!(
259+
tok.sink.0.into_inner(),
260+
total_tags,
261+
"Overall tag count in {}",
262+
filename
263+
);
230264
})
231265
});
232266

233267
group.bench_with_input(BenchmarkId::new("xml_rs", filename), *data, |b, input| {
234268
b.iter(|| {
235269
let r = EventReader::new(input.as_bytes());
236-
let mut count = criterion::black_box(0);
270+
let mut count = black_box(0);
237271
for e in r {
238272
if let Ok(XmlEvent::StartElement { .. }) = e {
239273
count += 1;
@@ -247,96 +281,5 @@ fn low_level_comparison(c: &mut Criterion) {
247281
group.finish();
248282
}
249283

250-
/// Runs benchmarks for several XML libraries using serde deserialization
251-
#[allow(dead_code)] // We do not use structs
252-
fn serde_comparison(c: &mut Criterion) {
253-
let mut group = c.benchmark_group("serde");
254-
255-
#[derive(Debug, Deserialize)]
256-
struct Rss<E> {
257-
channel: Channel<E>,
258-
}
259-
260-
#[derive(Debug, Deserialize)]
261-
struct Channel<E> {
262-
title: String,
263-
#[serde(rename = "item", default = "Vec::new")]
264-
items: Vec<Item<E>>,
265-
}
266-
267-
#[derive(Debug, Deserialize)]
268-
struct Item<E> {
269-
title: String,
270-
link: String,
271-
#[serde(rename = "pubDate")]
272-
pub_date: String,
273-
enclosure: Option<E>,
274-
}
275-
276-
group.throughput(Throughput::Bytes(SAMPLE_RSS.len() as u64));
277-
278-
group.bench_with_input(
279-
BenchmarkId::new("quick_xml", "sample_rss.xml"),
280-
SAMPLE_RSS,
281-
|b, input| {
282-
#[derive(Debug, Deserialize)]
283-
struct Enclosure {
284-
#[serde(rename = "@url")]
285-
url: String,
286-
287-
#[serde(rename = "@length")]
288-
length: String,
289-
290-
#[serde(rename = "@type")]
291-
typ: String,
292-
}
293-
294-
b.iter(|| {
295-
let rss: Rss<Enclosure> =
296-
criterion::black_box(quick_xml::de::from_str(input).unwrap());
297-
assert_eq!(rss.channel.items.len(), 99);
298-
})
299-
},
300-
);
301-
302-
/* NOTE: Most parts of deserializer are not implemented yet, so benchmark failed
303-
group.bench_with_input(BenchmarkId::new("rapid-xml", "sample_rss.xml"), SAMPLE_RSS, |b, input| {
304-
use rapid_xml::de::Deserializer;
305-
use rapid_xml::parser::Parser;
306-
307-
b.iter(|| {
308-
let mut r = Parser::new(input.as_bytes());
309-
let mut de = Deserializer::new(&mut r).unwrap();
310-
let rss = criterion::black_box(Rss::deserialize(&mut de).unwrap());
311-
assert_eq!(rss.channel.items.len(), 99);
312-
});
313-
});*/
314-
315-
group.bench_with_input(
316-
BenchmarkId::new("xml_rs", "sample_rss.xml"),
317-
SAMPLE_RSS,
318-
|b, input| {
319-
// serde_xml_rs supports @-notation for attributes, but applies it only
320-
// for serialization
321-
#[derive(Debug, Deserialize)]
322-
struct Enclosure {
323-
url: String,
324-
length: String,
325-
326-
#[serde(rename = "type")]
327-
typ: String,
328-
}
329-
330-
b.iter(|| {
331-
let rss: Rss<Enclosure> =
332-
criterion::black_box(serde_xml_rs::from_str(input).unwrap());
333-
assert_eq!(rss.channel.items.len(), 99);
334-
})
335-
},
336-
);
337-
338-
group.finish();
339-
}
340-
341-
criterion_group!(benches, low_level_comparison, serde_comparison);
284+
criterion_group!(benches, low_level_comparison);
342285
criterion_main!(benches);

0 commit comments

Comments
 (0)