Skip to content

Commit

Permalink
fix: speed up Position::line_col for large inputs using SIMD (#707)
Browse files Browse the repository at this point in the history
Note that this may add overhead for small inputs and requires two extra
dependencies (`memchr` and `bytecount`), so the faster `line_col`
is gated behind the optional `fast-line-col` feature flag.

closes #560

Co-authored-by: Tomas Tauber <[email protected]>
  • Loading branch information
tomtau and Tomas Tauber authored Sep 12, 2022
1 parent af4f105 commit bfbdc4b
Show file tree
Hide file tree
Showing 9 changed files with 10,255 additions and 54 deletions.
6 changes: 3 additions & 3 deletions derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_derive"
description = "pest's derive macro"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"]

[dependencies]
# for tests, included transitively anyway
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_generator = { path = "../generator", version = "2.3.0", default-features = false }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_generator = { path = "../generator", version = "2.3.1", default-features = false }
6 changes: 3 additions & 3 deletions generator/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_generator"
description = "pest code generator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -18,8 +18,8 @@ default = ["std"]
std = ["pest/std"]

[dependencies]
pest = { path = "../pest", version = "2.3.0", default-features = false }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1", default-features = false }
pest_meta = { path = "../meta", version = "2.3.1" }
proc-macro2 = "1.0"
quote = "1.0"
syn = "1.0"
6 changes: 3 additions & 3 deletions grammars/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_grammars"
description = "pest popular grammar implementations"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,8 +14,8 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_derive = { path = "../derive", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_derive = { path = "../derive", version = "2.3.1" }

[dev-dependencies]
criterion = "0.3"
Expand Down
42 changes: 41 additions & 1 deletion grammars/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,45 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}

criterion_group!(benches, criterion_benchmark);
// Secondary grammar used only by `line_col_benchmark` below.
// NOTE(review): the module name suggests the grammar was borrowed from the
// `autocorrect` project's JSON handling -- confirm provenance.
mod autocorrect {
use pest_derive::Parser;

/// Parser generated from the inline grammar that follows.
///
/// The entry rule is `item`, which must span the whole input
/// (`SOI ~ line* ~ EOI`); every `line` is one of a key/string `pair`,
/// a `comment`, a run of spaces, a `newline`, or a single `other` character.
#[derive(Parser)]
#[grammar_inline = r#"
newline = ${ "\n" | "\r" }
space = ${ " "+ }
other = ${ !(pair) ~ ANY }
comment = ${ single_line_comment | multiline_comment }
single_line_comment = _{ "//" ~ (!(newline) ~ ANY)* }
multiline_comment = _{ "/*" ~ (!("*/") ~ ANY)* ~ "*/"}
string_type = _{
("\"" ~ (!(newline | "\"") ~ ANY)* ~ "\"")
}
key = ${ string_type ~ (" ")* ~ ":" ~ (" ")* }
string = ${ string_type }
pair = _{ key ~ string }
line = _{ pair | comment | space | other | newline }
item = _{ SOI ~ line* ~ EOI }
"#]
pub struct JsonParser;
}

/// Benchmarks `line_col()` on the start position of the last pair parsed
/// from `benches/main.i18n.json`.
///
/// The file is read and parsed once, outside the measured closure, so only
/// the `line_col()` call itself is timed.
///
/// # Panics
///
/// Panics if the fixture cannot be opened or read, if it does not parse with
/// `autocorrect::Rule::item`, or if parsing yields no pairs.
fn line_col_benchmark(c: &mut Criterion) {
    let mut file = File::open("benches/main.i18n.json")
        .expect("benches/main.i18n.json should exist (run from the grammars crate root)");
    let mut data = String::new();
    file.read_to_string(&mut data)
        .expect("benchmark fixture should be valid UTF-8");

    let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data)
        .expect("benchmark fixture should parse with the `item` rule");
    let last_pair = pairs.last().expect("parsing should yield at least one pair");

    c.bench_function("line col", |b| {
        // Return the result instead of discarding it: `Bencher::iter` feeds the
        // closure's return value through `black_box`, which keeps the optimizer
        // from eliminating the computation being measured.
        b.iter(|| last_pair.as_span().start_pos().line_col());
    });
}

// Register both benchmark functions in the `benches` group and hand that
// group to `criterion_main!`.
criterion_group!(benches, criterion_benchmark, line_col_benchmark,);
criterion_main!(benches);
10,128 changes: 10,128 additions & 0 deletions grammars/benches/main.i18n.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions meta/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_meta"
description = "pest meta language parser and validator"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
once_cell = "1.8.0"

[build-dependencies]
Expand Down
7 changes: 6 additions & 1 deletion pest/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest"
description = "The Elegant Parser"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -21,9 +21,14 @@ std = ["ucd-trie/std", "thiserror"]
pretty-print = ["serde", "serde_json"]
# Enable const fn constructor for `PrecClimber`
const_prec_climber = []
# Enable faster `Position::line_col` calculation using SIMD
# (note that this may have extra overhead for small inputs)
fast-line-col = ["memchr", "bytecount"]

[dependencies]
ucd-trie = { version = "0.1.1", default-features = false }
serde = { version = "1.0.89", optional = true }
serde_json = { version = "1.0.39", optional = true}
thiserror = { version = "1.0.31", optional = true }
memchr = { version = "2", optional = true }
bytecount = { version = "0.6", optional = true }
104 changes: 66 additions & 38 deletions pest/src/position.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,45 +135,14 @@ impl<'i> Position<'i> {
if self.pos > self.input.len() {
panic!("position out of bounds");
}

let mut pos = self.pos;
// Position's pos is always a UTF-8 border.
let slice = &self.input[..pos];
let mut chars = slice.chars().peekable();

let mut line_col = (1, 1);

while pos != 0 {
match chars.next() {
Some('\r') => {
if let Some(&'\n') = chars.peek() {
chars.next();

if pos == 1 {
pos -= 1;
} else {
pos -= 2;
}

line_col = (line_col.0 + 1, 1);
} else {
pos -= 1;
line_col = (line_col.0, line_col.1 + 1);
}
}
Some('\n') => {
pos -= 1;
line_col = (line_col.0 + 1, 1);
}
Some(c) => {
pos -= c.len_utf8();
line_col = (line_col.0, line_col.1 + 1);
}
None => unreachable!(),
}
#[cfg(feature = "fast-line-col")]
{
fast_line_col(self.input, self.pos)
}
#[cfg(not(feature = "fast-line-col"))]
{
original_line_col(self.input, self.pos)
}

line_col
}

/// Returns the entire line of the input that contains this `Position`.
Expand Down Expand Up @@ -432,6 +401,63 @@ impl<'i> Hash for Position<'i> {
}
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input` by
/// walking every character that precedes it.
///
/// A `"\r\n"` pair or a lone `'\n'` starts a new line and resets the column
/// to 1. Every other character -- including a lone `'\r'` -- advances the
/// column by one, so columns are counted in `char`s rather than bytes.
///
/// # Panics
///
/// Panics if `pos` is greater than `input.len()` or does not fall on a UTF-8
/// character boundary.
#[inline]
#[cfg(not(feature = "fast-line-col"))]
fn original_line_col(input: &str, pos: usize) -> (usize, usize) {
    // Position's pos is always a UTF-8 border.
    let mut chars = input[..pos].chars().peekable();

    let (mut line, mut col) = (1usize, 1usize);

    // `while let` rather than `for`: the body needs `peek`/`next` on the same
    // iterator to consume the '\n' of a "\r\n" pair.
    while let Some(c) = chars.next() {
        match c {
            '\r' if chars.peek() == Some(&'\n') => {
                // Treat "\r\n" as a single line break.
                chars.next();
                line += 1;
                col = 1;
            }
            '\n' => {
                line += 1;
                col = 1;
            }
            // Any other character, including a '\r' not followed by '\n'.
            _ => col += 1,
        }
    }

    (line, col)
}

/// Computes the 1-based `(line, column)` of byte offset `pos` in `input`
/// without walking the prefix character by character.
///
/// The last `'\n'` before `pos` is located with `memchr::memrchr`; the line
/// number is one more than the number of `'\n'` bytes up to and including it
/// (counted with `bytecount::count`). The column is the number of `char`s
/// from that newline (inclusive) to `pos`, which makes the first character
/// after a line break column 1. With no preceding newline, the position is
/// on line 1 and the column is the prefix's `char` count plus one.
///
/// # Panics
///
/// Panics if `pos` is greater than `input.len()` or does not fall on a UTF-8
/// character boundary.
#[inline]
#[cfg(feature = "fast-line-col")]
fn fast_line_col(input: &str, pos: usize) -> (usize, usize) {
    // Position's pos is always a UTF-8 border.
    let prefix = &input[..pos];

    match memchr::memrchr(b'\n', prefix.as_bytes()) {
        Some(last_nl) => {
            let line = bytecount::count(prefix[..=last_nl].as_bytes(), b'\n') + 1;
            // The slice starts at the newline itself, so its char count is
            // already the 1-based column.
            let col = prefix[last_nl..].chars().count();
            (line, col)
        }
        None => (1, prefix.chars().count() + 1),
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -465,6 +491,8 @@ mod tests {
assert_eq!(Position::new(input, 7).unwrap().line_col(), (3, 1));
assert_eq!(Position::new(input, 8).unwrap().line_col(), (3, 2));
assert_eq!(Position::new(input, 11).unwrap().line_col(), (3, 3));
let input = "abcd嗨";
assert_eq!(Position::new(input, 7).unwrap().line_col(), (1, 6));
}

#[test]
Expand Down
6 changes: 3 additions & 3 deletions vm/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "pest_vm"
description = "pest grammar virtual machine"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = ["Dragoș Tiselice <[email protected]>"]
homepage = "https://pest-parser.github.io/"
Expand All @@ -14,5 +14,5 @@ readme = "_README.md"
rust-version = "1.56"

[dependencies]
pest = { path = "../pest", version = "2.3.0" }
pest_meta = { path = "../meta", version = "2.3.0" }
pest = { path = "../pest", version = "2.3.1" }
pest_meta = { path = "../meta", version = "2.3.1" }

0 comments on commit bfbdc4b

Please sign in to comment.