Skip to content

Commit

Permalink
deps: initial migration steps to regex 1.9
Browse files (browse the repository at this point in the history)
This leaves the grep-regex crate in tatters. Pretty much the entire
thing needs to be re-worked. The upshot is that it should result in some
big simplifications. I hope.

The idea here is to drop down and actually use regex-automata 0.3
instead of the regex crate itself.
  • Loading branch information
BurntSushi committed Jul 5, 2023
1 parent a7f1276 commit 1035f6b
Show file tree
Hide file tree
Showing 15 changed files with 606 additions and 558 deletions.
78 changes: 33 additions & 45 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ autotests = false
edition = "2018"
rust-version = "1.65"

[patch.crates-io]
regex = { path = "/home/andrew/rust/regex" }
regex-automata = { path = "/home/andrew/rust/regex/regex-automata" }
regex-syntax = { path = "/home/andrew/rust/regex/regex-syntax" }

[[bin]]
bench = false
path = "crates/core/main.rs"
Expand Down Expand Up @@ -47,7 +52,7 @@ grep = { version = "0.2.12", path = "crates/grep" }
ignore = { version = "0.4.19", path = "crates/ignore" }
lazy_static = "1.1.0"
log = "0.4.5"
regex = "1.3.5"
regex = "1.8.3"
serde_json = "1.0.23"
termcolor = "1.1.0"

Expand Down
2 changes: 1 addition & 1 deletion crates/core/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ impl ArgMatches {
// own, but if the patterns are joined in a set of alternations, then
// you wind up with `foo|`, which is currently invalid in Rust's regex
// engine.
"(?:z{0})*".to_string()
"(?:)".to_string()
}

/// Converts an OsStr pattern to a String pattern. The pattern is escaped
Expand Down
6 changes: 3 additions & 3 deletions crates/globset/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ name = "globset"
bench = false

[dependencies]
aho-corasick = "0.7.3"
bstr = { version = "1.1.0", default-features = false, features = ["std"] }
aho-corasick = "1.0.2"
bstr = { version = "1.5.0", default-features = false, features = ["std"] }
fnv = "1.0.6"
log = { version = "0.4.5", optional = true }
regex = { version = "1.1.5", default-features = false, features = ["perf", "std"] }
regex = { version = "1.8.3", default-features = false, features = ["perf", "std"] }
serde = { version = "1.0.104", optional = true }

[dev-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions crates/globset/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -818,15 +818,15 @@ impl MultiStrategyBuilder {

fn prefix(self) -> PrefixStrategy {
PrefixStrategy {
matcher: AhoCorasick::new_auto_configured(&self.literals),
matcher: AhoCorasick::new(&self.literals).unwrap(),
map: self.map,
longest: self.longest,
}
}

fn suffix(self) -> SuffixStrategy {
SuffixStrategy {
matcher: AhoCorasick::new_auto_configured(&self.literals),
matcher: AhoCorasick::new(&self.literals).unwrap(),
map: self.map,
longest: self.longest,
}
Expand Down
10 changes: 5 additions & 5 deletions crates/regex/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ license = "Unlicense OR MIT"
edition = "2018"

[dependencies]
aho-corasick = "0.7.3"
bstr = "1.1.0"
aho-corasick = "1.0.2"
bstr = "1.5.0"
grep-matcher = { version = "0.1.6", path = "../matcher" }
log = "0.4.5"
regex = "1.1"
regex-syntax = "0.6.5"
thread_local = "1.1.2"
regex = "1.8.3"
regex-syntax = "0.7.2"
thread_local = "1.1.7"
13 changes: 9 additions & 4 deletions crates/regex/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl Config {
let ast = self.ast(pattern)?;
let analysis = self.analysis(&ast)?;
let expr = hir::translate::TranslatorBuilder::new()
.allow_invalid_utf8(true)
.utf8(false)
.case_insensitive(self.is_case_insensitive(&analysis))
.multi_line(self.multi_line)
.dot_matches_new_line(self.dot_matches_new_line)
Expand Down Expand Up @@ -172,7 +172,12 @@ impl ConfiguredHIR {
/// CRLF hack is enabled and the regex is line anchored at the end. In
/// this case, matches that end with a `\r` have the `\r` stripped.
pub fn needs_crlf_stripped(&self) -> bool {
self.config.crlf && self.expr.is_line_anchored_end()
self.config.crlf
&& self
.expr
.properties()
.look_set_suffix_any()
.contains(hir::Look::EndLF)
}

/// Returns the line terminator configured on this expression.
Expand Down Expand Up @@ -202,7 +207,7 @@ impl ConfiguredHIR {

/// Returns true if and only if the underlying HIR has any text anchors.
fn is_any_anchored(&self) -> bool {
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
self.expr.properties().look_set().contains_anchor_haystack()
}

/// Builds a regular expression from this HIR expression.
Expand Down Expand Up @@ -301,7 +306,7 @@ impl ConfiguredHIR {
let expr = ::regex_syntax::ParserBuilder::new()
.nest_limit(self.config.nest_limit)
.octal(self.config.octal)
.allow_invalid_utf8(true)
.utf8(false)
.multi_line(self.config.multi_line)
.dot_matches_new_line(self.config.dot_matches_new_line)
.unicode(self.config.unicode)
Expand Down
42 changes: 18 additions & 24 deletions crates/regex/src/crlf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,32 +124,26 @@ pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
/// nicely in most cases, especially when a match is limited to a single line.
pub fn crlfify(expr: Hir) -> Hir {
match expr.into_kind() {
HirKind::Anchor(hir::Anchor::EndLine) => {
let concat = Hir::concat(vec![
Hir::repetition(hir::Repetition {
kind: hir::RepetitionKind::ZeroOrOne,
greedy: false,
hir: Box::new(Hir::literal(hir::Literal::Unicode('\r'))),
}),
Hir::anchor(hir::Anchor::EndLine),
]);
Hir::group(hir::Group {
kind: hir::GroupKind::NonCapturing,
hir: Box::new(concat),
})
}
HirKind::Look(hir::Look::EndLF) => Hir::concat(vec![
Hir::repetition(hir::Repetition {
min: 0,
max: Some(1),
greedy: false,
sub: Box::new(Hir::literal("\r".as_bytes())),
}),
Hir::look(hir::Look::EndLF),
]),
HirKind::Empty => Hir::empty(),
HirKind::Literal(x) => Hir::literal(x),
HirKind::Literal(hir::Literal(x)) => Hir::literal(x),
HirKind::Class(x) => Hir::class(x),
HirKind::Anchor(x) => Hir::anchor(x),
HirKind::WordBoundary(x) => Hir::word_boundary(x),
HirKind::Look(x) => Hir::look(x),
HirKind::Repetition(mut x) => {
x.hir = Box::new(crlfify(*x.hir));
x.sub = Box::new(crlfify(*x.sub));
Hir::repetition(x)
}
HirKind::Group(mut x) => {
x.hir = Box::new(crlfify(*x.hir));
Hir::group(x)
HirKind::Capture(mut x) => {
x.sub = Box::new(crlfify(*x.sub));
Hir::capture(x)
}
HirKind::Concat(xs) => {
Hir::concat(xs.into_iter().map(crlfify).collect())
Expand All @@ -174,12 +168,12 @@ mod tests {
#[test]
fn various() {
assert_eq!(roundtrip(r"(?m)$"), "(?:\r??(?m:$))");
assert_eq!(roundtrip(r"(?m)$$"), "(?:\r??(?m:$))(?:\r??(?m:$))");
assert_eq!(roundtrip(r"(?m)$$"), "(?:\r??(?m:$)\r??(?m:$))");
assert_eq!(
roundtrip(r"(?m)(?:foo$|bar$)"),
"(?:foo(?:\r??(?m:$))|bar(?:\r??(?m:$)))"
"(?:(?:(?:foo)\r??(?m:$))|(?:(?:bar)\r??(?m:$)))"
);
assert_eq!(roundtrip(r"(?m)$a"), "(?:\r??(?m:$))a");
assert_eq!(roundtrip(r"(?m)$a"), "(?:\r??(?m:$)a)");

// Not a multiline `$`, so no crlfifying occurs.
assert_eq!(roundtrip(r"$"), "\\z");
Expand Down
Loading

0 comments on commit 1035f6b

Please sign in to comment.