diff --git a/regex-automata/src/meta/strategy.rs b/regex-automata/src/meta/strategy.rs index 4cb3b29b9..04f2ba3c3 100644 --- a/regex-automata/src/meta/strategy.rs +++ b/regex-automata/src/meta/strategy.rs @@ -1167,21 +1167,34 @@ impl ReverseSuffix { return Err(core); } let kind = core.info.config().get_match_kind(); - let suffixseq = crate::util::prefilter::suffixes(kind, hirs); - let Some(suffixes) = suffixseq.literals() else { - debug!( - "skipping reverse suffix optimization because \ - the extract suffix sequence is not finite", - ); - return Err(core); + let suffixes = crate::util::prefilter::suffixes(kind, hirs); + let lcs = match suffixes.longest_common_suffix() { + None => { + debug!( + "skipping reverse suffix optimization because \ + a longest common suffix could not be found", + ); + return Err(core); + } + Some(lcs) if lcs.is_empty() => { + debug!( + "skipping reverse suffix optimization because \ + the longest common suffix is the empty string", + ); + return Err(core); + } + Some(lcs) => lcs, }; - let Some(pre) = Prefilter::new(kind, suffixes) else { - debug!( - "skipping reverse suffix optimization because \ + let pre = match Prefilter::new(kind, &[lcs]) { + Some(pre) => pre, + None => { + debug!( + "skipping reverse suffix optimization because \ a prefilter could not be constructed from the \ longest common suffix", - ); - return Err(core); + ); + return Err(core); + } }; if !pre.is_fast() { debug!( @@ -1268,7 +1281,7 @@ impl ReverseSuffix { e.try_search_half_rev_limited(&input, min_start) } else if let Some(e) = self.core.hybrid.get(&input) { trace!( - "using lazy DFA for reverse inner search at {:?}, \ + "using lazy DFA for reverse suffix search at {:?}, \ but will be stopped at {} to avoid quadratic behavior", input.get_span(), min_start, diff --git a/testdata/regression.toml b/testdata/regression.toml index 2954c9118..53b0701a3 100644 --- a/testdata/regression.toml +++ b/testdata/regression.toml @@ -813,3 +813,18 @@ name = "hir-optimization-out-of-order-class" regex = '^[[:alnum:]./-]+$' haystack = "a-b" matches = [[0, 3]] + +# This is a regression test for an improper reverse suffix optimization. This +# occurred when I "broadened" the applicability of the optimization to include +# multiple possible literal suffixes instead of only sticking to a non-empty +# longest common suffix. It turns out that, at least given how the reverse +# suffix optimization works, we need to stick to the longest common suffix for +# now. +# +# See: https://github.com/rust-lang/regex/issues/1110 +# See also: https://github.com/astral-sh/ruff/pull/7980 +[[test]] +name = 'improper-reverse-suffix-optimization' +regex = '(\\N\{[^}]+})|([{}])' +haystack = 'hiya \N{snowman} bye' +matches = [[[5, 16], [5, 16], []]]