Skip to content

Commit

Permalink
syntax: improve literal extraction from certain repetitions
Browse files Browse the repository at this point in the history
When a repetition didn't have an explicit max value, as in `(ab){2,}`,
the literal extractor was producing sub-optimal literals, such as `"ab"`
instead of `"abab"`.

Close #1032
  • Loading branch information
plusvic authored Jul 11, 2023
1 parent 28e16fa commit 43ba6b8
Showing 1 changed file with 7 additions and 5 deletions.
12 changes: 7 additions & 5 deletions regex-syntax/src/hir/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ impl Extractor {
}
seq
}
hir::Repetition { min, max: Some(max), .. } if min < max => {
hir::Repetition { min, .. } => {
assert!(min > 0); // handled above
let limit =
u32::try_from(self.limit_repeat).unwrap_or(u32::MAX);
Expand All @@ -491,10 +491,6 @@ impl Extractor {
seq.make_inexact();
seq
}
hir::Repetition { .. } => {
subseq.make_inexact();
subseq
}
}
}

Expand Down Expand Up @@ -2655,6 +2651,12 @@ mod tests {
]),
e(r"(ab|cd)(ef|gh)(ij|kl)")
);

assert_eq!(inexact([E("abab")], [E("abab")]), e(r"(ab){2}"));

assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,3}"));

assert_eq!(inexact([I("abab")], [I("abab")]), e(r"(ab){2,}"));
}

#[test]
Expand Down

0 comments on commit 43ba6b8

Please sign in to comment.