From 0086dec69a77a9e1153e97cd050ab567b5c7f109 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 14 Oct 2023 13:18:09 -0400 Subject: [PATCH] lite: fix stack overflow test It turns out that we missed another case where the stack could overflow: dropping a deeply nested Hir. Namely, since we permit deeply nested Hirs to be constructed and only reject them after determining they are too deeply nested, they still then need to be dropped. We fix this by implementing a custom Drop impl that uses the heap to traverse the Hir and drop things without using unbounded stack space. An alternative way to fix this would be to adjust the parser somehow to avoid building deeply nested Hir values in the first place. But that seems trickier, so we just stick with this for now. --- regex-lite/src/hir/mod.rs | 60 ++++++++++++++++++++++++++++++++++++ regex-lite/src/hir/parse.rs | 6 ++-- regex-lite/tests/fuzz/mod.rs | 2 +- 3 files changed, 65 insertions(+), 3 deletions(-) diff --git a/regex-lite/src/hir/mod.rs b/regex-lite/src/hir/mod.rs index 3d61ce8c9..6e5348a5b 100644 --- a/regex-lite/src/hir/mod.rs +++ b/regex-lite/src/hir/mod.rs @@ -366,6 +366,24 @@ impl Hir { } } +impl HirKind { + /// Returns a slice of this kind's sub-expressions, if any. + fn subs(&self) -> &[Hir] { + use core::slice::from_ref; + + match *self { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => &[], + HirKind::Repetition(Repetition { ref sub, .. }) => from_ref(sub), + HirKind::Capture(Capture { ref sub, .. }) => from_ref(sub), + HirKind::Concat(ref subs) => subs, + HirKind::Alternation(ref subs) => subs, + } + } +} + #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct Class { pub(crate) ranges: Vec<ClassRange>, @@ -747,3 +765,45 @@ fn prev_char(ch: char) -> Option<char> { // and U+E000 yields a valid scalar value. 
Some(char::from_u32(u32::from(ch).checked_sub(1)?).unwrap()) } + +impl Drop for Hir { + fn drop(&mut self) { + use core::mem; + + match *self.kind() { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => return, + HirKind::Capture(ref x) if x.sub.kind.subs().is_empty() => return, + HirKind::Repetition(ref x) if x.sub.kind.subs().is_empty() => { + return + } + HirKind::Concat(ref x) if x.is_empty() => return, + HirKind::Alternation(ref x) if x.is_empty() => return, + _ => {} + } + + let mut stack = vec![mem::replace(self, Hir::empty())]; + while let Some(mut expr) = stack.pop() { + match expr.kind { + HirKind::Empty + | HirKind::Char(_) + | HirKind::Class(_) + | HirKind::Look(_) => {} + HirKind::Capture(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Repetition(ref mut x) => { + stack.push(mem::replace(&mut x.sub, Hir::empty())); + } + HirKind::Concat(ref mut x) => { + stack.extend(x.drain(..)); + } + HirKind::Alternation(ref mut x) => { + stack.extend(x.drain(..)); + } + } + } + } +} diff --git a/regex-lite/src/hir/parse.rs b/regex-lite/src/hir/parse.rs index 0dcccdd46..ca93b8838 100644 --- a/regex-lite/src/hir/parse.rs +++ b/regex-lite/src/hir/parse.rs @@ -1328,8 +1328,10 @@ fn into_class_item_range(hir: Hir) -> Result { } } -fn into_class_item_ranges(hir: Hir) -> Result<Vec<hir::ClassRange>, Error> { - match hir.kind { +fn into_class_item_ranges( + mut hir: Hir, +) -> Result<Vec<hir::ClassRange>, Error> { + match core::mem::replace(&mut hir.kind, HirKind::Empty) { HirKind::Char(ch) => Ok(vec![hir::ClassRange { start: ch, end: ch }]), HirKind::Class(hir::Class { ranges }) => Ok(ranges), _ => Err(Error::new(ERR_CLASS_INVALID_ITEM)), diff --git a/regex-lite/tests/fuzz/mod.rs b/regex-lite/tests/fuzz/mod.rs index 747aab040..5a721f142 100644 --- a/regex-lite/tests/fuzz/mod.rs +++ b/regex-lite/tests/fuzz/mod.rs @@ -27,7 +27,7 @@ fn captures_wrong_order_min() { #[test] fn many_zero_to_many_reps() { let pat = format!(".{}", "*".repeat(1 << 15)); 
- let Ok(re) = regex_lite::RegexBuilder::new(&pat).build() else { return }; + let Ok(re) = regex_lite::Regex::new(&pat) else { return }; re.is_match(""); }