From 20b5317f7a8accbf64ee21245b0a37f636017e13 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 20 Oct 2023 07:52:52 -0400 Subject: [PATCH] automata: fix panic in dense DFA deserialization This fixes a hole in the validation logic that accidentally permitted a dense DFA to contain a match state with zero pattern IDs. Since search code is permitted to assume that every match state has at least one corresponding pattern ID, this led to a panic. Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63391 --- ...ata_deserialize_dense_dfa-5624222820728832 | Bin 0 -> 749 bytes regex-automata/src/dfa/dense.rs | 20 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 diff --git a/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 b/fuzz/regressions/clusterfuzz-testcase-minimized-fuzz_regex_automata_deserialize_dense_dfa-5624222820728832 new file mode 100644 index 0000000000000000000000000000000000000000..e236ae735c7f413c90a0e9b61cc4add46ced15e7 GIT binary patch literal 749 zcmd5)TMED+469=gkFon-(j{nQDi!=03sh-q(;8}bMmo$a!2W|zr>V_O2(ZE8>!v1* z6U=#_h8}h#XfDExxv!cs^^Zrt{#L1#-lYZ{(nKt)H^)$Ct|9sII*#X6$oXD13{eTq K(N?9_h4%nug9@bp literal 0 HcmV?d00001 diff --git a/regex-automata/src/dfa/dense.rs b/regex-automata/src/dfa/dense.rs index fd96bc878..6fc61dc4f 100644 --- a/regex-automata/src/dfa/dense.rs +++ b/regex-automata/src/dfa/dense.rs @@ -2340,8 +2340,8 @@ impl<'a> DFA<&'a [u32]> { // table, match states and accelerators below. If any validation fails, // then we return an error. let (dfa, nread) = unsafe { DFA::from_bytes_unchecked(slice)? }; - dfa.tt.validate(&dfa.special)?; - dfa.st.validate(&dfa.tt)?; + dfa.tt.validate(&dfa)?; + dfa.st.validate(&dfa)?; dfa.ms.validate(&dfa)?; dfa.accels.validate()?; // N.B. dfa.special doesn't have a way to do unchecked deserialization, @@ -3593,7 +3593,8 @@ impl> TransitionTable { /// /// That is, every state ID can be used to correctly index a state in this /// table. - fn validate(&self, sp: &Special) -> Result<(), DeserializeError> { + fn validate(&self, dfa: &DFA) -> Result<(), DeserializeError> { + let sp = &dfa.special; for state in self.states() { // We check that the ID itself is well formed. That is, if it's // a special state then it must actually be a quit, dead, accel, @@ -3611,6 +3612,13 @@ impl> TransitionTable { wasn't actually special", )); } + if sp.is_match_state(state.id()) + && dfa.match_len(state.id()) == 0 + { + return Err(DeserializeError::generic( + "found match state with zero pattern IDs", + )); + } } for (_, to) in state.transitions() { if !self.is_valid(to) { @@ -4127,10 +4135,8 @@ impl> StartTable { /// it against the given transition table (which must be for the same DFA). /// /// That is, every state ID can be used to correctly index a state. - fn validate( - &self, - tt: &TransitionTable, - ) -> Result<(), DeserializeError> { + fn validate(&self, dfa: &DFA) -> Result<(), DeserializeError> { + let tt = &dfa.tt; if !self.universal_start_unanchored.map_or(true, |s| tt.is_valid(s)) { return Err(DeserializeError::generic( "found invalid universal unanchored starting state ID",