From 2f9da6773d71a213f90c022a162d76e687759d99 Mon Sep 17 00:00:00 2001
From: Navneet Aman
Date: Fri, 4 Oct 2024 10:54:05 +0530
Subject: [PATCH] Expose the substitute method of pcre as Regex::substitute and Regex::substitute_all.

---
 src/bytes.rs | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/ffi.rs   |  69 ++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)

diff --git a/src/bytes.rs b/src/bytes.rs
index 2129119..d5daab4 100644
--- a/src/bytes.rs
+++ b/src/bytes.rs
@@ -588,6 +588,77 @@ impl Regex {
     ) -> CaptureMatches<'r, 's> {
         CaptureMatches { re: self, subject, last_end: 0, last_match: None }
     }
+
+    /// Replaces the first match in `subject` with `replacement` and writes
+    /// the resulting bytes to `output`.
+    ///
+    /// `output` is cleared before it is written to. On success, the number
+    /// of substitutions made is returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if PCRE2 reports a failure while performing the
+    /// substitution.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # fn example() -> Result<(), ::pcre2::Error> {
+    /// use pcre2::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"mike")?;
+    /// let text = b"Hi mike, wait you are not mike.";
+    /// let mut output = Vec::new();
+    /// re.substitute(text, b"john", &mut output)?;
+    /// assert_eq!(&output, b"Hi john, wait you are not mike.");
+    /// # Ok(()) }; example().unwrap()
+    /// ```
+    pub fn substitute(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+    ) -> Result<usize, Error> {
+        self.code.substitute(subject, replacement, output, 0)
+    }
+
+    /// Replaces every match in `subject` with `replacement` and writes the
+    /// resulting bytes to `output`.
+    ///
+    /// `output` is cleared before it is written to. On success, the number
+    /// of substitutions made is returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if PCRE2 reports a failure while performing the
+    /// substitution.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # fn example() -> Result<(), ::pcre2::Error> {
+    /// use pcre2::bytes::Regex;
+    ///
+    /// let re = Regex::new(r"mike")?;
+    /// let text = b"Hi mike, wait you are not mike.";
+    /// let mut output = Vec::new();
+    /// re.substitute_all(text, b"john", &mut output)?;
+    /// assert_eq!(&output, b"Hi john, wait you are not john.");
+    /// # Ok(()) }; example().unwrap()
+    /// ```
+    pub fn substitute_all(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+    ) -> Result<usize, Error> {
+        self.code.substitute(
+            subject,
+            replacement,
+            output,
+            pcre2_sys::PCRE2_SUBSTITUTE_GLOBAL,
+        )
+    }
 }
 
 /// Advanced or "lower level" search methods.
@@ -1370,4 +1441,55 @@ mod tests {
         let matched = re.find(hay.as_bytes()).unwrap().unwrap();
         assert_eq!(matched.as_bytes(), "😀👍🏼🎉".as_bytes());
     }
+
+    #[test]
+    fn test_substitute() {
+        let hay = "0123456789😀👍🏼🎉abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ";
+        let pattern = r"(*UTF)
+            (?x)    (?#: Allow comments and whitespace.)
+
+            [^\N{U+0000}-\N{U+007F}]    (?#: Non-ascii code points.)
+            +       (?#: One or more times.)
+            ";
+        let re = RegexBuilder::new()
+            .extended(true)
+            .utf(true)
+            .jit_if_available(true)
+            .build(pattern)
+            .unwrap();
+        let mut output = Vec::new();
+        let count = re.substitute(hay.as_bytes(), b"42", &mut output).unwrap();
+        assert_eq!(count, 1);
+        assert_eq!(
+            &output,
+            "012345678942abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ"
+                .as_bytes()
+        );
+    }
+
+    #[test]
+    fn test_substitute_all() {
+        let hay = "0123456789😀👍🏼🎉abcdefghijklmnopqrst😀👍🏼🎉auvwxyzABCKLMNOPQRSTUVWXYZ";
+        let pattern = r"(*UTF)
+            (?x)    (?#: Allow comments and whitespace.)
+
+            [^\N{U+0000}-\N{U+007F}]    (?#: Non-ascii code points.)
+            +       (?#: One or more times.)
+            ";
+        let re = RegexBuilder::new()
+            .extended(true)
+            .utf(true)
+            .jit_if_available(true)
+            .build(pattern)
+            .unwrap();
+        let mut output = Vec::new();
+        let count =
+            re.substitute_all(hay.as_bytes(), b"42", &mut output).unwrap();
+        assert_eq!(count, 2);
+        assert_eq!(
+            &output,
+            "012345678942abcdefghijklmnopqrst42auvwxyzABCKLMNOPQRSTUVWXYZ"
+                .as_bytes()
+        );
+    }
 }
diff --git a/src/ffi.rs b/src/ffi.rs
index aaabf74..ed7fb3c 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -258,6 +258,75 @@ impl Code {
             Ok(1 + count as usize)
         }
     }
+
+    /// Substitutes `replacement` for matches of this pattern in `subject`
+    /// and writes the result to `output`.
+    ///
+    /// `output` is cleared before use; whatever capacity it already has is
+    /// offered to PCRE2 as the initial output buffer. `options` is OR-ed
+    /// into the options given to `pcre2_substitute`, e.g.
+    /// `PCRE2_SUBSTITUTE_GLOBAL` to replace every match.
+    ///
+    /// On success, returns the number of substitutions made. A negative
+    /// return code from PCRE2 is reported as an error, in which case
+    /// `output` is left empty.
+    pub(crate) fn substitute(
+        &self,
+        subject: &[u8],
+        replacement: &[u8],
+        output: &mut Vec<u8>,
+        options: u32,
+    ) -> Result<usize, Error> {
+        // The two attempts below differ only in their option bits, so the
+        // FFI call is written once.
+        let run = |buffer: &mut Vec<u8>, length: &mut usize, opts: u32| {
+            // SAFETY: `subject` and `replacement` are live slices passed
+            // with their exact lengths, and both call sites keep `*length`
+            // within `buffer.capacity()`, so PCRE2 only writes to memory
+            // owned by `buffer`. `pcre2_substitute` accepts null match data
+            // and a null match context. This relies on `self.as_ptr()`
+            // yielding a valid compiled pattern.
+            unsafe {
+                pcre2_substitute_8(
+                    self.as_ptr(),
+                    subject.as_ptr(),
+                    subject.len(),
+                    0, // start offset
+                    opts,
+                    ptr::null_mut(), // match data
+                    ptr::null_mut(), // match context
+                    replacement.as_ptr(),
+                    replacement.len(),
+                    buffer.as_mut_ptr(),
+                    length,
+                )
+            }
+        };
+
+        output.clear();
+        let mut output_length = output.capacity();
+        // First attempt: use the existing capacity, and ask PCRE2 to report
+        // the required size through `output_length` if that is not enough.
+        let mut rc = run(
+            &mut *output,
+            &mut output_length,
+            PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | options,
+        );
+        if rc == PCRE2_ERROR_NOMEMORY {
+            // Grow the (empty) vec to at least the reported size and try
+            // once more, this time without the overflow-length request.
+            output.reserve_exact(output_length + 1);
+            rc = run(&mut *output, &mut output_length, options);
+        }
+        if rc < 0 {
+            return Err(Error::info(rc));
+        }
+        // SAFETY: on success PCRE2 has set `output_length` to the number of
+        // code units it wrote, all within the capacity offered to it above.
+        unsafe { output.set_len(output_length) };
+        // `rc` is non-negative here, so the cast cannot wrap.
+        Ok(rc as usize)
+    }
 }
 
 /// A low level representation of PCRE2's compilation context.