|
| 1 | +use std::collections::BTreeMap; |
| 2 | + |
| 3 | +use regex::{CaptureMatches, CaptureNames, Captures, Regex}; |
| 4 | + |
| 5 | +use crate::compiler::prelude::*; |
| 6 | + |
| 7 | +fn replace_with<T>( |
| 8 | + value: Value, |
| 9 | + pattern: &Regex, |
| 10 | + count: Value, |
| 11 | + ctx: &mut Context, |
| 12 | + runner: closure::Runner<T>, |
| 13 | +) -> Resolved |
| 14 | +where |
| 15 | + T: Fn(&mut Context) -> Result<Value, ExpressionError>, |
| 16 | +{ |
| 17 | + let haystack = value.try_bytes_utf8_lossy()?; |
| 18 | + let count = match count.try_integer()? { |
| 19 | + i if i > 0 => i as usize, |
| 20 | + i if i < 0 => 0, |
| 21 | + // this is when i == 0 |
| 22 | + _ => return Ok(value), |
| 23 | + }; |
| 24 | + let captures = pattern.captures_iter(&haystack); |
| 25 | + make_replacement( |
| 26 | + captures, |
| 27 | + &haystack, |
| 28 | + count, |
| 29 | + pattern.capture_names(), |
| 30 | + ctx, |
| 31 | + runner, |
| 32 | + ) |
| 33 | +} |
| 34 | + |
| 35 | +fn make_replacement<T>( |
| 36 | + caps: CaptureMatches, |
| 37 | + haystack: &str, |
| 38 | + count: usize, |
| 39 | + capture_names: CaptureNames, |
| 40 | + ctx: &mut Context, |
| 41 | + runner: closure::Runner<T>, |
| 42 | +) -> Resolved |
| 43 | +where |
| 44 | + T: Fn(&mut Context) -> Result<Value, ExpressionError>, |
| 45 | +{ |
| 46 | + // possible optimization: peek at first capture, if none return the original value. |
| 47 | + let mut replaced = String::with_capacity(haystack.len()); |
| 48 | + let limit = if count == 0 { usize::MAX } else { count - 1 }; |
| 49 | + let mut last_match = 0; |
| 50 | + // we loop over the matches ourselves instead of calling Regex::replacen, so that we can |
| 51 | + // handle errors. This is however based on the implementation of Regex::replacen |
| 52 | + for (idx, captures) in caps.enumerate() { |
| 53 | + // Safe to unrap because the 0th index always includes the full match. |
| 54 | + let m = captures.get(0).unwrap(); // full match |
| 55 | + |
| 56 | + let mut value = captures_to_value(&captures, capture_names.clone()); |
| 57 | + runner.map_value(ctx, &mut value)?; |
| 58 | + let replacement = value.try_bytes_utf8_lossy()?; |
| 59 | + |
| 60 | + replaced.push_str(&haystack[last_match..m.start()]); |
| 61 | + replaced.push_str(&replacement); |
| 62 | + last_match = m.end(); |
| 63 | + if idx >= limit { |
| 64 | + break; |
| 65 | + } |
| 66 | + } |
| 67 | + // add the final component |
| 68 | + replaced.push_str(&haystack[last_match..]); |
| 69 | + Ok(replaced.into()) |
| 70 | +} |
| 71 | + |
| 72 | +const STRING_NAME: &str = "string"; |
| 73 | +const CAPTURES_NAME: &str = "captures"; |
| 74 | + |
| 75 | +fn captures_to_value(captures: &Captures, capture_names: CaptureNames) -> Value { |
| 76 | + let mut object: ObjectMap = BTreeMap::new(); |
| 77 | + |
| 78 | + // The full match, named "string" |
| 79 | + object.insert(STRING_NAME.into(), captures.get(0).unwrap().as_str().into()); |
| 80 | + // The length includes the total match, so subtract 1 |
| 81 | + let mut capture_groups: Vec<Value> = Vec::with_capacity(captures.len() - 1); |
| 82 | + |
| 83 | + // We skip the first entry, because it is for the full match, which we have already |
| 84 | + // extracted |
| 85 | + for (idx, name) in capture_names.enumerate().skip(1) { |
| 86 | + let value: Value = if let Some(group) = captures.get(idx) { |
| 87 | + group.as_str().into() |
| 88 | + } else { |
| 89 | + Value::Null |
| 90 | + }; |
| 91 | + if let Some(name) = name { |
| 92 | + object.insert(name.into(), value.clone()); |
| 93 | + } |
| 94 | + capture_groups.push(value); |
| 95 | + } |
| 96 | + |
| 97 | + object.insert(CAPTURES_NAME.into(), capture_groups.into()); |
| 98 | + |
| 99 | + object.into() |
| 100 | +} |
| 101 | + |
| 102 | +#[derive(Clone, Copy, Debug)] |
| 103 | +pub struct ReplaceWith; |
| 104 | + |
| 105 | +impl Function for ReplaceWith { |
| 106 | + fn identifier(&self) -> &'static str { |
| 107 | + "replace_with" |
| 108 | + } |
| 109 | + |
| 110 | + fn parameters(&self) -> &'static [Parameter] { |
| 111 | + &[ |
| 112 | + Parameter { |
| 113 | + keyword: "value", |
| 114 | + kind: kind::BYTES, |
| 115 | + required: true, |
| 116 | + }, |
| 117 | + Parameter { |
| 118 | + keyword: "pattern", |
| 119 | + kind: kind::REGEX, |
| 120 | + required: true, |
| 121 | + }, |
| 122 | + Parameter { |
| 123 | + keyword: "count", |
| 124 | + kind: kind::INTEGER, |
| 125 | + required: false, |
| 126 | + }, |
| 127 | + ] |
| 128 | + } |
| 129 | + |
| 130 | + fn examples(&self) -> &'static [Example] { |
| 131 | + &[ |
| 132 | + Example { |
| 133 | + title: "double replacement", |
| 134 | + source: r#"replace_with("foobar", r'o|a') -> |m| { m.string + m.string }"#, |
| 135 | + result: Ok("foooobaar"), |
| 136 | + }, |
| 137 | + Example { |
| 138 | + title: "replace count", |
| 139 | + source: r#"replace_with("foobar", r'o|a', count: 1) -> |m| { m.string + m.string }"#, |
| 140 | + result: Ok("fooobar"), |
| 141 | + }, |
| 142 | + Example { |
| 143 | + title: "replace with capture group", |
| 144 | + source: r#"replace_with("foo123bar", r'foo(\d+)bar') -> |m| { x = m.captures[0]; "x={{x}}" }"#, |
| 145 | + result: Ok(r#"x=123"#), |
| 146 | + }, |
| 147 | + Example { |
| 148 | + title: "process capture group", |
| 149 | + source: r#"replace_with(s'Got message: {"msg": "b"}', r'message: (\{.*\})') -> |m| { to_string!(parse_json!(m.captures[0]).msg) }"#, |
| 150 | + result: Ok("Got b"), |
| 151 | + }, |
| 152 | + Example { |
| 153 | + title: "Optional capture group", |
| 154 | + source: r#"replace_with("foobar", r'bar( of gold)?') -> |m| { if m.captures[1] == null { "baz" } else { "rich" } }"#, |
| 155 | + result: Ok("foobaz"), |
| 156 | + }, |
| 157 | + Example { |
| 158 | + title: "Named capture group", |
| 159 | + source: r#"replace_with("foo123bar", r'foo(?P<num>\d+)bar') -> |m| { x = to_int!(m.num); to_string(x+ 1) }"#, //to_string(to_int!(m.named.num) + 1) }"#, |
| 160 | + result: Ok("\"124\""), |
| 161 | + }, |
| 162 | + ] |
| 163 | + } |
| 164 | + |
| 165 | + fn compile( |
| 166 | + &self, |
| 167 | + _state: &state::TypeState, |
| 168 | + _ctx: &mut FunctionCompileContext, |
| 169 | + arguments: ArgumentList, |
| 170 | + ) -> Compiled { |
| 171 | + let value = arguments.required("value"); |
| 172 | + let pattern = arguments.required("pattern"); |
| 173 | + let count = arguments.optional("count").unwrap_or(expr!(-1)); |
| 174 | + |
| 175 | + let closure = arguments.required_closure()?; |
| 176 | + |
| 177 | + Ok(ReplaceWithFn { |
| 178 | + value, |
| 179 | + pattern, |
| 180 | + count, |
| 181 | + closure, |
| 182 | + } |
| 183 | + .as_expr()) |
| 184 | + } |
| 185 | + |
| 186 | + fn closure(&self) -> Option<closure::Definition> { |
| 187 | + use closure::{Definition, Input, Output, Variable, VariableKind}; |
| 188 | + |
| 189 | + let match_type = Collection::from_parts( |
| 190 | + BTreeMap::from([ |
| 191 | + (STRING_NAME.into(), Kind::bytes()), |
| 192 | + ( |
| 193 | + CAPTURES_NAME.into(), |
| 194 | + Kind::array(Collection::from_unknown(Kind::bytes().or_null())), |
| 195 | + ), |
| 196 | + ]), |
| 197 | + Kind::bytes().or_null(), |
| 198 | + ); |
| 199 | + |
| 200 | + Some(Definition { |
| 201 | + inputs: vec![Input { |
| 202 | + parameter_keyword: "value", |
| 203 | + kind: Kind::bytes(), |
| 204 | + variables: vec![ |
| 205 | + Variable { |
| 206 | + kind: VariableKind::Exact(Kind::object(match_type)), |
| 207 | + }, |
| 208 | + ], |
| 209 | + output: Output::Kind(Kind::bytes()), |
| 210 | + example: Example { |
| 211 | + title: "replace with hash", |
| 212 | + source : r#"replace_with("received email from [email protected]", pattern: r'\w+@\w+\.\w+') -> |match| { sha2(match.string) }"#, |
| 213 | + result: Ok("received email from 896bdca840c9304a5d0bdbeacc4ef359e3093f80c9777c9967e31ba0ff99ed58"), |
| 214 | + }, |
| 215 | + }], |
| 216 | + is_iterator: false, |
| 217 | + }) |
| 218 | + } |
| 219 | +} |
| 220 | + |
| 221 | +#[derive(Debug, Clone)] |
| 222 | +struct ReplaceWithFn { |
| 223 | + value: Box<dyn Expression>, |
| 224 | + pattern: Box<dyn Expression>, |
| 225 | + count: Box<dyn Expression>, |
| 226 | + closure: FunctionClosure, |
| 227 | +} |
| 228 | + |
| 229 | +impl FunctionExpression for ReplaceWithFn { |
| 230 | + fn resolve(&self, ctx: &mut Context) -> ExpressionResult<Value> { |
| 231 | + let value = self.value.resolve(ctx)?; |
| 232 | + let pattern = self.pattern.resolve(ctx)?; |
| 233 | + let pattern = pattern |
| 234 | + .as_regex() |
| 235 | + .ok_or_else(|| ExpressionError::from("failed to resolve regex"))?; |
| 236 | + for name in pattern.capture_names().flatten() { |
| 237 | + if name == STRING_NAME || name == CAPTURES_NAME { |
| 238 | + return Err(ExpressionError::from( |
| 239 | + r#"Capture group cannot be named "string" or "captures""#, |
| 240 | + )); |
| 241 | + } |
| 242 | + } |
| 243 | + let count = self.count.resolve(ctx)?; |
| 244 | + let FunctionClosure { |
| 245 | + variables, block, .. |
| 246 | + } = &self.closure; |
| 247 | + |
| 248 | + let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx)); |
| 249 | + |
| 250 | + replace_with(value, pattern, count, ctx, runner) |
| 251 | + } |
| 252 | + |
| 253 | + fn type_def(&self, _: &state::TypeState) -> TypeDef { |
| 254 | + TypeDef::bytes().infallible() |
| 255 | + } |
| 256 | +} |
0 commit comments