Skip to content

Commit 309725b

Browse files
authored
feat(stdlib): Add replace_with function (#636)
* feat(stdlib): Add replace_with function This is similar to `replace`, but takes a closure to compute the replacement from the match and capture groups, instead of taking a replacement string. Fixes: #628 * Pull request feedback * enhancement(replace_with): Pass object instead of array to closure This allows us to expose the named capture groups with names. * Add named capture groups directly to capture object
1 parent 41a9b11 commit 309725b

File tree

10 files changed

+300
-0
lines changed

10 files changed

+300
-0
lines changed

changelog.d/628.feature.md

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add a `replace_with` function that is similar to `replace` but takes a closure instead of a
2+
replacement string.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# result: F bar F F fo F G
2+
3+
replace_with("foo bar faa fee fo fum gum", r'([fg])\w\w') -> |m| { upcase(string!(m.captures[0])) }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# result:
2+
# function call error for "replace_with" at (1:73): Capture group cannot be named "string" or "captures"
3+
4+
replace_with("captain bold", r'cap(?P<captures>\w*)') -> |_m| { "test" }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# result:
2+
# function call error for "replace_with" at (1:105): function call error for "assert" at (59:92): failed to parse
3+
4+
replace_with("this is a test", r'(?i)test') -> |_m| {
5+
assert!(false, "failed to parse")
6+
"TEST"
7+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# result: "fOO bAr cAt dOg"
2+
3+
replace_with("foo bar cat dog", r'[oa]*', count: -32) -> |m| { upcase(m.string) }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# result:
2+
# function call error for "replace_with" at (1:64): Capture group cannot be named "string" or "captures"
3+
4+
replace_with("a test", r'"(?P<string>.*)"') -> |m| { m.string }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# result:
2+
# error[E122]: type mismatch in closure return type
3+
# ┌─ :2:34
4+
# │
5+
# 2 │ replace_with("", r'test') -> |m| { to_int!(m.string) }
6+
# │ ^^^^^^^^^^^^^^^^^^^^^
7+
# │ │
8+
# │ block returns invalid value type
9+
# │ received: integer
10+
# │ expected: string
11+
# │
12+
# = see language documentation at https://vrl.dev
13+
# = try your code in the VRL REPL, learn more at https://vrl.dev/examples
14+
15+
replace_with("", r'test') -> |m| { to_int!(m.string) }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#result: "foo bar"
2+
3+
replace_with("foo bar", r'[oa]', count: 0) -> |m| { upcase(m.string) }

src/stdlib/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ cfg_if::cfg_if! {
163163
mod redact;
164164
mod remove;
165165
mod replace;
166+
mod replace_with;
166167
mod reverse_dns;
167168
mod round;
168169
mod seahash;
@@ -325,6 +326,7 @@ cfg_if::cfg_if! {
325326
pub use redact::Redact;
326327
pub use remove::Remove;
327328
pub use replace::Replace;
329+
pub use replace_with::ReplaceWith;
328330
pub use reverse_dns::ReverseDns;
329331
pub use round::Round;
330332
pub use set::Set;
@@ -493,6 +495,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
493495
Box::new(Redact),
494496
Box::new(Remove),
495497
Box::new(Replace),
498+
Box::new(ReplaceWith),
496499
Box::new(ReverseDns),
497500
Box::new(Round),
498501
Box::new(Seahash),

src/stdlib/replace_with.rs

+256
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
use std::collections::BTreeMap;
2+
3+
use regex::{CaptureMatches, CaptureNames, Captures, Regex};
4+
5+
use crate::compiler::prelude::*;
6+
7+
fn replace_with<T>(
8+
value: Value,
9+
pattern: &Regex,
10+
count: Value,
11+
ctx: &mut Context,
12+
runner: closure::Runner<T>,
13+
) -> Resolved
14+
where
15+
T: Fn(&mut Context) -> Result<Value, ExpressionError>,
16+
{
17+
let haystack = value.try_bytes_utf8_lossy()?;
18+
let count = match count.try_integer()? {
19+
i if i > 0 => i as usize,
20+
i if i < 0 => 0,
21+
// this is when i == 0
22+
_ => return Ok(value),
23+
};
24+
let captures = pattern.captures_iter(&haystack);
25+
make_replacement(
26+
captures,
27+
&haystack,
28+
count,
29+
pattern.capture_names(),
30+
ctx,
31+
runner,
32+
)
33+
}
34+
35+
fn make_replacement<T>(
36+
caps: CaptureMatches,
37+
haystack: &str,
38+
count: usize,
39+
capture_names: CaptureNames,
40+
ctx: &mut Context,
41+
runner: closure::Runner<T>,
42+
) -> Resolved
43+
where
44+
T: Fn(&mut Context) -> Result<Value, ExpressionError>,
45+
{
46+
// possible optimization: peek at first capture, if none return the original value.
47+
let mut replaced = String::with_capacity(haystack.len());
48+
let limit = if count == 0 { usize::MAX } else { count - 1 };
49+
let mut last_match = 0;
50+
// we loop over the matches ourselves instead of calling Regex::replacen, so that we can
51+
// handle errors. This is however based on the implementation of Regex::replacen
52+
for (idx, captures) in caps.enumerate() {
53+
// Safe to unrap because the 0th index always includes the full match.
54+
let m = captures.get(0).unwrap(); // full match
55+
56+
let mut value = captures_to_value(&captures, capture_names.clone());
57+
runner.map_value(ctx, &mut value)?;
58+
let replacement = value.try_bytes_utf8_lossy()?;
59+
60+
replaced.push_str(&haystack[last_match..m.start()]);
61+
replaced.push_str(&replacement);
62+
last_match = m.end();
63+
if idx >= limit {
64+
break;
65+
}
66+
}
67+
// add the final component
68+
replaced.push_str(&haystack[last_match..]);
69+
Ok(replaced.into())
70+
}
71+
72+
const STRING_NAME: &str = "string";
73+
const CAPTURES_NAME: &str = "captures";
74+
75+
fn captures_to_value(captures: &Captures, capture_names: CaptureNames) -> Value {
76+
let mut object: ObjectMap = BTreeMap::new();
77+
78+
// The full match, named "string"
79+
object.insert(STRING_NAME.into(), captures.get(0).unwrap().as_str().into());
80+
// The length includes the total match, so subtract 1
81+
let mut capture_groups: Vec<Value> = Vec::with_capacity(captures.len() - 1);
82+
83+
// We skip the first entry, because it is for the full match, which we have already
84+
// extracted
85+
for (idx, name) in capture_names.enumerate().skip(1) {
86+
let value: Value = if let Some(group) = captures.get(idx) {
87+
group.as_str().into()
88+
} else {
89+
Value::Null
90+
};
91+
if let Some(name) = name {
92+
object.insert(name.into(), value.clone());
93+
}
94+
capture_groups.push(value);
95+
}
96+
97+
object.insert(CAPTURES_NAME.into(), capture_groups.into());
98+
99+
object.into()
100+
}
101+
102+
#[derive(Clone, Copy, Debug)]
103+
pub struct ReplaceWith;
104+
105+
impl Function for ReplaceWith {
106+
fn identifier(&self) -> &'static str {
107+
"replace_with"
108+
}
109+
110+
fn parameters(&self) -> &'static [Parameter] {
111+
&[
112+
Parameter {
113+
keyword: "value",
114+
kind: kind::BYTES,
115+
required: true,
116+
},
117+
Parameter {
118+
keyword: "pattern",
119+
kind: kind::REGEX,
120+
required: true,
121+
},
122+
Parameter {
123+
keyword: "count",
124+
kind: kind::INTEGER,
125+
required: false,
126+
},
127+
]
128+
}
129+
130+
fn examples(&self) -> &'static [Example] {
131+
&[
132+
Example {
133+
title: "double replacement",
134+
source: r#"replace_with("foobar", r'o|a') -> |m| { m.string + m.string }"#,
135+
result: Ok("foooobaar"),
136+
},
137+
Example {
138+
title: "replace count",
139+
source: r#"replace_with("foobar", r'o|a', count: 1) -> |m| { m.string + m.string }"#,
140+
result: Ok("fooobar"),
141+
},
142+
Example {
143+
title: "replace with capture group",
144+
source: r#"replace_with("foo123bar", r'foo(\d+)bar') -> |m| { x = m.captures[0]; "x={{x}}" }"#,
145+
result: Ok(r#"x=123"#),
146+
},
147+
Example {
148+
title: "process capture group",
149+
source: r#"replace_with(s'Got message: {"msg": "b"}', r'message: (\{.*\})') -> |m| { to_string!(parse_json!(m.captures[0]).msg) }"#,
150+
result: Ok("Got b"),
151+
},
152+
Example {
153+
title: "Optional capture group",
154+
source: r#"replace_with("foobar", r'bar( of gold)?') -> |m| { if m.captures[1] == null { "baz" } else { "rich" } }"#,
155+
result: Ok("foobaz"),
156+
},
157+
Example {
158+
title: "Named capture group",
159+
source: r#"replace_with("foo123bar", r'foo(?P<num>\d+)bar') -> |m| { x = to_int!(m.num); to_string(x+ 1) }"#, //to_string(to_int!(m.named.num) + 1) }"#,
160+
result: Ok("\"124\""),
161+
},
162+
]
163+
}
164+
165+
fn compile(
166+
&self,
167+
_state: &state::TypeState,
168+
_ctx: &mut FunctionCompileContext,
169+
arguments: ArgumentList,
170+
) -> Compiled {
171+
let value = arguments.required("value");
172+
let pattern = arguments.required("pattern");
173+
let count = arguments.optional("count").unwrap_or(expr!(-1));
174+
175+
let closure = arguments.required_closure()?;
176+
177+
Ok(ReplaceWithFn {
178+
value,
179+
pattern,
180+
count,
181+
closure,
182+
}
183+
.as_expr())
184+
}
185+
186+
fn closure(&self) -> Option<closure::Definition> {
187+
use closure::{Definition, Input, Output, Variable, VariableKind};
188+
189+
let match_type = Collection::from_parts(
190+
BTreeMap::from([
191+
(STRING_NAME.into(), Kind::bytes()),
192+
(
193+
CAPTURES_NAME.into(),
194+
Kind::array(Collection::from_unknown(Kind::bytes().or_null())),
195+
),
196+
]),
197+
Kind::bytes().or_null(),
198+
);
199+
200+
Some(Definition {
201+
inputs: vec![Input {
202+
parameter_keyword: "value",
203+
kind: Kind::bytes(),
204+
variables: vec![
205+
Variable {
206+
kind: VariableKind::Exact(Kind::object(match_type)),
207+
},
208+
],
209+
output: Output::Kind(Kind::bytes()),
210+
example: Example {
211+
title: "replace with hash",
212+
source: r#"replace_with("received email from [email protected]", pattern: r'\w+@\w+\.\w+') -> |match| { sha2(match.string) }"#,
213+
result: Ok("received email from 896bdca840c9304a5d0bdbeacc4ef359e3093f80c9777c9967e31ba0ff99ed58"),
214+
},
215+
}],
216+
is_iterator: false,
217+
})
218+
}
219+
}
220+
221+
#[derive(Debug, Clone)]
222+
struct ReplaceWithFn {
223+
value: Box<dyn Expression>,
224+
pattern: Box<dyn Expression>,
225+
count: Box<dyn Expression>,
226+
closure: FunctionClosure,
227+
}
228+
229+
impl FunctionExpression for ReplaceWithFn {
230+
fn resolve(&self, ctx: &mut Context) -> ExpressionResult<Value> {
231+
let value = self.value.resolve(ctx)?;
232+
let pattern = self.pattern.resolve(ctx)?;
233+
let pattern = pattern
234+
.as_regex()
235+
.ok_or_else(|| ExpressionError::from("failed to resolve regex"))?;
236+
for name in pattern.capture_names().flatten() {
237+
if name == STRING_NAME || name == CAPTURES_NAME {
238+
return Err(ExpressionError::from(
239+
r#"Capture group cannot be named "string" or "captures""#,
240+
));
241+
}
242+
}
243+
let count = self.count.resolve(ctx)?;
244+
let FunctionClosure {
245+
variables, block, ..
246+
} = &self.closure;
247+
248+
let runner = closure::Runner::new(variables, |ctx| block.resolve(ctx));
249+
250+
replace_with(value, pattern, count, ctx, runner)
251+
}
252+
253+
fn type_def(&self, _: &state::TypeState) -> TypeDef {
254+
TypeDef::bytes().infallible()
255+
}
256+
}

0 commit comments

Comments
 (0)