Skip to content

Commit 114bb3c

Browse files
authored
feat(stdlib): add punycode encoding functions (#672)
* feat(stdlib): add punycode encoding functions

  This adds `encode_punycode` and `decode_punycode` functions. It also adds tests to confirm `parse_url` function behavior when it comes to punycode.

  Fixes: #659
* Fix changelog entry PR reference
* Add tests and examples of fully ASCII strings to punycode functions
* Add benches for punycode related functions
* Add VRL tests for punycode encoding
* Make punycode functions fallible
* Rename `err` to `errors` in `map_err`
1 parent 40cbdee commit 114bb3c

File tree

12 files changed

+278
-0
lines changed

12 files changed

+278
-0
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ stdlib = [
7474
"dep:hex",
7575
"dep:hmac",
7676
"dep:hostname",
77+
"dep:idna",
7778
"dep:indexmap",
7879
"dep:md-5",
7980
"dep:nom",
@@ -123,6 +124,7 @@ exitcode = {version = "1", optional = true }
123124
flate2 = { version = "1.0.28", default-features = false, features = ["default"], optional = true }
124125
hex = { version = "0.4", optional = true }
125126
hmac = { version = "0.12.1", optional = true }
127+
idna = { version = "0.5", optional = true }
126128
iana-time-zone = "0.1.59"
127129
indexmap = { version = "~2.2.2", default-features = false, features = ["std"], optional = true}
128130
indoc = {version = "2.0.4", optional = true }

benches/stdlib.rs

+30
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ criterion_group!(
2525
decode_base16,
2626
decode_base64,
2727
decode_percent,
28+
decode_punycode,
2829
decrypt,
2930
// TODO: Cannot pass a Path to bench_function
3031
//del,
@@ -35,6 +36,7 @@ criterion_group!(
3536
encode_json,
3637
encode_logfmt,
3738
encode_percent,
39+
encode_punycode,
3840
encrypt,
3941
ends_with,
4042
// TODO: Cannot pass a Path to bench_function
@@ -301,6 +303,20 @@ bench_function! {
301303
}
302304
}
303305

306+
bench_function! {
307+
decode_punycode => vrl::stdlib::DecodePunycode;
308+
309+
encoded {
310+
args: func_args![value: "www.xn--caf-dma.com"],
311+
want: Ok("www.café.com"),
312+
}
313+
314+
non_encoded {
315+
args: func_args![value: "www.cafe.com"],
316+
want: Ok("www.cafe.com"),
317+
}
318+
}
319+
304320
bench_function! {
305321
decode_mime_q => vrl::stdlib::DecodeMimeQ;
306322

@@ -443,6 +459,20 @@ bench_function! {
443459
}
444460
}
445461

462+
bench_function! {
463+
encode_punycode => vrl::stdlib::EncodePunycode;
464+
465+
idn {
466+
args: func_args![value: "www.CAFé.com"],
467+
want: Ok("www.xn--caf-dma.com"),
468+
}
469+
470+
ascii {
471+
args: func_args![value: "www.cafe.com"],
472+
want: Ok("www.cafe.com"),
473+
}
474+
}
475+
446476
bench_function! {
447477
ends_with => vrl::stdlib::EndsWith;
448478

changelog.d/672.feature.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added `encode_punycode` and `decode_punycode` functions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# result: "www.porquénopuedensimplementehablarenespañol.com"
2+
3+
decode_punycode!("www.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# result: "www.xn--ihqwcrb4cv8a8dqg056pqjye.com"
2+
3+
encode_punycode!("www.他们为什么不说中文.com")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# object: { "host": "www.ليهمابتكلموشعربي؟.他们为什么不说中文" }
2+
# result: "www.ليهمابتكلموشعربي؟.他们为什么不说中文"
3+
4+
encoded = encode_punycode!(.host)
5+
decode_punycode!(encoded)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# object: { "url": "https://www.CAFé.com" }
2+
# result: { "host": "www.xn--caf-dma.com", "host_decoded": "www.café.com" }
3+
4+
# parse url
5+
parsed_url = parse_url!(.url)
6+
7+
# delete url - no longer needed
8+
del(.url)
9+
10+
.host = parsed_url.host
11+
.host_decoded = decode_punycode!(.host)
12+
.

src/stdlib/decode_punycode.rs

+87
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
use crate::compiler::prelude::*;
2+
3+
#[derive(Clone, Copy, Debug)]
4+
pub struct DecodePunycode;
5+
6+
impl Function for DecodePunycode {
7+
fn identifier(&self) -> &'static str {
8+
"decode_punycode"
9+
}
10+
11+
fn parameters(&self) -> &'static [Parameter] {
12+
&[Parameter {
13+
keyword: "value",
14+
kind: kind::BYTES,
15+
required: true,
16+
}]
17+
}
18+
19+
fn compile(
20+
&self,
21+
_state: &state::TypeState,
22+
_ctx: &mut FunctionCompileContext,
23+
arguments: ArgumentList,
24+
) -> Compiled {
25+
let value = arguments.required("value");
26+
27+
Ok(DecodePunycodeFn { value }.as_expr())
28+
}
29+
30+
fn examples(&self) -> &'static [Example] {
31+
&[
32+
Example {
33+
title: "punycode string",
34+
source: r#"decode_punycode!("www.xn--caf-dma.com")"#,
35+
result: Ok("www.café.com"),
36+
},
37+
Example {
38+
title: "ascii string",
39+
source: r#"decode_punycode!("www.cafe.com")"#,
40+
result: Ok("www.cafe.com"),
41+
},
42+
]
43+
}
44+
}
45+
46+
#[derive(Clone, Debug)]
47+
struct DecodePunycodeFn {
48+
value: Box<dyn Expression>,
49+
}
50+
51+
impl FunctionExpression for DecodePunycodeFn {
52+
fn resolve(&self, ctx: &mut Context) -> Resolved {
53+
let value = self.value.resolve(ctx)?;
54+
let string = value.try_bytes_utf8_lossy()?;
55+
56+
let (encoded, result) = idna::domain_to_unicode(&string);
57+
result.map_err(|errors| format!("unable to decode punycode: {errors}"))?;
58+
59+
Ok(encoded.into())
60+
}
61+
62+
fn type_def(&self, _: &state::TypeState) -> TypeDef {
63+
TypeDef::bytes().fallible()
64+
}
65+
}
66+
#[cfg(test)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        decode_punycode => DecodePunycode;

        // A punycode-encoded label is turned back into its Unicode form.
        demo_string {
            args: func_args![value: value!("www.xn--caf-dma.com")],
            want: Ok(value!("www.café.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // A host that is already plain ASCII comes back unchanged.
        ascii_string {
            args: func_args![value: value!("www.cafe.com")],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}

src/stdlib/encode_punycode.rs

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
use crate::compiler::prelude::*;
2+
3+
#[derive(Clone, Copy, Debug)]
4+
pub struct EncodePunycode;
5+
6+
impl Function for EncodePunycode {
7+
fn identifier(&self) -> &'static str {
8+
"encode_punycode"
9+
}
10+
11+
fn parameters(&self) -> &'static [Parameter] {
12+
&[Parameter {
13+
keyword: "value",
14+
kind: kind::BYTES,
15+
required: true,
16+
}]
17+
}
18+
19+
fn compile(
20+
&self,
21+
_state: &state::TypeState,
22+
_ctx: &mut FunctionCompileContext,
23+
arguments: ArgumentList,
24+
) -> Compiled {
25+
let value = arguments.required("value");
26+
27+
Ok(EncodePunycodeFn { value }.as_expr())
28+
}
29+
30+
fn examples(&self) -> &'static [Example] {
31+
&[
32+
Example {
33+
title: "IDN string",
34+
source: r#"encode_punycode!("www.café.com")"#,
35+
result: Ok("www.xn--caf-dma.com"),
36+
},
37+
Example {
38+
title: "mixed case string",
39+
source: r#"encode_punycode!("www.CAFé.com")"#,
40+
result: Ok("www.xn--caf-dma.com"),
41+
},
42+
Example {
43+
title: "ascii string",
44+
source: r#"encode_punycode!("www.cafe.com")"#,
45+
result: Ok("www.cafe.com"),
46+
},
47+
]
48+
}
49+
}
50+
51+
#[derive(Clone, Debug)]
52+
struct EncodePunycodeFn {
53+
value: Box<dyn Expression>,
54+
}
55+
56+
impl FunctionExpression for EncodePunycodeFn {
57+
fn resolve(&self, ctx: &mut Context) -> Resolved {
58+
let value = self.value.resolve(ctx)?;
59+
let string = value.try_bytes_utf8_lossy()?;
60+
61+
let encoded = idna::domain_to_ascii(&string)
62+
.map_err(|errors| format!("unable to encode to punycode: {errors}"))?;
63+
64+
Ok(encoded.into())
65+
}
66+
67+
fn type_def(&self, _: &state::TypeState) -> TypeDef {
68+
TypeDef::bytes().fallible()
69+
}
70+
}
71+
#[cfg(test)]
mod test {
    use super::*;
    use crate::value;

    test_function![
        encode_punycode => EncodePunycode;

        // A label containing non-ASCII characters gets an `xn--` encoded form.
        idn_string {
            args: func_args![value: value!("www.café.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Upper-case input produces the same output as its lower-case form.
        mixed_case {
            args: func_args![value: value!("www.CAFé.com")],
            want: Ok(value!("www.xn--caf-dma.com")),
            tdef: TypeDef::bytes().fallible(),
        }

        // A host that is already plain ASCII comes back unchanged.
        ascii_string {
            args: func_args![value: value!("www.cafe.com")],
            want: Ok(value!("www.cafe.com")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}

src/stdlib/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ cfg_if::cfg_if! {
5353
mod decode_gzip;
5454
mod decode_mime_q;
5555
mod decode_percent;
56+
mod decode_punycode;
5657
mod decode_snappy;
5758
mod decode_zlib;
5859
mod decode_zstd;
@@ -66,6 +67,7 @@ cfg_if::cfg_if! {
6667
mod encode_key_value;
6768
mod encode_logfmt;
6869
mod encode_percent;
70+
mod encode_punycode;
6971
mod encode_snappy;
7072
mod encode_zlib;
7173
mod encode_zstd;
@@ -220,6 +222,7 @@ cfg_if::cfg_if! {
220222
pub use decode_gzip::DecodeGzip;
221223
pub use decode_mime_q::DecodeMimeQ;
222224
pub use decode_percent::DecodePercent;
225+
pub use decode_punycode::DecodePunycode;
223226
pub use decode_snappy::DecodeSnappy;
224227
pub use decode_zlib::DecodeZlib;
225228
pub use decode_zstd::DecodeZstd;
@@ -233,6 +236,7 @@ cfg_if::cfg_if! {
233236
pub use encode_key_value::EncodeKeyValue;
234237
pub use encode_logfmt::EncodeLogfmt;
235238
pub use encode_percent::EncodePercent;
239+
pub use encode_punycode::EncodePunycode;
236240
pub use encode_snappy::EncodeSnappy;
237241
pub use encode_zlib::EncodeZlib;
238242
pub use encode_zstd::EncodeZstd;
@@ -390,6 +394,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
390394
Box::new(DecodeBase64),
391395
Box::new(DecodeGzip),
392396
Box::new(DecodePercent),
397+
Box::new(DecodePunycode),
393398
Box::new(DecodeMimeQ),
394399
Box::new(DecodeSnappy),
395400
Box::new(DecodeZlib),
@@ -404,6 +409,7 @@ pub fn all() -> Vec<Box<dyn Function>> {
404409
Box::new(EncodeKeyValue),
405410
Box::new(EncodeLogfmt),
406411
Box::new(EncodePercent),
412+
Box::new(EncodePunycode),
407413
Box::new(EncodeSnappy),
408414
Box::new(EncodeZlib),
409415
Box::new(EncodeZstd),

src/stdlib/parse_url.rs

+30
Original file line numberDiff line numberDiff line change
@@ -208,5 +208,35 @@ mod tests {
208208
})),
209209
tdef: TypeDef::object(inner_kind()).fallible(),
210210
}
211+
212+
punycode {
213+
args: func_args![value: value!("https://www.café.com")],
214+
want: Ok(value!({
215+
fragment: (),
216+
host: "www.xn--caf-dma.com",
217+
password: "",
218+
path: "/",
219+
port: (),
220+
query: {},
221+
scheme: "https",
222+
username: "",
223+
})),
224+
tdef: TypeDef::object(inner_kind()).fallible(),
225+
}
226+
227+
punycode_mixed_case {
228+
args: func_args![value: value!("https://www.CAFé.com")],
229+
want: Ok(value!({
230+
fragment: (),
231+
host: "www.xn--caf-dma.com",
232+
password: "",
233+
path: "/",
234+
port: (),
235+
query: {},
236+
scheme: "https",
237+
username: "",
238+
})),
239+
tdef: TypeDef::object(inner_kind()).fallible(),
240+
}
211241
];
212242
}

0 commit comments

Comments
 (0)