diff --git a/Cargo.lock b/Cargo.lock
index 6ebfcfcb3db76..528e4490594b4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2264,6 +2264,7 @@ name = "oxc_regular_expression"
 version = "0.103.0"
 dependencies = [
  "bitflags",
+ "insta",
  "oxc_allocator",
  "oxc_ast_macros",
  "oxc_diagnostics",
diff --git a/crates/oxc_regular_expression/Cargo.toml b/crates/oxc_regular_expression/Cargo.toml
index aa6e521f8bfce..aaad064403ce7 100644
--- a/crates/oxc_regular_expression/Cargo.toml
+++ b/crates/oxc_regular_expression/Cargo.toml
@@ -30,5 +30,8 @@ phf = { workspace = true, features = ["macros"] }
 rustc-hash = { workspace = true }
 unicode-id-start = { workspace = true }
 
+[dev-dependencies]
+insta = { workspace = true }
+
 [package.metadata.cargo-shear]
 ignored-paths = ["src/generated/derive_get_address.rs"]
diff --git a/crates/oxc_regular_expression/tests/diagnostics.rs b/crates/oxc_regular_expression/tests/diagnostics.rs
new file mode 100644
index 0000000000000..ec63066ece1a9
--- /dev/null
+++ b/crates/oxc_regular_expression/tests/diagnostics.rs
@@ -0,0 +1,108 @@
+//! Snapshot tests for regex parse error diagnostics.
+//!
+//! Each test case triggers one specific error type to verify
+//! that error messages and help text are correct.
+
+use oxc_allocator::Allocator;
+use oxc_diagnostics::{GraphicalReportHandler, GraphicalTheme, NamedSource};
+use oxc_regular_expression::{LiteralParser, Options};
+use std::fmt::Write;
+
+/// Parses the regex literal `/{pattern}/{flags}` and renders the parse error it produces.
+///
+/// The literal text serves as both the name and the contents of the report's
+/// source, and the report is rendered with the `unicode_nocolor` theme.
+///
+/// # Panics
+///
+/// Panics if the literal parses successfully.
+fn test_err(allocator: &Allocator, pattern: &str, flags: Option<&str>) -> String {
+    let literal = format!("/{pattern}/{}", flags.unwrap_or(""));
+    // Byte positions inside `literal`: the pattern starts after the opening `/`,
+    // the flags start after the pattern and the closing `/`.
+    let options = Options {
+        pattern_span_offset: 1,
+        flags_span_offset: u32::try_from(pattern.len()).unwrap() + 2,
+    };
+    let failure = LiteralParser::new(allocator, pattern, flags, options)
+        .parse()
+        .expect_err(&format!("{literal} should fail to parse"));
+
+    let report = failure.with_source_code(NamedSource::new(literal.clone(), literal));
+    let mut rendered = String::new();
+    GraphicalReportHandler::new_themed(GraphicalTheme::unicode_nocolor())
+        .render_report(&mut rendered, report.as_ref())
+        .unwrap();
+    rendered
+}
+
+/// Renders the diagnostic of every case below into one document and snapshots it.
+#[test]
+fn test() {
+    let allocator = Allocator::default();
+
+    // Each entry is (pattern, flags, heading written above the rendered diagnostic).
+    let cases: &[(&str, Option<&str>, &str)] = &[
+        // Flag errors
+        ("a", Some("z"), "unknown_flag"),
+        ("a", Some("gg"), "duplicated_flags"),
+        ("a", Some("uv"), "invalid_unicode_flags"),
+        // Capturing group errors
+        // NOTE(review): the snapshot labels two spans for the next case, starting at
+        // column 5 — confirm the pattern text was not altered (e.g. a lost `<name>`).
+        (r"(?.)(?.)", Some(""), "duplicated_capturing_group_names"),
+        (r"(?<>a)", Some(""), "empty_group_specifier"),
+        // Quantifier errors
+        ("+", Some("u"), "lone_quantifier"),
+        ("a|+", Some(""), "invalid_braced_quantifier"),
+        ("a{", Some("u"), "invalid_braced_quantifier_unicode"),
+        (
+            r"x{99999999999999999999999999999999999999999999999999}",
+            Some(""),
+            "too_large_number_in_braced_quantifier",
+        ),
+        (r"a{2,1}", Some(""), "braced_quantifier_out_of_order"),
+        // Unterminated patterns
+        ("(", Some(""), "unterminated_group"),
+        ("[", Some(""), "unterminated_character_class"),
+        // Reference errors
+        (r"\1", Some("u"), "invalid_indexed_reference"),
+        // NOTE(review): the snapshot underlines five characters for the next case,
+        // more than this pattern holds — confirm the pattern text was not altered.
+        (r"\k", Some("u"), "invalid_named_reference"),
+        // Unicode property errors
+        (r"\P{Basic_Emoji}", Some("v"), "invalid_unicode_property_name_negative_strings"),
+        (r"\p{Basic_Emoji}", Some("u"), "invalid_unicode_property_of_strings"),
+        (r"\p{Foo}", Some("u"), "invalid_unicode_property"),
+        // Character class errors
+        ("[z-a]", Some(""), "character_class_range_out_of_order"),
+        (r"[\d-z]", Some("u"), "character_class_range_invalid_atom"),
+        (r"[a-\d]", Some("u"), "invalid_class_atom"),
+        // Unicode sets mode (v flag) errors
+        (r"[&&]", Some("v"), "empty_class_set_expression"),
+        (r"[a&&&b]", Some("v"), "class_intersection_unexpected_ampersand"),
+        (r"[a&&]", Some("v"), "class_set_expression_invalid_character"),
+        (r"[a&&b(]", Some("v"), "class_set_expression_invalid_character"),
+        (r"[[^\q{ng}]]", Some("v"), "character_class_contents_invalid_operands"),
+        // Unicode escape errors
+        (r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", Some("u"), "too_large_number_digits"),
+        (r"\ua", Some("u"), "invalid_unicode_escape_sequence"),
+        (r"\u{110000}", Some("u"), "invalid_surrogate_pair"),
+        // Escape errors
+        (r"\c0", Some("u"), "invalid_extended_atom_escape"),
+        // Modifier errors
+        (r"(?ii:.)", Some(""), "invalid_modifiers"),
+        (r"(?a:.)", Some(""), "unknown_modifiers"),
+        // Parse errors
+        (")", Some("v"), "parse_pattern_incomplete"),
+    ];
+
+    let snapshot = cases.iter().fold(String::new(), |mut doc, &(pattern, flags, name)| {
+        writeln!(doc, "# {name}").unwrap();
+        doc.push_str(&test_err(&allocator, pattern, flags));
+        doc.push('\n');
+        doc
+    });
+
+    insta::assert_snapshot!(snapshot);
+}
diff --git a/crates/oxc_regular_expression/tests/snapshots/diagnostics__test.snap b/crates/oxc_regular_expression/tests/snapshots/diagnostics__test.snap
new file mode 100644
index 0000000000000..d42c8d779fe10
--- /dev/null
+++ b/crates/oxc_regular_expression/tests/snapshots/diagnostics__test.snap
@@ -0,0 +1,260 @@
+---
+source: crates/oxc_regular_expression/tests/diagnostics.rs
+expression: snapshot
+---
+# unknown_flag
+
+  × Invalid regular expression: Unknown flag: `z` found
+   ╭─[/a/z:1:4]
+ 1 │ /a/z
+   ·    ─
+   ╰────
+
+# duplicated_flags
+
+  × Invalid regular expression: Duplicated flag: 
`g` found + ╭─[/a/gg:1:5] + 1 │ /a/gg + · ─ + ╰──── + +# invalid_unicode_flags + + × Invalid regular expression: Invalid unicode flags combination `u` and `v` + ╭─[/a/uv:1:5] + 1 │ /a/uv + · ─ + ╰──── + +# duplicated_capturing_group_names + + × Invalid regular expression: Duplicated capturing group names + ╭─[/(?.)(?.)/:1:5] + 1 │ /(?.)(?.)/ + · ─ ─ + ╰──── + +# empty_group_specifier + + × Invalid regular expression: Unterminated capturing group name + ╭─[/(?<>a)/:1:4] + 1 │ /(?<>a)/ + · ─ + ╰──── + +# lone_quantifier + + × Invalid regular expression: Lone quantifier found, expected with `Atom` + ╭─[/+/u:1:2] + 1 │ /+/u + · ─ + ╰──── + +# invalid_braced_quantifier + + × Invalid regular expression: Invalid braced quantifier + ╭─[/a|+/:1:4] + 1 │ /a|+/ + · ─ + ╰──── + +# invalid_braced_quantifier_unicode + + × Invalid regular expression: Could not parse the entire pattern + ╭─[/a{/u:1:3] + 1 │ /a{/u + · ▲ + ╰──── + +# too_large_number_in_braced_quantifier + + × Invalid regular expression: Number is too large in decimal digits + ╭─[/x{99999999999999999999999999999999999999999999999999}/:1:4] + 1 │ /x{99999999999999999999999999999999999999999999999999}/ + · ─────────────────── + ╰──── + +# braced_quantifier_out_of_order + + × Invalid regular expression: Numbers out of order in braced quantifier + ╭─[/a{2,1}/:1:3] + 1 │ /a{2,1}/ + · ───── + ╰──── + +# unterminated_group + + × Invalid regular expression: Unterminated capturing group + ╭─[/(/:1:2] + 1 │ /(/ + · ─ + ╰──── + +# unterminated_character_class + + × Invalid regular expression: Unterminated character class + ╭─[/[/:1:2] + 1 │ /[/ + · ─ + ╰──── + +# invalid_indexed_reference + + × Invalid regular expression: Invalid indexed reference + ╭─[/\1/u:1:2] + 1 │ /\1/u + · ── + ╰──── + +# invalid_named_reference + + × Invalid regular expression: Group specifier is empty + ╭─[/\k/u:1:2] + 1 │ /\k/u + · ───── + ╰──── + +# invalid_unicode_property_name_negative_strings + + × Invalid regular expression: Invalid property name 
`Basic_Emoji`(negative + property of strings) + ╭─[/\P{Basic_Emoji}/v:1:2] + 1 │ /\P{Basic_Emoji}/v + · ─────────────── + ╰──── + +# invalid_unicode_property_of_strings + + × Invalid regular expression: Invalid unicode property `Basic_Emoji` + ╭─[/\p{Basic_Emoji}/u:1:5] + 1 │ /\p{Basic_Emoji}/u + · ─────────── + ╰──── + help: Enable `UnicodeSetsMode` to use this property + +# invalid_unicode_property + + × Invalid regular expression: Invalid unicode property name and/or value + ╭─[/\p{Foo}/u:1:5] + 1 │ /\p{Foo}/u + · ─── + ╰──── + +# character_class_range_out_of_order + + × Invalid regular expression: Character class atom range out of order + ╭─[/[z-a]/:1:4] + 1 │ /[z-a]/ + · ── + ╰──── + +# character_class_range_invalid_atom + + × Invalid regular expression: Character class range with invalid atom + ╭─[/[\d-z]/u:1:3] + 1 │ /[\d-z]/u + · ──── + ╰──── + +# invalid_class_atom + + × Invalid regular expression: Character class range with invalid atom + ╭─[/[a-\d]/u:1:3] + 1 │ /[a-\d]/u + · ──── + ╰──── + +# empty_class_set_expression + + × Invalid regular expression: Expected nonempty class set expression + ╭─[/[&&]/v:1:3] + 1 │ /[&&]/v + · ▲ + ╰──── + +# class_intersection_unexpected_ampersand + + × Invalid regular expression: Unexpected `&` inside of class intersection + ╭─[/[a&&&b]/v:1:6] + 1 │ /[a&&&b]/v + · ─ + ╰──── + +# class_set_expression_invalid_character + + × Invalid regular expression: Unexpected character inside of class intersection + ╭─[/[a&&]/v:1:6] + 1 │ /[a&&]/v + · ▲ + ╰──── + +# class_set_expression_invalid_character + + × Invalid regular expression: Unexpected character inside of class intersection + ╭─[/[a&&b(]/v:1:7] + 1 │ /[a&&b(]/v + · ▲ + ╰──── + +# character_class_contents_invalid_operands + + × Invalid regular expression: Invalid class operands inside of character class contents + ╭─[/[[^\q{ng}]]/v:1:3] + 1 │ /[[^\q{ng}]]/v + · ───────── + ╰──── + +# too_large_number_digits + + × Invalid regular expression: Number is too large in hex digits 
+ ╭─[/\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}/u:1:5] + 1 │ /\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}/u + · ──────── + ╰──── + +# invalid_unicode_escape_sequence + + × Invalid regular expression: Invalid unicode escape sequence + ╭─[/\ua/u:1:3] + 1 │ /\ua/u + · ─ + ╰──── + +# invalid_surrogate_pair + + × Invalid regular expression: Invalid unicode escape sequence + ╭─[/\u{110000}/u:1:3] + 1 │ /\u{110000}/u + · ─ + ╰──── + +# invalid_extended_atom_escape + + × Invalid regular expression: Could not parse the entire pattern + ╭─[/\c0/u:1:3] + 1 │ /\c0/u + · ▲ + ╰──── + +# invalid_modifiers + + × Invalid regular expression: Invalid modifiers + ╭─[/(?ii:.)/:1:4] + 1 │ /(?ii:.)/ + · ── + ╰──── + +# unknown_modifiers + + × Invalid regular expression: Unknown modifiers + ╭─[/(?a:.)/:1:4] + 1 │ /(?a:.)/ + · ▲ + ╰──── + +# parse_pattern_incomplete + + × Invalid regular expression: Could not parse the entire pattern + ╭─[/)/v:1:2] + 1 │ /)/v + · ▲ + ╰────