Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/oxc_regular_expression/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ oxc_ast_macros = { workspace = true }
oxc_diagnostics = { workspace = true }
oxc_span = { workspace = true }

bitflags = { workspace = true }
phf = { workspace = true, features = ["macros"] }
rustc-hash = { workspace = true }
unicode-id-start = { workspace = true }
2 changes: 1 addition & 1 deletion crates/oxc_regular_expression/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Implements ECMAScript® 2024 Language Specification
- https://tc39.es/ecma262/2024/multipage/text-processing.html#sec-regexp-regular-expression-objects
- https://tc39.es/ecma262/2024/multipage/additional-ecmascript-features-for-web-browsers.html#sec-regular-expressions-patterns

And, Stage 3 proposals
And the following Stage 4 proposals:

- https://github.com/tc39/proposal-duplicate-named-capturing-groups
- https://github.com/tc39/proposal-regexp-modifiers
29 changes: 19 additions & 10 deletions crates/oxc_regular_expression/src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use bitflags::bitflags;

use oxc_allocator::{Box, CloneIn, GetAddress, Vec};
use oxc_ast_macros::ast;
use oxc_span::{Atom, ContentEq, Span};
Expand Down Expand Up @@ -283,19 +285,26 @@ pub struct IgnoreGroup<'a> {
#[generate_derive(CloneIn, ContentEq)]
pub struct Modifiers {
pub span: Span,
pub enabling: Option<Modifier>,
pub disabling: Option<Modifier>,
pub enabling: Modifier,
pub disabling: Modifier,
}

/// Each part of modifier in [`Modifiers`].
#[ast]
#[derive(Debug)]
#[generate_derive(CloneIn, ContentEq)]
pub struct Modifier {
pub ignore_case: bool,
pub multiline: bool,
pub sticky: bool,
bitflags! {
/// Set of regular-expression modifier flags held by one side
/// (`enabling` or `disabling`) of a [`Modifiers`] node.
///
/// Stored as a single `u8` bit set; an empty set means that side of the
/// modifiers carried no flags.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Modifier: u8 {
/// Ignore case flag (`i`)
const I = 1 << 0;
/// Multiline flag (`m`)
const M = 1 << 1;
/// DotAll flag (`s`)
const S = 1 << 2;
}
}
/// Dummy type to communicate the content of `Modifier` to `oxc_ast_tools`.
///
/// `Modifier` itself is declared through the `bitflags!` macro rather than as an
/// `#[ast]` type, so this alias is tagged `#[ast(foreign = Modifier)]` and mirrors
/// its `u8` representation. Nothing else uses it, hence `#[expect(dead_code)]`.
#[ast(foreign = Modifier)]
#[expect(dead_code)]
struct ModifierAlias(u8);

/// Backreference by index.
/// e.g. `\1`, `\2`, `\3`
Expand Down
11 changes: 11 additions & 0 deletions crates/oxc_regular_expression/src/ast_impl/allocator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use oxc_allocator::{Allocator, CloneIn};

use crate::ast::Modifier;

/// `Modifier` is a `Copy` bit set, so cloning it into another allocator is a
/// plain copy and the allocator argument goes unused.
impl<'alloc> CloneIn<'alloc> for Modifier {
    type Cloned = Modifier;

    fn clone_in(&self, _allocator: &'alloc Allocator) -> Modifier {
        *self
    }
}
16 changes: 8 additions & 8 deletions crates/oxc_regular_expression/src/ast_impl/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,14 +250,14 @@ impl Display for CapturingGroup<'_> {

impl Display for IgnoreGroup<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn write_flags(f: &mut fmt::Formatter<'_>, flags: &Modifier) -> fmt::Result {
if flags.ignore_case {
fn write_flags(f: &mut fmt::Formatter<'_>, flags: Modifier) -> fmt::Result {
if flags.contains(Modifier::I) {
f.write_str("i")?;
}
if flags.multiline {
if flags.contains(Modifier::M) {
f.write_str("m")?;
}
if flags.sticky {
if flags.contains(Modifier::S) {
f.write_str("s")?;
}
Ok(())
Expand All @@ -266,12 +266,12 @@ impl Display for IgnoreGroup<'_> {
f.write_str("(?")?;

if let Some(modifiers) = &self.modifiers {
if let Some(enabling) = &modifiers.enabling {
write_flags(f, enabling)?;
if !modifiers.enabling.is_empty() {
write_flags(f, modifiers.enabling)?;
}
if let Some(disabling) = &modifiers.disabling {
if !modifiers.disabling.is_empty() {
f.write_str("-")?;
write_flags(f, disabling)?;
write_flags(f, modifiers.disabling)?;
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/oxc_regular_expression/src/ast_impl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod allocator;
mod display;
mod span;
pub mod visit;
12 changes: 10 additions & 2 deletions crates/oxc_regular_expression/src/ast_impl/span.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// NOTE: For now, this file is implemented by hand for convenience.
// But like `oxc_ast`, this should be generated by `tasks/ast_tools` in the future.

use oxc_span::{GetSpan, Span};
use oxc_span::{ContentEq, GetSpan, Span};

use crate::ast::{CharacterClassContents, Term};
use crate::ast::{CharacterClassContents, Modifier, Term};

impl GetSpan for Term<'_> {
#[inline]
Expand Down Expand Up @@ -38,3 +38,11 @@ impl GetSpan for CharacterClassContents<'_> {
}
}
}

// ---

/// Content equality for `Modifier` defers to its derived `PartialEq`:
/// two sets are content-equal exactly when they compare equal.
impl ContentEq for Modifier {
    fn content_eq(&self, rhs: &Self) -> bool {
        *self == *rhs
    }
}
22 changes: 8 additions & 14 deletions crates/oxc_regular_expression/src/generated/assert_layouts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,25 +135,22 @@ const _: () = {
assert!(offset_of!(CapturingGroup, body) == 24);

// Padding: 0 bytes
assert!(size_of::<IgnoreGroup>() == 56);
assert!(size_of::<IgnoreGroup>() == 64);
assert!(align_of::<IgnoreGroup>() == 8);
assert!(offset_of!(IgnoreGroup, span) == 0);
assert!(offset_of!(IgnoreGroup, modifiers) == 8);
assert!(offset_of!(IgnoreGroup, body) == 24);
assert!(offset_of!(IgnoreGroup, body) == 32);

// Padding: 2 bytes
// Padding: 6 bytes
assert!(size_of::<Modifiers>() == 16);
assert!(align_of::<Modifiers>() == 8);
assert!(offset_of!(Modifiers, span) == 0);
assert!(offset_of!(Modifiers, enabling) == 8);
assert!(offset_of!(Modifiers, disabling) == 11);
assert!(offset_of!(Modifiers, disabling) == 9);

// Padding: 0 bytes
assert!(size_of::<Modifier>() == 3);
assert!(size_of::<Modifier>() == 1);
assert!(align_of::<Modifier>() == 1);
assert!(offset_of!(Modifier, ignore_case) == 0);
assert!(offset_of!(Modifier, multiline) == 1);
assert!(offset_of!(Modifier, sticky) == 2);

// Padding: 4 bytes
assert!(size_of::<IndexedReference>() == 16);
Expand Down Expand Up @@ -303,18 +300,15 @@ const _: () = {
assert!(offset_of!(IgnoreGroup, body) == 24);

// Padding: 2 bytes
assert!(size_of::<Modifiers>() == 16);
assert!(size_of::<Modifiers>() == 12);
assert!(align_of::<Modifiers>() == 4);
assert!(offset_of!(Modifiers, span) == 0);
assert!(offset_of!(Modifiers, enabling) == 8);
assert!(offset_of!(Modifiers, disabling) == 11);
assert!(offset_of!(Modifiers, disabling) == 9);

// Padding: 0 bytes
assert!(size_of::<Modifier>() == 3);
assert!(size_of::<Modifier>() == 1);
assert!(align_of::<Modifier>() == 1);
assert!(offset_of!(Modifier, ignore_case) == 0);
assert!(offset_of!(Modifier, multiline) == 1);
assert!(offset_of!(Modifier, sticky) == 2);

// Padding: 0 bytes
assert!(size_of::<IndexedReference>() == 12);
Expand Down
20 changes: 0 additions & 20 deletions crates/oxc_regular_expression/src/generated/derive_clone_in.rs
Original file line number Diff line number Diff line change
Expand Up @@ -529,26 +529,6 @@ impl<'new_alloc> CloneIn<'new_alloc> for Modifiers {
}
}

impl<'new_alloc> CloneIn<'new_alloc> for Modifier {
type Cloned = Modifier;

fn clone_in(&self, allocator: &'new_alloc Allocator) -> Self::Cloned {
Modifier {
ignore_case: CloneIn::clone_in(&self.ignore_case, allocator),
multiline: CloneIn::clone_in(&self.multiline, allocator),
sticky: CloneIn::clone_in(&self.sticky, allocator),
}
}

fn clone_in_with_semantic_ids(&self, allocator: &'new_alloc Allocator) -> Self::Cloned {
Modifier {
ignore_case: CloneIn::clone_in_with_semantic_ids(&self.ignore_case, allocator),
multiline: CloneIn::clone_in_with_semantic_ids(&self.multiline, allocator),
sticky: CloneIn::clone_in_with_semantic_ids(&self.sticky, allocator),
}
}
}

impl<'new_alloc> CloneIn<'new_alloc> for IndexedReference {
type Cloned = IndexedReference;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,6 @@ impl ContentEq for Modifiers {
}
}

impl ContentEq for Modifier {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.ignore_case, &other.ignore_case)
&& ContentEq::content_eq(&self.multiline, &other.multiline)
&& ContentEq::content_eq(&self.sticky, &other.sticky)
}
}

impl ContentEq for IndexedReference {
fn content_eq(&self, other: &Self) -> bool {
ContentEq::content_eq(&self.index, &other.index)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1578,22 +1578,31 @@ impl<'a> PatternParser<'a> {
fn parse_modifiers(&mut self) -> Result<Option<ast::Modifiers>> {
let span_start = self.reader.offset();

// Currently only `[i, m, s]` are supported
let mut enabling_flags = [0, 0, 0];
let mut disabling_flags = [0, 0, 0];
let mut enabling = ast::Modifier::empty();
let mut disabling = ast::Modifier::empty();
let mut duplicate = false;

// Enabling
while self.reader.peek().filter(|&cp| cp == ':' as u32 || cp == '-' as u32).is_none() {
if self.reader.eat('i') {
enabling_flags[0] += 1;
if enabling.contains(ast::Modifier::I) {
duplicate = true;
}
enabling |= ast::Modifier::I;
continue;
}
if self.reader.eat('m') {
enabling_flags[1] += 1;
if enabling.contains(ast::Modifier::M) {
duplicate = true;
}
enabling |= ast::Modifier::M;
continue;
}
if self.reader.eat('s') {
enabling_flags[2] += 1;
if enabling.contains(ast::Modifier::S) {
duplicate = true;
}
enabling |= ast::Modifier::S;
continue;
}

Expand All @@ -1606,15 +1615,24 @@ impl<'a> PatternParser<'a> {
if self.reader.eat('-') {
while self.reader.peek().filter(|&cp| cp == ':' as u32).is_none() {
if self.reader.eat('i') {
disabling_flags[0] += 1;
if disabling.contains(ast::Modifier::I) {
duplicate = true;
}
disabling |= ast::Modifier::I;
continue;
}
if self.reader.eat('m') {
disabling_flags[1] += 1;
if disabling.contains(ast::Modifier::M) {
duplicate = true;
}
disabling |= ast::Modifier::M;
continue;
}
if self.reader.eat('s') {
disabling_flags[2] += 1;
if disabling.contains(ast::Modifier::S) {
duplicate = true;
}
disabling |= ast::Modifier::S;
continue;
}

Expand All @@ -1624,40 +1642,27 @@ impl<'a> PatternParser<'a> {
}
}

let (enabling_iter, disabling_iter) = (enabling_flags.iter(), disabling_flags.iter());

// [SS:EE] Atom :: (? RegularExpressionModifiers : Disjunction )
// It is a Syntax Error if the source text matched by RegularExpressionModifiers contains the same code point more than once.
// [SS:EE] Atom :: (? RegularExpressionModifiers - RegularExpressionModifiers : Disjunction )
// It is a Syntax Error if the source text matched by the first RegularExpressionModifiers and the source text matched by the second RegularExpressionModifiers are both empty.
// It is a Syntax Error if the source text matched by the first RegularExpressionModifiers contains the same code point more than once.
// It is a Syntax Error if the source text matched by the second RegularExpressionModifiers contains the same code point more than once.
// It is a Syntax Error if any code point in the source text matched by the first RegularExpressionModifiers is also contained in the source text matched by the second RegularExpressionModifiers.
let flags_iter = enabling_iter.clone().zip(disabling_iter.clone());
if flags_iter.clone().any(|flags| !matches!(flags, (0 | 1, 0) | (0, 1))) {
return Err(diagnostics::invalid_modifiers(
self.span_factory.create(span_start, self.reader.offset()),
));
}
// NOTE: Spec is not yet fixed and merged, so these may change:
// https://github.com/tc39/ecma262/pull/3221#pullrequestreview-2341169958
if flags_iter.clone().all(|flags| matches!(flags, (0, 0))) {
if enabling.is_empty() && disabling.is_empty()
|| duplicate
|| [ast::Modifier::I, ast::Modifier::M, ast::Modifier::S]
.iter()
.any(|&modifier| enabling.contains(modifier) && disabling.contains(modifier))
{
return Err(diagnostics::invalid_modifiers(
self.span_factory.create(span_start, self.reader.offset()),
));
}

Ok(Some(ast::Modifiers {
span: self.span_factory.create(span_start, self.reader.offset()),
enabling: enabling_iter.clone().any(|f| *f == 1).then(|| ast::Modifier {
ignore_case: enabling_flags[0] == 1,
multiline: enabling_flags[1] == 1,
sticky: enabling_flags[2] == 1,
}),
disabling: disabling_iter.clone().any(|f| *f == 1).then(|| ast::Modifier {
ignore_case: disabling_flags[0] == 1,
multiline: disabling_flags[1] == 1,
sticky: disabling_flags[2] == 1,
}),
enabling,
disabling,
}))
}

Expand Down
9 changes: 2 additions & 7 deletions napi/parser/generated/deserialize/js.js
Original file line number Diff line number Diff line change
Expand Up @@ -5389,13 +5389,8 @@ function deserializeVecCharacter(pos) {
}

function deserializeOptionModifiers(pos) {
if (uint8[pos + 8] === 3) return null;
return deserializeModifiers(pos);
}

function deserializeOptionModifier(pos) {
if (uint8[pos] === 2) return null;
return deserializeModifier(pos);
if (uint8[pos] === 0) return null;
return deserializeModifiers(pos + 8);
}

function deserializeVecError(pos) {
Expand Down
9 changes: 2 additions & 7 deletions napi/parser/generated/deserialize/ts.js
Original file line number Diff line number Diff line change
Expand Up @@ -5541,13 +5541,8 @@ function deserializeVecCharacter(pos) {
}

function deserializeOptionModifiers(pos) {
if (uint8[pos + 8] === 3) return null;
return deserializeModifiers(pos);
}

function deserializeOptionModifier(pos) {
if (uint8[pos] === 2) return null;
return deserializeModifier(pos);
if (uint8[pos] === 0) return null;
return deserializeModifiers(pos + 8);
}

function deserializeVecError(pos) {
Expand Down
Loading