diff --git a/crates/oxc_ast/src/ast/literal.rs b/crates/oxc_ast/src/ast/literal.rs
index 39310b5b5ede2..8e8c06de915cc 100644
--- a/crates/oxc_ast/src/ast/literal.rs
+++ b/crates/oxc_ast/src/ast/literal.rs
@@ -5,6 +5,7 @@ use std::{
     hash::{Hash, Hasher},
 };
 
+use bitflags::bitflags;
 use num_bigint::BigUint;
 use ordered_float::NotNan;
 use serde::{
@@ -110,7 +111,7 @@ pub struct RegExpLiteral {
 #[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
 pub struct RegExp {
     pub pattern: Atom,
-    pub flags: Atom,
+    pub flags: RegExpFlags,
 }
 
 impl fmt::Display for RegExp {
@@ -119,6 +120,55 @@ impl fmt::Display for RegExp {
     }
 }
 
+bitflags! {
+    /// Flags of a regular expression literal; each constant is the flag written as the
+    /// same lowercase letter (`G` is `g`, `I` is `i`, ...).
+    pub struct RegExpFlags: u8 {
+        const G = 1 << 0;
+        const I = 1 << 1;
+        const M = 1 << 2;
+        const S = 1 << 3;
+        const U = 1 << 4;
+        const Y = 1 << 5;
+        const D = 1 << 6;
+        /// v flag from `https://github.com/tc39/proposal-regexp-set-notation`
+        const V = 1 << 7;
+    }
+}
+
+impl fmt::Display for RegExpFlags {
+    /// Writes the letter of every set flag in the fixed order `gimsuydv`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Flag/letter pairs, listed in output order.
+        const LETTERS: [(RegExpFlags, &str); 8] = [
+            (RegExpFlags::G, "g"),
+            (RegExpFlags::I, "i"),
+            (RegExpFlags::M, "m"),
+            (RegExpFlags::S, "s"),
+            (RegExpFlags::U, "u"),
+            (RegExpFlags::Y, "y"),
+            (RegExpFlags::D, "d"),
+            (RegExpFlags::V, "v"),
+        ];
+        for &(flag, letter) in &LETTERS {
+            if self.contains(flag) {
+                f.write_str(letter)?;
+            }
+        }
+        Ok(())
+    }
+}
+
+impl Serialize for RegExpFlags {
+    /// Serializes the flags as the string produced by the `Display` impl.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        serializer.serialize_str(&self.to_string())
+    }
+}
+
 #[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash)]
 pub struct EmptyObject {}
 
diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs
index 497a0988c4fbe..cdb50a12fda21 100644
--- a/crates/oxc_parser/src/lexer/mod.rs
+++ b/crates/oxc_parser/src/lexer/mod.rs
@@ -21,7 +21,7 @@ use constants::{
 pub use kind::Kind;
 use number::{parse_big_int, parse_float, parse_int};
 use oxc_allocator::{Allocator, String};
-use oxc_ast::{Atom, Node, SourceType};
+use oxc_ast::{ast::RegExpFlags, Atom, Node, SourceType};
 use oxc_diagnostics::{Diagnostic, Diagnostics};
 use simd::{SkipMultilineComment, SkipWhitespace};
 use string_builder::AutoCow;
@@ -1010,35 +1010,39 @@ impl<'a> Lexer<'a> {
 
         pattern.push_str(&start[..start.len() - self.current.chars.as_str().len() - 1]);
 
-        let mut flags = String::new_in(self.allocator);
-        while let c @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
-            self.current.chars.next();
-            flags.push(c);
-        }
+        let mut flags = RegExpFlags::empty();
 
-        // v flag from https://github.com/tc39/proposal-regexp-set-notation
-        let gimsuy_mask: u32 =
-            ['g', 'i', 'm', 's', 'u', 'y', 'd', 'v'].iter().map(|x| 1 << ((*x as u8) - b'a')).sum();
-        let mut flag_text_set: u32 = 0;
-        for ch in flags.chars() {
+        // Consume every flag character, reporting invalid or repeated ones as it goes.
+        while let ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9') = self.peek() {
+            self.current.chars.next();
             if !ch.is_ascii_lowercase() {
                 self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
                 continue;
             }
-            let ch_mask = 1 << ((ch as u8) - b'a');
-            if ch_mask & gimsuy_mask == 0 {
-                self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
-            }
-            if flag_text_set & ch_mask != 0 {
+            let flag = match ch {
+                'g' => RegExpFlags::G,
+                'i' => RegExpFlags::I,
+                'm' => RegExpFlags::M,
+                's' => RegExpFlags::S,
+                'u' => RegExpFlags::U,
+                'y' => RegExpFlags::Y,
+                'd' => RegExpFlags::D,
+                'v' => RegExpFlags::V,
+                _ => {
+                    self.error(Diagnostic::RegExpFlag(ch, self.current_offset()));
+                    continue;
+                }
+            };
+            if flags.contains(flag) {
                 self.error(Diagnostic::RegExpFlagTwice(ch, self.current_offset()));
+                continue;
             }
-            flag_text_set |= ch_mask;
+            flags |= flag;
         }
 
-        self.current.token.value = TokenValue::RegExp(RegExp {
-            pattern: Atom::from(pattern.as_str()),
-            flags: Atom::from(flags.as_str()),
-        });
+        self.current.token.value =
+            TokenValue::RegExp(RegExp { pattern: Atom::from(pattern.as_str()), flags });
+
         Kind::RegExp
     }
 
diff --git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs
index 5ec9bcb6ae38b..16f7fae5b2841 100644
--- a/crates/oxc_parser/src/lexer/token.rs
+++ b/crates/oxc_parser/src/lexer/token.rs
@@ -1,7 +1,7 @@
 //! Token
 
 use num_bigint::BigUint;
-use oxc_ast::{Atom, Node};
+use oxc_ast::{ast::RegExpFlags, Atom, Node};
 
 use super::kind::Kind;
 
@@ -25,6 +25,14 @@ pub struct Token {
     pub value: TokenValue,
 }
 
+// Pins the size of `Token` on 64-bit x86_64 so any growth of the type fails this test.
+#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
+#[test]
+fn no_bloat_token() {
+    use std::mem::size_of;
+    assert_eq!(size_of::<Token>(), 56);
+}
+
 impl Token {
     #[must_use]
     pub const fn node(&self) -> Node {
@@ -44,7 +52,7 @@ pub enum TokenValue {
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct RegExp {
     pub pattern: Atom,
-    pub flags: Atom,
+    pub flags: RegExpFlags,
 }
 
 impl Default for TokenValue {
diff --git a/crates/oxc_printer/src/gen.rs b/crates/oxc_printer/src/gen.rs
index f386899effd3f..eae1e7021a70c 100644
--- a/crates/oxc_printer/src/gen.rs
+++ b/crates/oxc_printer/src/gen.rs
@@ -841,7 +841,7 @@ impl Gen for RegExpLiteral {
         p.print(b'/');
         p.print_str(self.regex.pattern.as_bytes());
         p.print(b'/');
-        p.print_str(self.regex.flags.as_bytes());
+        p.print_str(self.regex.flags.to_string().as_bytes());
     }
 }
 