From 242212d462535eb76dc82ccda6565253f14d8f7d Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 10 Dec 2023 23:31:38 +0800
Subject: [PATCH 01/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ckpoint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                              |   1 +
 crates/oxc_js_regex/Cargo.toml          |   1 +
 crates/oxc_js_regex/src/ast.rs          |  54 +++----
 crates/oxc_js_regex/src/ast_builder.rs  |  73 ++++++++++
 crates/oxc_js_regex/src/ast_kind.rs     |  25 ++++
 crates/oxc_js_regex/src/ecma_version.rs |  22 +++
 crates/oxc_js_regex/src/lib.rs          |   3 +
 crates/oxc_js_regex/src/parser.rs       | 178 ++++++++++++++++++++++++
 8 files changed, 331 insertions(+), 26 deletions(-)
 create mode 100644 crates/oxc_js_regex/src/ast_builder.rs
 create mode 100644 crates/oxc_js_regex/src/ast_kind.rs
 create mode 100644 crates/oxc_js_regex/src/ecma_version.rs

diff --git a/Cargo.lock b/Cargo.lock
index 3b11ab893b40c..b054d73d70750 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1600,6 +1600,7 @@ name = "oxc_js_regex"
 version = "0.0.0"
 dependencies = [
  "oxc_allocator",
+ "oxc_diagnostics",
  "oxc_span",
 ]
 
diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
index 38dcac20c5f06..3f65315f2e836 100644
--- a/crates/oxc_js_regex/Cargo.toml
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -21,3 +21,4 @@ doctest = false
 [dependencies]
 oxc_allocator = { workspace = true }
 oxc_span      = { workspace = true }
+oxc_diagnostics = { workspace = true }
diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index 6690c5f7386fc..47ea3e203f3eb 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -3,6 +3,8 @@
 use oxc_allocator::{Box, Vec};
 use oxc_span::{Atom, Span};
 
+use crate::ast_kind::AstKind;
+
 /// The type which includes all nodes.
 #[derive(Debug)]
 pub enum Node<'a> {
@@ -42,46 +44,46 @@ pub enum Leaf<'a> {
 /// The type which includes all atom nodes.
 #[derive(Debug)]
 pub enum Element<'a> {
-    Assertion(Box<'a, Assertion<'a>>),
-    QuantifiableElement(Box<'a, QuantifiableElement<'a>>),
-    Quantifier(Box<'a, Quantifier<'a>>),
+    Assertion(Assertion<'a>),
+    QuantifiableElement(QuantifiableElement<'a>),
+    Quantifier(Quantifier<'a>),
 }
 
 /// The type which includes all atom nodes that Quantifier node can have as children.
 #[derive(Debug)]
 pub enum QuantifiableElement<'a> {
-    Backreference(Box<'a, Backreference<'a>>),
-    CapturingGroup(Box<'a, CapturingGroup<'a>>),
-    Character(Box<'a, Character>),
-    CharacterClass(Box<'a, CharacterClass<'a>>),
-    CharacterSet(Box<'a, CharacterSet<'a>>),
-    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
-    Group(Box<'a, Group<'a>>),
-    LookaheadAssertion(Box<'a, LookaheadAssertion<'a>>),
+    Backreference(Backreference<'a>),
+    CapturingGroup(CapturingGroup<'a>),
+    Character(Character),
+    CharacterClass(CharacterClass<'a>),
+    CharacterSet(CharacterSet<'a>),
+    ExpressionCharacterClass(ExpressionCharacterClass<'a>),
+    Group(Group<'a>),
+    LookaheadAssertion(LookaheadAssertion<'a>),
 }
 
 /// The type which includes all character class atom nodes.
 #[derive(Debug)]
 pub enum CharacterClassElement<'a> {
-    ClassRangesCharacterClassElement(Box<'a, ClassRangesCharacterClassElement<'a>>),
-    UnicodeSetsCharacterClassElement(Box<'a, UnicodeSetsCharacterClassElement<'a>>),
+    ClassRangesCharacterClassElement(ClassRangesCharacterClassElement),
+    UnicodeSetsCharacterClassElement(UnicodeSetsCharacterClassElement<'a>),
 }
 #[derive(Debug)]
-pub enum ClassRangesCharacterClassElement<'a> {
-    Character(Box<'a, Character>),
-    CharacterClassRange(Box<'a, CharacterClassRange>),
-    CharacterUnicodePropertyCharacterSet(Box<'a, CharacterUnicodePropertyCharacterSet>),
-    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
+pub enum ClassRangesCharacterClassElement {
+    Character(Character),
+    CharacterClassRange(CharacterClassRange),
+    CharacterUnicodePropertyCharacterSet(CharacterUnicodePropertyCharacterSet),
+    EscapeCharacterSet(EscapeCharacterSet),
 }
 #[derive(Debug)]
 pub enum UnicodeSetsCharacterClassElement<'a> {
-    Character(Box<'a, Character>),
-    CharacterClassRange(Box<'a, CharacterClassRange>),
-    ClassStringDisjunction(Box<'a, ClassStringDisjunction<'a>>),
-    EscapeCharacterSet(Box<'a, EscapeCharacterSet>),
-    ExpressionCharacterClass(Box<'a, ExpressionCharacterClass<'a>>),
-    UnicodePropertyCharacterSet(Box<'a, UnicodePropertyCharacterSet<'a>>),
-    UnicodeSetsCharacterClass(Box<'a, UnicodeSetsCharacterClass<'a>>),
+    Character(Character),
+    CharacterClassRange(CharacterClassRange),
+    ClassStringDisjunction(ClassStringDisjunction<'a>),
+    EscapeCharacterSet(EscapeCharacterSet),
+    ExpressionCharacterClass(ExpressionCharacterClass<'a>),
+    UnicodePropertyCharacterSet(UnicodePropertyCharacterSet<'a>),
+    UnicodeSetsCharacterClass(UnicodeSetsCharacterClass<'a>),
 }
 
 /// The root node.
@@ -176,7 +178,7 @@ pub enum CharacterClass<'a> {
 pub struct ClassRangesCharacterClass<'a> {
     pub span: Span,
     pub unicode_sets: bool,
-    pub elements: Vec<'a, ClassRangesCharacterClassElement<'a>>,
+    pub elements: Vec<'a, ClassRangesCharacterClassElement>,
 }
 
 /// The character class used in Unicode sets mode (`v` flag).
diff --git a/crates/oxc_js_regex/src/ast_builder.rs b/crates/oxc_js_regex/src/ast_builder.rs
new file mode 100644
index 0000000000000..c94434cdf8a06
--- /dev/null
+++ b/crates/oxc_js_regex/src/ast_builder.rs
@@ -0,0 +1,73 @@
+use oxc_allocator::{Allocator, Box, String, Vec};
+use oxc_span::{Atom, GetSpan, SourceType, Span};
+
+#[allow(clippy::wildcard_imports)]
+use crate::ast::*;
+
+/// AST builder for creating AST nodes
+pub struct AstBuilder<'a> {
+    pub allocator: &'a Allocator,
+}
+
+impl<'a> AstBuilder<'a> {
+    pub fn new(allocator: &'a Allocator) -> Self {
+        Self { allocator }
+    }
+
+    #[inline]
+    pub fn alloc<T>(&self, value: T) -> Box<'a, T> {
+        Box(self.allocator.alloc(value))
+    }
+
+    #[inline]
+    pub fn new_vec<T>(&self) -> Vec<'a, T> {
+        Vec::new_in(self.allocator)
+    }
+
+    #[inline]
+    pub fn new_vec_with_capacity<T>(&self, capacity: usize) -> Vec<'a, T> {
+        Vec::with_capacity_in(capacity, self.allocator)
+    }
+
+    #[inline]
+    pub fn new_vec_single<T>(&self, value: T) -> Vec<'a, T> {
+        let mut vec = self.new_vec_with_capacity(1);
+        vec.push(value);
+        vec
+    }
+
+    #[inline]
+    pub fn new_str(&self, value: &str) -> &'a str {
+        String::from_str_in(value, self.allocator).into_bump_str()
+    }
+
+    pub fn copy<T>(&self, src: &T) -> T {
+        // SAFETY:
+        // This should be safe as long as `src` is an reference from the allocator.
+        // But honestly, I'm not really sure if this is safe.
+        unsafe { std::mem::transmute_copy(src) }
+    }
+
+    pub fn alternative(&mut self, span: Span, elements: Vec<'a, Element<'a>>) -> Branch<'a> {
+        Branch::Alternative(self.alloc(Alternative { span, elements }))
+    }
+
+    pub fn capturing_group(
+        &mut self,
+        span: Span,
+        name: Option<Atom>,
+        alternatives: Vec<'a, Alternative<'a>>,
+        references: Vec<'a, Backreference<'a>>,
+    ) -> Branch<'a> {
+        Branch::CapturingGroup(self.alloc(CapturingGroup { span, name, alternatives, references }))
+    }
+
+    pub fn reg_exp_literal(
+        &mut self,
+        span: Span,
+        flags: Flags,
+        pattern: Pattern<'a>,
+    ) -> RegExpLiteral<'a> {
+        RegExpLiteral { span, pattern, flags }
+    }
+}
diff --git a/crates/oxc_js_regex/src/ast_kind.rs b/crates/oxc_js_regex/src/ast_kind.rs
new file mode 100644
index 0000000000000..97eb10619a408
--- /dev/null
+++ b/crates/oxc_js_regex/src/ast_kind.rs
@@ -0,0 +1,25 @@
+use super::ast::*;
+
+#[allow(unused)]
+#[derive(Debug)]
+pub enum AstKind<'a> {
+    Alternative(&'a Alternative<'a>),
+    CapturingGroup(&'a CapturingGroup<'a>),
+    CharacterClass(&'a CharacterClass<'a>),
+    CharacterClassRange(&'a CharacterClassRange),
+    ClassIntersection(&'a ClassIntersection<'a>),
+    ClassStringDisjunction(&'a ClassStringDisjunction<'a>),
+    ClassSubtraction(&'a ClassSubtraction<'a>),
+    ExpressionCharacterClass(&'a ExpressionCharacterClass<'a>),
+    Group(&'a Group<'a>),
+    LookaroundAssertion(&'a LookaroundAssertion<'a>),
+    Pattern(&'a Pattern<'a>),
+    Quantifier(&'a Quantifier<'a>),
+    RegExpLiteral(&'a RegExpLiteral<'a>),
+    StringAlternative(&'a StringAlternative<'a>),
+    Backreference(&'a Backreference<'a>),
+    BoundaryAssertion(&'a BoundaryAssertion<'a>),
+    Character(&'a Character),
+    CharacterSet(&'a CharacterSet<'a>),
+    Flags(&'a Flags),
+}
diff --git a/crates/oxc_js_regex/src/ecma_version.rs b/crates/oxc_js_regex/src/ecma_version.rs
new file mode 100644
index 0000000000000..f71e61cd125d7
--- /dev/null
+++ b/crates/oxc_js_regex/src/ecma_version.rs
@@ -0,0 +1,22 @@
+#[allow(unused)]
+#[derive(Clone, Copy, PartialEq, PartialOrd, Default)]
+pub enum EcmaVersion {
+    #[default]
+    V5,
+    V2015,
+    V2016,
+    V2017,
+    V2018,
+    V2019,
+    V2020,
+    V2021,
+    V2022,
+    V2023,
+    V2024,
+}
+#[allow(unused)]
+impl EcmaVersion {
+    pub fn latest_ecma_version() -> Self {
+        Self::V2024
+    }
+}
diff --git a/crates/oxc_js_regex/src/lib.rs b/crates/oxc_js_regex/src/lib.rs
index 6647fb03be8f5..515c301327a72 100644
--- a/crates/oxc_js_regex/src/lib.rs
+++ b/crates/oxc_js_regex/src/lib.rs
@@ -1,4 +1,7 @@
 pub mod ast;
+mod ast_builder;
+mod ast_kind;
+mod ecma_version;
 mod lexer;
 pub mod parser;
 pub mod validator;
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 8b137891791fe..f48998a39de0e 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -1 +1,179 @@
+use std::collections::{HashSet, VecDeque};
+use std::iter::Peekable;
+use std::ops::Range;
+use std::str::{CharIndices, Chars, Matches};
 
+use oxc_allocator::Allocator;
+use oxc_diagnostics::Error;
+
+use crate::ast::{Branch, Pattern, RegExpLiteral};
+use crate::ecma_version::EcmaVersion;
+
+pub struct Lexer<'a> {
+    allocator: &'a Allocator,
+
+    source: &'a str,
+    /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
+    chars: Vec<char>,
+
+    pub(crate) errors: Vec<Error>,
+}
+
+#[allow(clippy::unused_self)]
+impl<'a> Lexer<'a> {
+    pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
+        Self { source, allocator, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
+    }
+}
+
+pub struct Parser<'a> {
+    lexer: Lexer<'a>,
+
+    /// Source Code
+    source_text: &'a str,
+
+    /// All syntax errors from parser and lexer
+    /// Note: favor adding to `Diagnostics` instead of raising Err
+    errors: Vec<Error>,
+    context: ParserContext,
+    index: usize,
+    group_names: HashSet<String>,
+numCapturingParens: usize
+}
+
+#[derive(Default, Copy, Clone)]
+struct ParserContext {
+    source_kind: SourceKind,
+    unicode_mode: bool,
+    nflag: bool,
+    unicode_sets_mode: bool,
+    ecma_version: EcmaVersion,
+}
+
+impl<'a> Parser<'a> {
+    /// Create a new parser
+    pub fn new(allocator: &'a Allocator, source_text: &'a str) -> Self {
+        Self {
+            lexer: Lexer::new(allocator, source_text),
+            source_text,
+            errors: vec![],
+            context: ParserContext::default(),
+            index: 0,
+            group_names: HashSet::new(),
+        }
+    }
+
+    pub fn eat(&self, ch: char) -> bool {
+        self.lexer.chars.get(self.index) == Some(&ch)
+    }
+
+    pub fn nth(&self, n: usize) -> Option<&char> {
+        self.lexer.chars.get(self.index + n)
+    }
+
+    /// by default next means `next_1`
+    pub fn next(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index + 1)
+    }
+
+    /// get a range chars relative from current cursor
+    pub fn nrange(&self, range: Range<usize>) -> Option<&[char]> {
+        self.lexer.chars.get(self.index + range.start..(self.index + range.end))
+    }
+
+    pub fn current(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index)
+    }
+
+    pub fn advance(&mut self) -> bool {
+        if self.index < self.lexer.chars.len() {
+            self.index += 1;
+            return true;
+        } else {
+            false
+        }
+    }
+
+    pub fn rewind(&mut self, start: usize) {
+        self.index = start;
+    }
+}
+
+#[derive(Default, Clone, Copy)]
+pub enum SourceKind {
+    Flags,
+    #[default]
+    Literal,
+    Pattern,
+}
+
+pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
+    if parser.eat('/') {
+        parser.advance();
+        let pattern = parse_pattern(parser);
+        todo!()
+    } else if parser.source_text.is_empty() {
+        panic!("Empty")
+    } else {
+        match parser.current() {
+            Some(ch) => {
+                panic!("unexpected character {ch}")
+            }
+            None => {
+                panic!("unexpected eof")
+            }
+        };
+    }
+}
+
+fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
+    let start = parser.index;
+    if let Some(pattern) = parse_pattern_internal(parser) {
+        return pattern;
+    } else if !parser.context.nflag
+        && parser.context.ecma_version >= EcmaVersion::V2018
+        && parser.group_names.len() > 0
+    {
+        parser.rewind(start);
+        parser.context.nflag = true;
+        return parse_pattern_internal(parser).expect("should have pattern");
+    }
+    panic!("Invalid pattern")
+}
+
+fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
+    let start = parser.index;
+    let 
+    todo!()
+}
+
+fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
+    let start = parser.index;
+    let mut in_class = false;
+    let mut escaped = false;
+    let count = 0;
+    while let Some(ch) = parser.current() {
+        if escaped {
+            escaped = false;
+        }
+        match ch {
+            '\\' => {
+                escaped = true;
+            }
+            '[' | ']' => {
+                in_class = false;
+            }
+            '(' if !in_class => {
+                if parser.next() != Some(&'?')
+                    || (parser.nth(2) == Some(&'<') && !matches!(parser.nth(3), '=' | '!'))
+                {
+                    count += 1;
+                }
+            }
+            _ => {}
+        }
+        parser.advance();
+    }
+    parser.rewind(start);
+    count
+}

From 716f03532daf11b321899489bcacc5f418094466 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Thu, 21 Dec 2023 01:23:17 +0800
Subject: [PATCH 02/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/ast.rs    |   6 +-
 crates/oxc_js_regex/src/parser.rs | 149 ++++++++++++++++++++++++++++--
 2 files changed, 146 insertions(+), 9 deletions(-)

diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index 47ea3e203f3eb..0650f3053f4bc 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -157,8 +157,10 @@ pub struct LookbehindAssertion<'a> {
 #[derive(Debug)]
 pub struct Quantifier<'a> {
     pub span: Span,
-    pub min: f64,
-    pub max: f64, // can be f64::INFINITY
+    /// https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/validator.ts#L384-L398
+    /// both `min` and `max` are integer
+    pub min: usize,
+    pub max: usize,
     pub greedy: bool,
     pub element: QuantifiableElement<'a>,
 }
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index f48998a39de0e..7dc0e04253d6f 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -1,12 +1,18 @@
 use std::collections::{HashSet, VecDeque};
 use std::iter::Peekable;
 use std::ops::Range;
+use std::os::unix::fs::OpenOptionsExt;
+use std::panic;
 use std::str::{CharIndices, Chars, Matches};
 
 use oxc_allocator::Allocator;
 use oxc_diagnostics::Error;
+use oxc_span::Span;
 
-use crate::ast::{Branch, Pattern, RegExpLiteral};
+use crate::ast::{
+    Alternative, Assertion, Branch, Character, Element, Pattern, QuantifiableElement, Quantifier,
+    RegExpLiteral,
+};
 use crate::ecma_version::EcmaVersion;
 
 pub struct Lexer<'a> {
@@ -38,7 +44,10 @@ pub struct Parser<'a> {
     context: ParserContext,
     index: usize,
     group_names: HashSet<String>,
-numCapturingParens: usize
+    num_capturing_parens: usize,
+    last_int_value: usize,
+    back_reference_names: HashSet<String>,
+    last_range: Range<usize>,
 }
 
 #[derive(Default, Copy, Clone)]
@@ -48,6 +57,7 @@ struct ParserContext {
     nflag: bool,
     unicode_sets_mode: bool,
     ecma_version: EcmaVersion,
+    strict: bool,
 }
 
 impl<'a> Parser<'a> {
@@ -60,13 +70,30 @@ impl<'a> Parser<'a> {
             context: ParserContext::default(),
             index: 0,
             group_names: HashSet::new(),
+            num_capturing_parens: 0,
+            back_reference_names: HashSet::new(),
+            last_int_value: 0,
+            last_range: 0..0,
         }
     }
 
-    pub fn eat(&self, ch: char) -> bool {
+    pub fn is(&self, ch: char) -> bool {
         self.lexer.chars.get(self.index) == Some(&ch)
     }
 
+    pub fn eat(&mut self, ch: char) -> bool {
+        if self.is(ch) {
+            self.index += 1;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn eof(&self) -> bool {
+        self.index < self.lexer.chars.len()
+    }
+
     pub fn nth(&self, n: usize) -> Option<&char> {
         self.lexer.chars.get(self.index + n)
     }
@@ -108,7 +135,7 @@ pub enum SourceKind {
 }
 
 pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
-    if parser.eat('/') {
+    if parser.is('/') {
         parser.advance();
         let pattern = parse_pattern(parser);
         todo!()
@@ -143,15 +170,122 @@ fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
 
 fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
     let start = parser.index;
-    let 
+    parser.num_capturing_parens = count_capturing_parens(parser);
+    parser.group_names.clear();
+    parser.back_reference_names.clear();
     todo!()
 }
 
+fn parse_disjunction<'a>(parser: &mut Parser<'a>) {
+    let start = parser.index;
+    let mut i = 0;
+    loop {}
+}
+
+fn parser_alternative<'a>(parser: &mut Parser<'a>, i: usize) -> Alternative<'a> {
+    let start = parser.index;
+    // let mut elements = vec![];
+    while !parser.eof() {}
+    Alternative { span: todo!(), elements: todo!() }
+}
+
+fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
+    if parser.context.unicode_mode || parser.context.strict {}
+    todo!()
+}
+
+fn parse_quantifier<'a>(
+    parser: &mut Parser<'a>,
+    no_consume: Option<bool>,
+) -> (bool, Option<Element<'a>>) {
+    let mut no_consume = no_consume.unwrap_or_default();
+    let start = parser.index;
+    let mut min = 0;
+    let mut max = 0;
+    let mut greedy = false;
+    let mut element = None;
+    match parser.current().cloned() {
+        Some('*') => {
+            min = 0;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('+') => {
+            min = 1;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('?') => {
+            min = 0;
+            max = 1;
+            parser.advance();
+        }
+        Some(_) => {
+            if parse_braced_quantifier(parser, no_consume) {
+                min = parser.last_range.start;
+                max = parser.last_range.end;
+            }
+        }
+        None => return (false, None),
+    }
+    greedy = !parser.eat('?');
+
+    if !no_consume {
+        element = Some(Element::Quantifier(Quantifier {
+            span: Span { start: start as u32, end: parser.index as u32 },
+            min,
+            max,
+            greedy,
+            // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
+            // it can't be null, or the program will panic, so we put a dummy element, and parent
+            // should replace it
+            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
+        }))
+    }
+    (true, element)
+}
+
+fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool {
+    let start = parser.index;
+    if eat_decimal_digits(parser) {
+        let min = parser.last_int_value;
+        let mut max = min;
+        if parser.eat(',') {
+            max = if eat_decimal_digits(parser) { parser.last_int_value } else { usize::MAX };
+        }
+        if parser.eat('}') {
+            if !no_error && max < min {
+                panic!("numbers out of order in {{}} quantifier");
+            }
+            parser.last_range = min..max;
+            return true;
+        }
+    }
+    if !no_error && (parser.context.unicode_mode || parser.context.strict) {
+        panic!("Incomplete quantifier");
+    }
+    parser.rewind(start);
+    false
+}
+
+fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    parser.last_int_value = 0;
+    while let Some(ch) = parser.current() {
+        let Some(d) = ch.to_digit(10) else {
+            break;
+        };
+        parser.last_int_value = 10 * parser.last_int_value + d as usize;
+        parser.advance();
+    }
+    parser.index != start
+}
+
 fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
     let start = parser.index;
     let mut in_class = false;
     let mut escaped = false;
-    let count = 0;
+    let mut count = 0;
     while let Some(ch) = parser.current() {
         if escaped {
             escaped = false;
@@ -165,7 +299,8 @@ fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
             }
             '(' if !in_class => {
                 if parser.next() != Some(&'?')
-                    || (parser.nth(2) == Some(&'<') && !matches!(parser.nth(3), '=' | '!'))
+                    || (parser.nth(2) == Some(&'<')
+                        && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
                 {
                     count += 1;
                 }

From cd1e5b4cfb73d1415679b26c654b71eaf2e98ef7 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Thu, 21 Dec 2023 02:00:24 +0800
Subject: [PATCH 03/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 7dc0e04253d6f..029cc2a2cddca 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -77,6 +77,9 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn alloc<T>(&self, val: T) -> &mut T {
+        self.lexer.allocator.alloc(val)
+    }
     pub fn is(&self, ch: char) -> bool {
         self.lexer.chars.get(self.index) == Some(&ch)
     }
@@ -194,6 +197,10 @@ fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
     todo!()
 }
 
+fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
+    todo!()
+}
+
 fn parse_quantifier<'a>(
     parser: &mut Parser<'a>,
     no_consume: Option<bool>,
@@ -231,7 +238,7 @@ fn parse_quantifier<'a>(
     greedy = !parser.eat('?');
 
     if !no_consume {
-        element = Some(Element::Quantifier(Quantifier {
+        let quantifier = parser.alloc(Quantifier {
             span: Span { start: start as u32, end: parser.index as u32 },
             min,
             max,
@@ -240,7 +247,9 @@ fn parse_quantifier<'a>(
             // it can't be null, or the program will panic, so we put a dummy element, and parent
             // should replace it
             element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
-        }))
+        });
+
+        element = Some(Element::Quantifier(quantifier))
     }
     (true, element)
 }

From 95287445dc6cf77b393f13a70c94ba80168227ce Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 16:24:33 +0800
Subject: [PATCH 04/19] =?UTF-8?q?fix:=20=F0=9F=90=9B=20compile=20error?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 '                                 | 355 ++++++++++++++++++++++++++++++
 crates/oxc_js_regex/src/ast.rs    |   2 +-
 crates/oxc_js_regex/src/parser.rs |  54 +++--
 3 files changed, 395 insertions(+), 16 deletions(-)
 create mode 100644 '

diff --git a/' b/'
new file mode 100644
index 0000000000000..bec9fec060094
--- /dev/null
+++ b/'
@@ -0,0 +1,355 @@
+use std::collections::{HashSet, VecDeque};
+use std::iter::Peekable;
+use std::ops::Range;
+use std::os::unix::fs::OpenOptionsExt;
+use std::panic;
+use std::str::{CharIndices, Chars, Matches};
+
+use oxc_allocator::Allocator;
+use oxc_diagnostics::Error;
+use oxc_span::Span;
+
+use crate::ast::{
+    Alternative, Assertion, Branch, Character, Element, Pattern, QuantifiableElement, Quantifier,
+    RegExpLiteral,
+};
+use crate::ecma_version::EcmaVersion;
+
+pub struct Lexer<'a> {
+    allocator: &'a Allocator,
+
+    source: &'a str,
+    /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
+    chars: Vec<char>,
+
+    pub(crate) errors: Vec<Error>,
+}
+
+#[allow(clippy::unused_self)]
+impl<'a> Lexer<'a> {
+    pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
+        Self { source, allocator, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
+    }
+}
+
+pub struct Parser<'a> {
+    lexer: Lexer<'a>,
+
+    /// Source Code
+    source_text: &'a str,
+
+    /// All syntax errors from parser and lexer
+    /// Note: favor adding to `Diagnostics` instead of raising Err
+    errors: Vec<Error>,
+    context: ParserContext,
+    index: usize,
+    group_names: HashSet<String>,
+    num_capturing_parens: usize,
+    last_int_value: usize,
+    back_reference_names: HashSet<String>,
+    last_assertion_is_quantifiable: bool,
+    last_range: Range<usize>,
+}
+
+#[derive(Default, Copy, Clone)]
+struct ParserContext {
+    source_kind: SourceKind,
+    unicode_mode: bool,
+    nflag: bool,
+    unicode_sets_mode: bool,
+    ecma_version: EcmaVersion,
+    strict: bool,
+}
+
+impl<'a> Parser<'a> {
+    /// Create a new parser
+    pub fn new(allocator: &'a Allocator, source_text: &'a str) -> Self {
+        Self {
+            lexer: Lexer::new(allocator, source_text),
+            source_text,
+            errors: vec![],
+            context: ParserContext::default(),
+            index: 0,
+            group_names: HashSet::new(),
+            num_capturing_parens: 0,
+            back_reference_names: HashSet::new(),
+            last_int_value: 0,
+            last_range: 0..0,
+            last_assertion_is_quantifiable: false,
+        }
+    }
+
+    // fn alloc<T>(&self, val: T) -> T {
+    //     self.lexer.allocator.alloc(val)
+    // }
+
+    #[inline]
+    pub fn alloc<T>(&self, value: T) -> Box<'a, T> {
+        Box(self.lexer.allocator.alloc(value))
+    }
+    pub fn is(&self, ch: char) -> bool {
+        self.lexer.chars.get(self.index) == Some(&ch)
+    }
+
+    pub fn eat(&mut self, ch: char) -> bool {
+        if self.is(ch) {
+            self.index += 1;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn eof(&self) -> bool {
+        self.index < self.lexer.chars.len()
+    }
+
+    pub fn nth(&self, n: usize) -> Option<&char> {
+        self.lexer.chars.get(self.index + n)
+    }
+
+    /// by default next means `next_1`
+    pub fn next(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index + 1)
+    }
+
+    /// get a range chars relative from current cursor
+    pub fn nrange(&self, range: Range<usize>) -> Option<&[char]> {
+        self.lexer.chars.get(self.index + range.start..(self.index + range.end))
+    }
+
+    pub fn current(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index)
+    }
+
+    pub fn advance(&mut self) -> bool {
+        if self.index < self.lexer.chars.len() {
+            self.index += 1;
+            return true;
+        } else {
+            false
+        }
+    }
+
+    pub fn rewind(&mut self, start: usize) {
+        self.index = start;
+    }
+}
+
+#[derive(Default, Clone, Copy)]
+pub enum SourceKind {
+    Flags,
+    #[default]
+    Literal,
+    Pattern,
+}
+
+pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
+    if parser.is('/') {
+        parser.advance();
+        let pattern = parse_pattern(parser);
+        todo!()
+    } else if parser.source_text.is_empty() {
+        panic!("Empty")
+    } else {
+        match parser.current() {
+            Some(ch) => {
+                panic!("unexpected character {ch}")
+            }
+            None => {
+                panic!("unexpected eof")
+            }
+        };
+    }
+}
+
+fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
+    let start = parser.index;
+    if let Some(pattern) = parse_pattern_internal(parser) {
+        return pattern;
+    } else if !parser.context.nflag
+        && parser.context.ecma_version >= EcmaVersion::V2018
+        && parser.group_names.len() > 0
+    {
+        parser.rewind(start);
+        parser.context.nflag = true;
+        return parse_pattern_internal(parser).expect("should have pattern");
+    }
+    panic!("Invalid pattern")
+}
+
+fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
+    let start = parser.index;
+    parser.num_capturing_parens = count_capturing_parens(parser);
+    parser.group_names.clear();
+    parser.back_reference_names.clear();
+    todo!()
+}
+
+fn parse_disjunction<'a>(parser: &mut Parser<'a>) {
+    let start = parser.index;
+    let mut i = 0;
+    loop {}
+}
+
+/// Validate the next characters as a RegExp `Alternative` production.
+/// ```
+///  Alternative[UnicodeMode, UnicodeSetsMode, N]::
+///      [empty]
+///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
+/// ```
+fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
+    let start = parser.index;
+    let mut elements = vec![];
+    while !parser.eof() {
+        let term = parse_term(p);
+    }
+    Alternative { span: Span::new(start, parser.index), elements }
+}
+
+fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
+    if parser.context.unicode_mode || parser.context.strict {}
+    todo!()
+}
+
+fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
+    let start = parser.index;
+    parser.last_assertion_is_quantifiable = false;
+
+    todo!()
+}
+
+/// Validate the next characters as a RegExp `Quantifier` production if possible.
+/// ```
+///  Quantifier::
+///        QuantifierPrefix
+///        QuantifierPrefix `?`
+///   QuantifierPrefix::
+///        `*`
+///        `+`
+///        `?`
+///        `{` DecimalDigits `}`
+///        `{` DecimalDigits `,}`
+///        `{` DecimalDigits `,` DecimalDigits `}`
+///   ```
+/// returns `true` if it consumed the next characters successfully.
+fn parse_quantifier<'a>(
+    parser: &mut Parser<'a>,
+    no_consume: Option<bool>,
+) -> (bool, Option<Element<'a>>) {
+    let mut no_consume = no_consume.unwrap_or_default();
+    let start = parser.index;
+    let mut min = 0;
+    let mut max = 0;
+    let mut greedy = false;
+    let mut element = None;
+    match parser.current().cloned() {
+        Some('*') => {
+            min = 0;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('+') => {
+            min = 1;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('?') => {
+            min = 0;
+            max = 1;
+            parser.advance();
+        }
+        Some(_) => {
+            if parse_braced_quantifier(parser, no_consume) {
+                min = parser.last_range.start;
+                max = parser.last_range.end;
+            }
+        }
+        None => return (false, None),
+    }
+    greedy = !parser.eat('?');
+
+    if !no_consume {
+        let quantifier = parser.alloc(Quantifier {
+            span: Span { start: start as u32, end: parser.index as u32 },
+            min,
+            max,
+            greedy,
+            // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
+            // it can't be null, or the program will panic, so we put a dummy element, and parent
+            // should replace it
+            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
+        });
+
+        element = Some(Element::Quantifier(quantifier))
+    }
+    (true, element)
+}
+
+fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool {
+    let start = parser.index;
+    if eat_decimal_digits(parser) {
+        let min = parser.last_int_value;
+        let mut max = min;
+        if parser.eat(',') {
+            max = if eat_decimal_digits(parser) { parser.last_int_value } else { usize::MAX };
+        }
+        if parser.eat('}') {
+            if !no_error && max < min {
+                panic!("numbers out of order in {{}} quantifier");
+            }
+            parser.last_range = min..max;
+            return true;
+        }
+    }
+    if !no_error && (parser.context.unicode_mode || parser.context.strict) {
+        panic!("Incomplete quantifier");
+    }
+    parser.rewind(start);
+    false
+}
+
+fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    parser.last_int_value = 0;
+    while let Some(ch) = parser.current() {
+        let Some(d) = ch.to_digit(10) else {
+            break;
+        };
+        parser.last_int_value = 10 * parser.last_int_value + d as usize;
+        parser.advance();
+    }
+    parser.index != start
+}
+
+fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
+    let start = parser.index;
+    let mut in_class = false;
+    let mut escaped = false;
+    let mut count = 0;
+    while let Some(ch) = parser.current() {
+        if escaped {
+            escaped = false;
+        }
+        match ch {
+            '\\' => {
+                escaped = true;
+            }
+            '[' | ']' => {
+                in_class = false;
+            }
+            '(' if !in_class => {
+                if parser.next() != Some(&'?')
+                    || (parser.nth(2) == Some(&'<')
+                        && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
+                {
+                    count += 1;
+                }
+            }
+            _ => {}
+        }
+        parser.advance();
+    }
+    parser.rewind(start);
+    count
+}
diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index 0650f3053f4bc..5d429b4f063b2 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -46,7 +46,7 @@ pub enum Leaf<'a> {
 pub enum Element<'a> {
     Assertion(Assertion<'a>),
     QuantifiableElement(QuantifiableElement<'a>),
-    Quantifier(Quantifier<'a>),
+    Quantifier(Box<'a, Quantifier<'a>>),
 }
 
 /// The type which includes all atom nodes that Quantifier node can have as children.
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 029cc2a2cddca..bc8dd4428d208 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -5,7 +5,6 @@ use std::os::unix::fs::OpenOptionsExt;
 use std::panic;
 use std::str::{CharIndices, Chars, Matches};
 
-use oxc_allocator::Allocator;
 use oxc_diagnostics::Error;
 use oxc_span::Span;
 
@@ -13,11 +12,10 @@ use crate::ast::{
     Alternative, Assertion, Branch, Character, Element, Pattern, QuantifiableElement, Quantifier,
     RegExpLiteral,
 };
+use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
 
 pub struct Lexer<'a> {
-    allocator: &'a Allocator,
-
     source: &'a str,
     /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
     chars: Vec<char>,
@@ -27,13 +25,14 @@ pub struct Lexer<'a> {
 
 #[allow(clippy::unused_self)]
 impl<'a> Lexer<'a> {
-    pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
-        Self { source, allocator, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
+    pub fn new(source: &'a str) -> Self {
+        Self { source, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
     }
 }
 
 pub struct Parser<'a> {
     lexer: Lexer<'a>,
+    builder: AstBuilder<'a>,
 
     /// Source Code
     source_text: &'a str,
@@ -47,6 +46,7 @@ pub struct Parser<'a> {
     num_capturing_parens: usize,
     last_int_value: usize,
     back_reference_names: HashSet<String>,
+    last_assertion_is_quantifiable: bool,
     last_range: Range<usize>,
 }
 
@@ -62,9 +62,9 @@ struct ParserContext {
 
 impl<'a> Parser<'a> {
     /// Create a new parser
-    pub fn new(allocator: &'a Allocator, source_text: &'a str) -> Self {
+    pub fn new(allocator: &'a oxc_allocator::Allocator, source_text: &'a str) -> Self {
         Self {
-            lexer: Lexer::new(allocator, source_text),
+            lexer: Lexer::new(source_text),
             source_text,
             errors: vec![],
             context: ParserContext::default(),
@@ -74,12 +74,11 @@ impl<'a> Parser<'a> {
             back_reference_names: HashSet::new(),
             last_int_value: 0,
             last_range: 0..0,
+            last_assertion_is_quantifiable: false,
+            builder: AstBuilder::new(allocator),
         }
     }
 
-    fn alloc<T>(&self, val: T) -> &mut T {
-        self.lexer.allocator.alloc(val)
-    }
     pub fn is(&self, ch: char) -> bool {
         self.lexer.chars.get(self.index) == Some(&ch)
     }
@@ -185,11 +184,19 @@ fn parse_disjunction<'a>(parser: &mut Parser<'a>) {
     loop {}
 }
 
-fn parser_alternative<'a>(parser: &mut Parser<'a>, i: usize) -> Alternative<'a> {
+/// Validate the next characters as a RegExp `Alternative` production.
+/// ```
+///  Alternative[UnicodeMode, UnicodeSetsMode, N]::
+///      [empty]
+///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
+/// ```
+fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
     let start = parser.index;
-    // let mut elements = vec![];
-    while !parser.eof() {}
-    Alternative { span: todo!(), elements: todo!() }
+    let mut elements = parser.builder.new_vec();
+    while !parser.eof() {
+        let term = parse_term(parser);
+    }
+    Alternative { span: Span::new(start as u32, parser.index as u32), elements }
 }
 
 fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
@@ -198,9 +205,26 @@ fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
 }
 
 fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
+    let start = parser.index;
+    parser.last_assertion_is_quantifiable = false;
+
     todo!()
 }
 
+/// Validate the next characters as a RegExp `Quantifier` production if possible.
+/// ```
+///  Quantifier::
+///        QuantifierPrefix
+///        QuantifierPrefix `?`
+///   QuantifierPrefix::
+///        `*`
+///        `+`
+///        `?`
+///        `{` DecimalDigits `}`
+///        `{` DecimalDigits `,}`
+///        `{` DecimalDigits `,` DecimalDigits `}`
+///   ```
+/// returns `true` if it consumed the next characters successfully.
 fn parse_quantifier<'a>(
     parser: &mut Parser<'a>,
     no_consume: Option<bool>,
@@ -238,7 +262,7 @@ fn parse_quantifier<'a>(
     greedy = !parser.eat('?');
 
     if !no_consume {
-        let quantifier = parser.alloc(Quantifier {
+        let quantifier = parser.builder.alloc(Quantifier {
             span: Span { start: start as u32, end: parser.index as u32 },
             min,
             max,

From e8e1d922925239cb9dbbff15d73fe1b9de902a7b Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 16:28:58 +0800
Subject: [PATCH 05/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20parse=20alternativ?=
 =?UTF-8?q?e?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index bc8dd4428d208..1517bcdd44f70 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -194,7 +194,13 @@ fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
     let start = parser.index;
     let mut elements = parser.builder.new_vec();
     while !parser.eof() {
-        let term = parse_term(parser);
+        let (flag, node) = parse_term(parser);
+        if let Some(node) = node {
+            elements.push(node);
+        }
+        if !flag {
+            break;
+        }
     }
     Alternative { span: Span::new(start as u32, parser.index as u32), elements }
 }

From 9c574e87a70084a4ae37d225814747ca2fdb5cad Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 16:38:03 +0800
Subject: [PATCH 06/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20disjunction?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 1517bcdd44f70..6e26300c4d18c 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -178,10 +178,23 @@ fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
     todo!()
 }
 
-fn parse_disjunction<'a>(parser: &mut Parser<'a>) {
+fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alternative<'a>> {
     let start = parser.index;
-    let mut i = 0;
-    loop {}
+    let mut ret = parser.builder.new_vec();
+    loop {
+        ret.push(parser_alternative(parser));
+        if !parser.eat('|') {
+            break;
+        }
+    }
+    // Only consume the ast when `no_consume` is false
+    if parse_quantifier(parser, Some(true)).0 {
+        panic!("Nothing to repeat");
+    }
+    if parser.eat('{') {
+        panic!("Lone quantifier brackets")
+    }
+    ret
 }
 
 /// Validate the next characters as a RegExp `Alternative` production.

From e25ac163aeb03b062c474d7e836cfaf0f21b8de8 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 17:22:12 +0800
Subject: [PATCH 07/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20parse=20assertion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ;                                 | 377 ++++++++++++++++++++++++++++++
 crates/oxc_js_regex/src/parser.rs | 102 +++++++-
 2 files changed, 472 insertions(+), 7 deletions(-)
 create mode 100644 ;

diff --git a/; b/;
new file mode 100644
index 0000000000000..acc0dcbc178a7
--- /dev/null
+++ b/;
@@ -0,0 +1,377 @@
+use std::collections::{HashSet, VecDeque};
+use std::iter::Peekable;
+use std::ops::Range;
+use std::os::unix::fs::OpenOptionsExt;
+use std::panic;
+use std::str::{CharIndices, Chars, Matches};
+
+use oxc_diagnostics::Error;
+use oxc_span::Span;
+
+use crate::ast::{
+    Alternative, Assertion, BoundaryAssertion, Branch, Character, EdgeAssertion, Element, Pattern,
+    QuantifiableElement, Quantifier, RegExpLiteral,
+};
+use crate::ast_builder::AstBuilder;
+use crate::ecma_version::EcmaVersion;
+
+pub struct Lexer<'a> {
+    source: &'a str,
+    /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
+    chars: Vec<char>,
+
+    pub(crate) errors: Vec<Error>,
+}
+
+#[allow(clippy::unused_self)]
+impl<'a> Lexer<'a> {
+    pub fn new(source: &'a str) -> Self {
+        Self { source, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
+    }
+}
+
+pub struct Parser<'a> {
+    lexer: Lexer<'a>,
+    builder: AstBuilder<'a>,
+
+    /// Source Code
+    source_text: &'a str,
+
+    /// All syntax errors from parser and lexer
+    /// Note: favor adding to `Diagnostics` instead of raising Err
+    errors: Vec<Error>,
+    context: ParserContext,
+    index: usize,
+    group_names: HashSet<String>,
+    num_capturing_parens: usize,
+    last_int_value: usize,
+    back_reference_names: HashSet<String>,
+    last_assertion_is_quantifiable: bool,
+    last_range: Range<usize>,
+}
+
+#[derive(Default, Copy, Clone)]
+struct ParserContext {
+    source_kind: SourceKind,
+    unicode_mode: bool,
+    nflag: bool,
+    unicode_sets_mode: bool,
+    ecma_version: EcmaVersion,
+    strict: bool,
+}
+
+impl<'a> Parser<'a> {
+    /// Create a new parser
+    pub fn new(allocator: &'a oxc_allocator::Allocator, source_text: &'a str) -> Self {
+        Self {
+            lexer: Lexer::new(source_text),
+            source_text,
+            errors: vec![],
+            context: ParserContext::default(),
+            index: 0,
+            group_names: HashSet::new(),
+            num_capturing_parens: 0,
+            back_reference_names: HashSet::new(),
+            last_int_value: 0,
+            last_range: 0..0,
+            last_assertion_is_quantifiable: false,
+            builder: AstBuilder::new(allocator),
+        }
+    }
+
+    pub fn is(&self, ch: char) -> bool {
+        self.lexer.chars.get(self.index) == Some(&ch)
+    }
+
+    pub fn eat(&mut self, ch: char) -> bool {
+        if self.is(ch) {
+            self.index += 1;
+            true
+        } else {
+            false
+        }
+    }
+
+    pub fn eof(&self) -> bool {
+        self.index < self.lexer.chars.len()
+    }
+
+    pub fn nth(&self, n: usize) -> Option<&char> {
+        self.lexer.chars.get(self.index + n)
+    }
+
+    /// by default next means `next_1`
+    pub fn next(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index + 1)
+    }
+
+    /// get a range chars relative from current cursor
+    pub fn nrange(&self, range: Range<usize>) -> Option<&[char]> {
+        self.lexer.chars.get(self.index + range.start..(self.index + range.end))
+    }
+
+    pub fn current(&self) -> Option<&char> {
+        self.lexer.chars.get(self.index)
+    }
+
+    pub fn advance(&mut self) -> bool {
+        if self.index < self.lexer.chars.len() {
+            self.index += 1;
+            return true;
+        } else {
+            false
+        }
+    }
+
+    pub fn rewind(&mut self, start: usize) {
+        self.index = start;
+    }
+}
+
+#[derive(Default, Clone, Copy)]
+pub enum SourceKind {
+    Flags,
+    #[default]
+    Literal,
+    Pattern,
+}
+
+pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
+    if parser.is('/') {
+        parser.advance();
+        let pattern = parse_pattern(parser);
+        todo!()
+    } else if parser.source_text.is_empty() {
+        panic!("Empty")
+    } else {
+        match parser.current() {
+            Some(ch) => {
+                panic!("unexpected character {ch}")
+            }
+            None => {
+                panic!("unexpected eof")
+            }
+        };
+    }
+}
+
+fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
+    let start = parser.index;
+    if let Some(pattern) = parse_pattern_internal(parser) {
+        return pattern;
+    } else if !parser.context.nflag
+        && parser.context.ecma_version >= EcmaVersion::V2018
+        && parser.group_names.len() > 0
+    {
+        parser.rewind(start);
+        parser.context.nflag = true;
+        return parse_pattern_internal(parser).expect("should have pattern");
+    }
+    panic!("Invalid pattern")
+}
+
+fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
+    let start = parser.index;
+    parser.num_capturing_parens = count_capturing_parens(parser);
+    parser.group_names.clear();
+    parser.back_reference_names.clear();
+    todo!()
+}
+
+fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alternative<'a>> {
+    let start = parser.index;
+    let mut alternatives = parser.builder.new_vec();
+    loop {
+        alternatives.push(parser_alternative(parser));
+        if !parser.eat('|') {
+            break;
+        }
+    }
+    // Only consume the ast when `no_consume` is false
+    if parse_quantifier(parser, Some(true)).0 {
+        panic!("Nothing to repeat");
+    }
+    if parser.eat('{') {
+        panic!("Lone quantifier brackets")
+    }
+    alternatives
+}
+
+/// Validate the next characters as a RegExp `Alternative` production.
+/// ```
+///  Alternative[UnicodeMode, UnicodeSetsMode, N]::
+///      [empty]
+///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
+/// ```
+fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
+    let start = parser.index;
+    let mut elements = parser.builder.new_vec();
+    while !parser.eof() {
+        let (flag, node) = parse_term(parser);
+        if let Some(node) = node {
+            elements.push(node);
+        }
+        if !flag {
+            break;
+        }
+    }
+    Alternative { span: Span::new(start as u32, parser.index as u32), elements }
+}
+
+fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
+    if parser.context.unicode_mode || parser.context.strict {}
+    todo!()
+}
+
+fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
+    let start = parser.index;
+    parser.last_assertion_is_quantifiable = false;
+
+    if parser.eat('^') {
+        return (
+            true,
+            Some(Assertion::BoundaryAssertion(parser.builder.alloc(BoundaryAssertion::EdgeAssertion(
+                parser.builder.alloc(EdgeAssertion {
+                    span: Span::new(start as u32, parser.index as u32),
+                    kind: todo!(),
+                }),
+            ))),
+        );
+    }
+    todo!()
+}
+
+/// Validate the next characters as a RegExp `Quantifier` production if possible.
+/// ```
+///  Quantifier::
+///        QuantifierPrefix
+///        QuantifierPrefix `?`
+///   QuantifierPrefix::
+///        `*`
+///        `+`
+///        `?`
+///        `{` DecimalDigits `}`
+///        `{` DecimalDigits `,}`
+///        `{` DecimalDigits `,` DecimalDigits `}`
+///   ```
+/// returns `true` if it consumed the next characters successfully.
+fn parse_quantifier<'a>(
+    parser: &mut Parser<'a>,
+    no_consume: Option<bool>,
+) -> (bool, Option<Element<'a>>) {
+    let mut no_consume = no_consume.unwrap_or_default();
+    let start = parser.index;
+    let mut min = 0;
+    let mut max = 0;
+    let mut greedy = false;
+    let mut element = None;
+    match parser.current().cloned() {
+        Some('*') => {
+            min = 0;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('+') => {
+            min = 1;
+            max = usize::MAX;
+            parser.advance();
+        }
+        Some('?') => {
+            min = 0;
+            max = 1;
+            parser.advance();
+        }
+        Some(_) => {
+            if parse_braced_quantifier(parser, no_consume) {
+                min = parser.last_range.start;
+                max = parser.last_range.end;
+            }
+        }
+        None => return (false, None),
+    }
+    greedy = !parser.eat('?');
+
+    if !no_consume {
+        let quantifier = parser.builder.alloc(Quantifier {
+            span: Span { start: start as u32, end: parser.index as u32 },
+            min,
+            max,
+            greedy,
+            // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
+            // it can't be null, or the program will panic, so we put a dummy element, and parent
+            // should replace it
+            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
+        });
+
+        element = Some(Element::Quantifier(quantifier))
+    }
+    (true, element)
+}
+
+fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool {
+    let start = parser.index;
+    if eat_decimal_digits(parser) {
+        let min = parser.last_int_value;
+        let mut max = min;
+        if parser.eat(',') {
+            max = if eat_decimal_digits(parser) { parser.last_int_value } else { usize::MAX };
+        }
+        if parser.eat('}') {
+            if !no_error && max < min {
+                panic!("numbers out of order in {{}} quantifier");
+            }
+            parser.last_range = min..max;
+            return true;
+        }
+    }
+    if !no_error && (parser.context.unicode_mode || parser.context.strict) {
+        panic!("Incomplete quantifier");
+    }
+    parser.rewind(start);
+    false
+}
+
+fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    parser.last_int_value = 0;
+    while let Some(ch) = parser.current() {
+        let Some(d) = ch.to_digit(10) else {
+            break;
+        };
+        parser.last_int_value = 10 * parser.last_int_value + d as usize;
+        parser.advance();
+    }
+    parser.index != start
+}
+
+fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
+    let start = parser.index;
+    let mut in_class = false;
+    let mut escaped = false;
+    let mut count = 0;
+    while let Some(ch) = parser.current() {
+        if escaped {
+            escaped = false;
+        }
+        match ch {
+            '\\' => {
+                escaped = true;
+            }
+            '[' | ']' => {
+                in_class = false;
+            }
+            '(' if !in_class => {
+                if parser.next() != Some(&'?')
+                    || (parser.nth(2) == Some(&'<')
+                        && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
+                {
+                    count += 1;
+                }
+            }
+            _ => {}
+        }
+        parser.advance();
+    }
+    parser.rewind(start);
+    count
+}
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 6e26300c4d18c..e7854d063d509 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -9,8 +9,9 @@ use oxc_diagnostics::Error;
 use oxc_span::Span;
 
 use crate::ast::{
-    Alternative, Assertion, Branch, Character, Element, Pattern, QuantifiableElement, Quantifier,
-    RegExpLiteral,
+    Alternative, Assertion, BoundaryAssertion, Branch, Character, EdgeAssertion, EdgeAssertionKind,
+    Element, LookaheadAssertion, LookaroundAssertion, LookbehindAssertion, Pattern,
+    QuantifiableElement, Quantifier, RegExpLiteral, WordBoundaryAssertion,
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
@@ -92,6 +93,15 @@ impl<'a> Parser<'a> {
         }
     }
 
+    pub fn eat2(&mut self, first: char, second: char) -> bool {
+        if self.is(first) && self.nth(1) == Some(&second) {
+            self.index += 2;
+            true
+        } else {
+            false
+        }
+    }
+
     pub fn eof(&self) -> bool {
         self.index < self.lexer.chars.len()
     }
@@ -180,9 +190,9 @@ fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
 
 fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alternative<'a>> {
     let start = parser.index;
-    let mut ret = parser.builder.new_vec();
+    let mut alternatives = parser.builder.new_vec();
     loop {
-        ret.push(parser_alternative(parser));
+        alternatives.push(parse_alternative(parser));
         if !parser.eat('|') {
             break;
         }
@@ -194,7 +204,7 @@ fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alte
     if parser.eat('{') {
         panic!("Lone quantifier brackets")
     }
-    ret
+    alternatives
 }
 
 /// Validate the next characters as a RegExp `Alternative` production.
@@ -203,7 +213,7 @@ fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alte
 ///      [empty]
 ///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
 /// ```
-fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
+fn parse_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
     let start = parser.index;
     let mut elements = parser.builder.new_vec();
     while !parser.eof() {
@@ -227,7 +237,85 @@ fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>)
     let start = parser.index;
     parser.last_assertion_is_quantifiable = false;
 
-    todo!()
+    if parser.eat('^') {
+        return (
+            true,
+            Some(Assertion::BoundaryAssertion(parser.builder.alloc(
+                BoundaryAssertion::EdgeAssertion(parser.builder.alloc(EdgeAssertion {
+                    span: Span::new(start as u32, parser.index as u32),
+                    kind: EdgeAssertionKind::Start,
+                })),
+            ))),
+        );
+    }
+
+    if parser.eat('$') {
+        return (
+            true,
+            Some(Assertion::BoundaryAssertion(parser.builder.alloc(
+                BoundaryAssertion::EdgeAssertion(parser.builder.alloc(EdgeAssertion {
+                    span: Span::new(start as u32, parser.index as u32),
+                    kind: EdgeAssertionKind::End,
+                })),
+            ))),
+        );
+    }
+
+    if parser.eat2('\\', 'B') {
+        return (
+            true,
+            Some(Assertion::BoundaryAssertion(parser.builder.alloc(
+                BoundaryAssertion::WordBoundaryAssertion(parser.builder.alloc(
+                    WordBoundaryAssertion {
+                        span: Span::new(start as u32, parser.index as u32),
+                        negate: true,
+                    },
+                )),
+            ))),
+        );
+    }
+
+    if parser.eat2('\\', 'b') {
+        return (
+            true,
+            Some(Assertion::BoundaryAssertion(parser.builder.alloc(
+                BoundaryAssertion::WordBoundaryAssertion(parser.builder.alloc(
+                    WordBoundaryAssertion {
+                        span: Span::new(start as u32, parser.index as u32),
+                        negate: false,
+                    },
+                )),
+            ))),
+        );
+    }
+
+    // Lookahead / Lookbehind
+    if parser.eat2('(', '?') {
+        let lookbeind = parser.context.ecma_version >= EcmaVersion::V2018 && parser.eat('<');
+        let mut eq_sign = parser.eat('=');
+        let mut negate = if eq_sign { false } else { parser.eat('!') };
+        if eq_sign || negate {
+            let span = Span::new(start as u32, parser.index as u32);
+            let alternatives = parse_disjunction(parser);
+            let look_around_assertion =
+                if lookbeind {
+                    LookaroundAssertion::LookbehindAssertion(
+                        parser.builder.alloc(LookbehindAssertion { span, negate, alternatives }),
+                    )
+                } else {
+                    LookaroundAssertion::LookaheadAssertion(
+                        parser.builder.alloc(LookaheadAssertion { span, negate, alternatives }),
+                    )
+                };
+            let node = Assertion::LookaroundAssertion(parser.builder.alloc(look_around_assertion));
+            if !parser.eat(')') {
+                panic!("Unterminated group")
+            }
+            parser.last_assertion_is_quantifiable = !lookbeind && !parser.context.strict;
+        }
+        parser.rewind(start);
+    }
+    (false, None)
 }
 
 /// Validate the next characters as a RegExp `Quantifier` production if possible.

From 54501a7b0e829f834f0b16fa2bb4e91afb9e7590 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 21:36:40 +0800
Subject: [PATCH 08/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20copy=20all?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 '                                 |  355 ---------
 ;                                 |  377 ----------
 crates/oxc_js_regex/src/ast.rs    |    2 +-
 crates/oxc_js_regex/src/parser.rs | 1120 ++++++++++++++++++++++++++++-
 4 files changed, 1118 insertions(+), 736 deletions(-)
 delete mode 100644 '
 delete mode 100644 ;

diff --git a/' b/'
deleted file mode 100644
index bec9fec060094..0000000000000
--- a/'
+++ /dev/null
@@ -1,355 +0,0 @@
-use std::collections::{HashSet, VecDeque};
-use std::iter::Peekable;
-use std::ops::Range;
-use std::os::unix::fs::OpenOptionsExt;
-use std::panic;
-use std::str::{CharIndices, Chars, Matches};
-
-use oxc_allocator::Allocator;
-use oxc_diagnostics::Error;
-use oxc_span::Span;
-
-use crate::ast::{
-    Alternative, Assertion, Branch, Character, Element, Pattern, QuantifiableElement, Quantifier,
-    RegExpLiteral,
-};
-use crate::ecma_version::EcmaVersion;
-
-pub struct Lexer<'a> {
-    allocator: &'a Allocator,
-
-    source: &'a str,
-    /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
-    chars: Vec<char>,
-
-    pub(crate) errors: Vec<Error>,
-}
-
-#[allow(clippy::unused_self)]
-impl<'a> Lexer<'a> {
-    pub fn new(allocator: &'a Allocator, source: &'a str) -> Self {
-        Self { source, allocator, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
-    }
-}
-
-pub struct Parser<'a> {
-    lexer: Lexer<'a>,
-
-    /// Source Code
-    source_text: &'a str,
-
-    /// All syntax errors from parser and lexer
-    /// Note: favor adding to `Diagnostics` instead of raising Err
-    errors: Vec<Error>,
-    context: ParserContext,
-    index: usize,
-    group_names: HashSet<String>,
-    num_capturing_parens: usize,
-    last_int_value: usize,
-    back_reference_names: HashSet<String>,
-    last_assertion_is_quantifiable: bool,
-    last_range: Range<usize>,
-}
-
-#[derive(Default, Copy, Clone)]
-struct ParserContext {
-    source_kind: SourceKind,
-    unicode_mode: bool,
-    nflag: bool,
-    unicode_sets_mode: bool,
-    ecma_version: EcmaVersion,
-    strict: bool,
-}
-
-impl<'a> Parser<'a> {
-    /// Create a new parser
-    pub fn new(allocator: &'a Allocator, source_text: &'a str) -> Self {
-        Self {
-            lexer: Lexer::new(allocator, source_text),
-            source_text,
-            errors: vec![],
-            context: ParserContext::default(),
-            index: 0,
-            group_names: HashSet::new(),
-            num_capturing_parens: 0,
-            back_reference_names: HashSet::new(),
-            last_int_value: 0,
-            last_range: 0..0,
-            last_assertion_is_quantifiable: false,
-        }
-    }
-
-    // fn alloc<T>(&self, val: T) -> T {
-    //     self.lexer.allocator.alloc(val)
-    // }
-
-    #[inline]
-    pub fn alloc<T>(&self, value: T) -> Box<'a, T> {
-        Box(self.lexer.allocator.alloc(value))
-    }
-    pub fn is(&self, ch: char) -> bool {
-        self.lexer.chars.get(self.index) == Some(&ch)
-    }
-
-    pub fn eat(&mut self, ch: char) -> bool {
-        if self.is(ch) {
-            self.index += 1;
-            true
-        } else {
-            false
-        }
-    }
-
-    pub fn eof(&self) -> bool {
-        self.index < self.lexer.chars.len()
-    }
-
-    pub fn nth(&self, n: usize) -> Option<&char> {
-        self.lexer.chars.get(self.index + n)
-    }
-
-    /// by default next means `next_1`
-    pub fn next(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index + 1)
-    }
-
-    /// get a range chars relative from current cursor
-    pub fn nrange(&self, range: Range<usize>) -> Option<&[char]> {
-        self.lexer.chars.get(self.index + range.start..(self.index + range.end))
-    }
-
-    pub fn current(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index)
-    }
-
-    pub fn advance(&mut self) -> bool {
-        if self.index < self.lexer.chars.len() {
-            self.index += 1;
-            return true;
-        } else {
-            false
-        }
-    }
-
-    pub fn rewind(&mut self, start: usize) {
-        self.index = start;
-    }
-}
-
-#[derive(Default, Clone, Copy)]
-pub enum SourceKind {
-    Flags,
-    #[default]
-    Literal,
-    Pattern,
-}
-
-pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
-    if parser.is('/') {
-        parser.advance();
-        let pattern = parse_pattern(parser);
-        todo!()
-    } else if parser.source_text.is_empty() {
-        panic!("Empty")
-    } else {
-        match parser.current() {
-            Some(ch) => {
-                panic!("unexpected character {ch}")
-            }
-            None => {
-                panic!("unexpected eof")
-            }
-        };
-    }
-}
-
-fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
-    let start = parser.index;
-    if let Some(pattern) = parse_pattern_internal(parser) {
-        return pattern;
-    } else if !parser.context.nflag
-        && parser.context.ecma_version >= EcmaVersion::V2018
-        && parser.group_names.len() > 0
-    {
-        parser.rewind(start);
-        parser.context.nflag = true;
-        return parse_pattern_internal(parser).expect("should have pattern");
-    }
-    panic!("Invalid pattern")
-}
-
-fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
-    let start = parser.index;
-    parser.num_capturing_parens = count_capturing_parens(parser);
-    parser.group_names.clear();
-    parser.back_reference_names.clear();
-    todo!()
-}
-
-fn parse_disjunction<'a>(parser: &mut Parser<'a>) {
-    let start = parser.index;
-    let mut i = 0;
-    loop {}
-}
-
-/// Validate the next characters as a RegExp `Alternative` production.
-/// ```
-///  Alternative[UnicodeMode, UnicodeSetsMode, N]::
-///      [empty]
-///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
-/// ```
-fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
-    let start = parser.index;
-    let mut elements = vec![];
-    while !parser.eof() {
-        let term = parse_term(p);
-    }
-    Alternative { span: Span::new(start, parser.index), elements }
-}
-
-fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
-    if parser.context.unicode_mode || parser.context.strict {}
-    todo!()
-}
-
-fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
-    let start = parser.index;
-    parser.last_assertion_is_quantifiable = false;
-
-    todo!()
-}
-
-/// Validate the next characters as a RegExp `Quantifier` production if possible.
-/// ```
-///  Quantifier::
-///        QuantifierPrefix
-///        QuantifierPrefix `?`
-///   QuantifierPrefix::
-///        `*`
-///        `+`
-///        `?`
-///        `{` DecimalDigits `}`
-///        `{` DecimalDigits `,}`
-///        `{` DecimalDigits `,` DecimalDigits `}`
-///   ```
-/// returns `true` if it consumed the next characters successfully.
-fn parse_quantifier<'a>(
-    parser: &mut Parser<'a>,
-    no_consume: Option<bool>,
-) -> (bool, Option<Element<'a>>) {
-    let mut no_consume = no_consume.unwrap_or_default();
-    let start = parser.index;
-    let mut min = 0;
-    let mut max = 0;
-    let mut greedy = false;
-    let mut element = None;
-    match parser.current().cloned() {
-        Some('*') => {
-            min = 0;
-            max = usize::MAX;
-            parser.advance();
-        }
-        Some('+') => {
-            min = 1;
-            max = usize::MAX;
-            parser.advance();
-        }
-        Some('?') => {
-            min = 0;
-            max = 1;
-            parser.advance();
-        }
-        Some(_) => {
-            if parse_braced_quantifier(parser, no_consume) {
-                min = parser.last_range.start;
-                max = parser.last_range.end;
-            }
-        }
-        None => return (false, None),
-    }
-    greedy = !parser.eat('?');
-
-    if !no_consume {
-        let quantifier = parser.alloc(Quantifier {
-            span: Span { start: start as u32, end: parser.index as u32 },
-            min,
-            max,
-            greedy,
-            // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
-            // it can't be null, or the program will panic, so we put a dummy element, and parent
-            // should replace it
-            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
-        });
-
-        element = Some(Element::Quantifier(quantifier))
-    }
-    (true, element)
-}
-
-fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool {
-    let start = parser.index;
-    if eat_decimal_digits(parser) {
-        let min = parser.last_int_value;
-        let mut max = min;
-        if parser.eat(',') {
-            max = if eat_decimal_digits(parser) { parser.last_int_value } else { usize::MAX };
-        }
-        if parser.eat('}') {
-            if !no_error && max < min {
-                panic!("numbers out of order in {{}} quantifier");
-            }
-            parser.last_range = min..max;
-            return true;
-        }
-    }
-    if !no_error && (parser.context.unicode_mode || parser.context.strict) {
-        panic!("Incomplete quantifier");
-    }
-    parser.rewind(start);
-    false
-}
-
-fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = parser.index;
-    parser.last_int_value = 0;
-    while let Some(ch) = parser.current() {
-        let Some(d) = ch.to_digit(10) else {
-            break;
-        };
-        parser.last_int_value = 10 * parser.last_int_value + d as usize;
-        parser.advance();
-    }
-    parser.index != start
-}
-
-fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
-    let start = parser.index;
-    let mut in_class = false;
-    let mut escaped = false;
-    let mut count = 0;
-    while let Some(ch) = parser.current() {
-        if escaped {
-            escaped = false;
-        }
-        match ch {
-            '\\' => {
-                escaped = true;
-            }
-            '[' | ']' => {
-                in_class = false;
-            }
-            '(' if !in_class => {
-                if parser.next() != Some(&'?')
-                    || (parser.nth(2) == Some(&'<')
-                        && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
-                {
-                    count += 1;
-                }
-            }
-            _ => {}
-        }
-        parser.advance();
-    }
-    parser.rewind(start);
-    count
-}
diff --git a/; b/;
deleted file mode 100644
index acc0dcbc178a7..0000000000000
--- a/;
+++ /dev/null
@@ -1,377 +0,0 @@
-use std::collections::{HashSet, VecDeque};
-use std::iter::Peekable;
-use std::ops::Range;
-use std::os::unix::fs::OpenOptionsExt;
-use std::panic;
-use std::str::{CharIndices, Chars, Matches};
-
-use oxc_diagnostics::Error;
-use oxc_span::Span;
-
-use crate::ast::{
-    Alternative, Assertion, BoundaryAssertion, Branch, Character, EdgeAssertion, Element, Pattern,
-    QuantifiableElement, Quantifier, RegExpLiteral,
-};
-use crate::ast_builder::AstBuilder;
-use crate::ecma_version::EcmaVersion;
-
-pub struct Lexer<'a> {
-    source: &'a str,
-    /// Regex usually, use a collected `Vec` could reduce lookahead and other util function implementation complexity
-    chars: Vec<char>,
-
-    pub(crate) errors: Vec<Error>,
-}
-
-#[allow(clippy::unused_self)]
-impl<'a> Lexer<'a> {
-    pub fn new(source: &'a str) -> Self {
-        Self { source, errors: vec![], chars: source.chars().collect::<Vec<_>>() }
-    }
-}
-
-pub struct Parser<'a> {
-    lexer: Lexer<'a>,
-    builder: AstBuilder<'a>,
-
-    /// Source Code
-    source_text: &'a str,
-
-    /// All syntax errors from parser and lexer
-    /// Note: favor adding to `Diagnostics` instead of raising Err
-    errors: Vec<Error>,
-    context: ParserContext,
-    index: usize,
-    group_names: HashSet<String>,
-    num_capturing_parens: usize,
-    last_int_value: usize,
-    back_reference_names: HashSet<String>,
-    last_assertion_is_quantifiable: bool,
-    last_range: Range<usize>,
-}
-
-#[derive(Default, Copy, Clone)]
-struct ParserContext {
-    source_kind: SourceKind,
-    unicode_mode: bool,
-    nflag: bool,
-    unicode_sets_mode: bool,
-    ecma_version: EcmaVersion,
-    strict: bool,
-}
-
-impl<'a> Parser<'a> {
-    /// Create a new parser
-    pub fn new(allocator: &'a oxc_allocator::Allocator, source_text: &'a str) -> Self {
-        Self {
-            lexer: Lexer::new(source_text),
-            source_text,
-            errors: vec![],
-            context: ParserContext::default(),
-            index: 0,
-            group_names: HashSet::new(),
-            num_capturing_parens: 0,
-            back_reference_names: HashSet::new(),
-            last_int_value: 0,
-            last_range: 0..0,
-            last_assertion_is_quantifiable: false,
-            builder: AstBuilder::new(allocator),
-        }
-    }
-
-    pub fn is(&self, ch: char) -> bool {
-        self.lexer.chars.get(self.index) == Some(&ch)
-    }
-
-    pub fn eat(&mut self, ch: char) -> bool {
-        if self.is(ch) {
-            self.index += 1;
-            true
-        } else {
-            false
-        }
-    }
-
-    pub fn eof(&self) -> bool {
-        self.index < self.lexer.chars.len()
-    }
-
-    pub fn nth(&self, n: usize) -> Option<&char> {
-        self.lexer.chars.get(self.index + n)
-    }
-
-    /// by default next means `next_1`
-    pub fn next(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index + 1)
-    }
-
-    /// get a range chars relative from current cursor
-    pub fn nrange(&self, range: Range<usize>) -> Option<&[char]> {
-        self.lexer.chars.get(self.index + range.start..(self.index + range.end))
-    }
-
-    pub fn current(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index)
-    }
-
-    pub fn advance(&mut self) -> bool {
-        if self.index < self.lexer.chars.len() {
-            self.index += 1;
-            return true;
-        } else {
-            false
-        }
-    }
-
-    pub fn rewind(&mut self, start: usize) {
-        self.index = start;
-    }
-}
-
-#[derive(Default, Clone, Copy)]
-pub enum SourceKind {
-    Flags,
-    #[default]
-    Literal,
-    Pattern,
-}
-
-pub fn parse_literal<'a>(parser: &mut Parser<'a>) -> RegExpLiteral<'a> {
-    if parser.is('/') {
-        parser.advance();
-        let pattern = parse_pattern(parser);
-        todo!()
-    } else if parser.source_text.is_empty() {
-        panic!("Empty")
-    } else {
-        match parser.current() {
-            Some(ch) => {
-                panic!("unexpected character {ch}")
-            }
-            None => {
-                panic!("unexpected eof")
-            }
-        };
-    }
-}
-
-fn parse_pattern<'a>(parser: &mut Parser<'a>) -> Pattern<'a> {
-    let start = parser.index;
-    if let Some(pattern) = parse_pattern_internal(parser) {
-        return pattern;
-    } else if !parser.context.nflag
-        && parser.context.ecma_version >= EcmaVersion::V2018
-        && parser.group_names.len() > 0
-    {
-        parser.rewind(start);
-        parser.context.nflag = true;
-        return parse_pattern_internal(parser).expect("should have pattern");
-    }
-    panic!("Invalid pattern")
-}
-
-fn parse_pattern_internal<'a>(parser: &mut Parser<'a>) -> Option<Pattern<'a>> {
-    let start = parser.index;
-    parser.num_capturing_parens = count_capturing_parens(parser);
-    parser.group_names.clear();
-    parser.back_reference_names.clear();
-    todo!()
-}
-
-fn parse_disjunction<'a>(parser: &mut Parser<'a>) -> oxc_allocator::Vec<'a, Alternative<'a>> {
-    let start = parser.index;
-    let mut alternatives = parser.builder.new_vec();
-    loop {
-        alternatives.push(parser_alternative(parser));
-        if !parser.eat('|') {
-            break;
-        }
-    }
-    // Only consume the ast when `no_consume` is false
-    if parse_quantifier(parser, Some(true)).0 {
-        panic!("Nothing to repeat");
-    }
-    if parser.eat('{') {
-        panic!("Lone quantifier brackets")
-    }
-    alternatives
-}
-
-/// Validate the next characters as a RegExp `Alternative` production.
-/// ```
-///  Alternative[UnicodeMode, UnicodeSetsMode, N]::
-///      [empty]
-///  Alternative[?UnicodeMode, ?UnicodeSetsMode, ?N] Term[?UnicodeMode, ?UnicodeSetsMode, ?N]
-/// ```
-fn parser_alternative<'a>(parser: &mut Parser<'a>) -> Alternative<'a> {
-    let start = parser.index;
-    let mut elements = parser.builder.new_vec();
-    while !parser.eof() {
-        let (flag, node) = parse_term(parser);
-        if let Some(node) = node {
-            elements.push(node);
-        }
-        if !flag {
-            break;
-        }
-    }
-    Alternative { span: Span::new(start as u32, parser.index as u32), elements }
-}
-
-fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
-    if parser.context.unicode_mode || parser.context.strict {}
-    todo!()
-}
-
-fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
-    let start = parser.index;
-    parser.last_assertion_is_quantifiable = false;
-
-    if parser.eat('^') {
-        return (
-            true,
-            Some(Assertion::BoundaryAssertion(parser.builder.alloc(BoundaryAssertion::EdgeAssertion(
-                parser.builder.alloc(EdgeAssertion {
-                    span: Span::new(start as u32, parser.index as u32),
-                    kind: todo!(),
-                }),
-            ))),
-        );
-    }
-    todo!()
-}
-
-/// Validate the next characters as a RegExp `Quantifier` production if possible.
-/// ```
-///  Quantifier::
-///        QuantifierPrefix
-///        QuantifierPrefix `?`
-///   QuantifierPrefix::
-///        `*`
-///        `+`
-///        `?`
-///        `{` DecimalDigits `}`
-///        `{` DecimalDigits `,}`
-///        `{` DecimalDigits `,` DecimalDigits `}`
-///   ```
-/// returns `true` if it consumed the next characters successfully.
-fn parse_quantifier<'a>(
-    parser: &mut Parser<'a>,
-    no_consume: Option<bool>,
-) -> (bool, Option<Element<'a>>) {
-    let mut no_consume = no_consume.unwrap_or_default();
-    let start = parser.index;
-    let mut min = 0;
-    let mut max = 0;
-    let mut greedy = false;
-    let mut element = None;
-    match parser.current().cloned() {
-        Some('*') => {
-            min = 0;
-            max = usize::MAX;
-            parser.advance();
-        }
-        Some('+') => {
-            min = 1;
-            max = usize::MAX;
-            parser.advance();
-        }
-        Some('?') => {
-            min = 0;
-            max = 1;
-            parser.advance();
-        }
-        Some(_) => {
-            if parse_braced_quantifier(parser, no_consume) {
-                min = parser.last_range.start;
-                max = parser.last_range.end;
-            }
-        }
-        None => return (false, None),
-    }
-    greedy = !parser.eat('?');
-
-    if !no_consume {
-        let quantifier = parser.builder.alloc(Quantifier {
-            span: Span { start: start as u32, end: parser.index as u32 },
-            min,
-            max,
-            greedy,
-            // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
-            // it can't be null, or the program will panic, so we put a dummy element, and parent
-            // should replace it
-            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
-        });
-
-        element = Some(Element::Quantifier(quantifier))
-    }
-    (true, element)
-}
-
-fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool {
-    let start = parser.index;
-    if eat_decimal_digits(parser) {
-        let min = parser.last_int_value;
-        let mut max = min;
-        if parser.eat(',') {
-            max = if eat_decimal_digits(parser) { parser.last_int_value } else { usize::MAX };
-        }
-        if parser.eat('}') {
-            if !no_error && max < min {
-                panic!("numbers out of order in {{}} quantifier");
-            }
-            parser.last_range = min..max;
-            return true;
-        }
-    }
-    if !no_error && (parser.context.unicode_mode || parser.context.strict) {
-        panic!("Incomplete quantifier");
-    }
-    parser.rewind(start);
-    false
-}
-
-fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = parser.index;
-    parser.last_int_value = 0;
-    while let Some(ch) = parser.current() {
-        let Some(d) = ch.to_digit(10) else {
-            break;
-        };
-        parser.last_int_value = 10 * parser.last_int_value + d as usize;
-        parser.advance();
-    }
-    parser.index != start
-}
-
-fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
-    let start = parser.index;
-    let mut in_class = false;
-    let mut escaped = false;
-    let mut count = 0;
-    while let Some(ch) = parser.current() {
-        if escaped {
-            escaped = false;
-        }
-        match ch {
-            '\\' => {
-                escaped = true;
-            }
-            '[' | ']' => {
-                in_class = false;
-            }
-            '(' if !in_class => {
-                if parser.next() != Some(&'?')
-                    || (parser.nth(2) == Some(&'<')
-                        && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
-                {
-                    count += 1;
-                }
-            }
-            _ => {}
-        }
-        parser.advance();
-    }
-    parser.rewind(start);
-    count
-}
diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index 5d429b4f063b2..ff0f940eb7408 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -358,7 +358,7 @@ pub struct StringAlternative<'a> {
 #[derive(Debug)]
 pub struct Character {
     pub span: Span,
-    pub value: u16, // UTF-16 code point
+    pub value: char, // Unicode scalar value (Rust `char`), not a UTF-16 code unit
 }
 
 #[derive(Debug)]
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index e7854d063d509..4678ea0d8e710 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -110,7 +110,7 @@ impl<'a> Parser<'a> {
         self.lexer.chars.get(self.index + n)
     }
 
-    /// by default next means `next_1`
+    /// by default next means `nth(1)`
     pub fn next(&self) -> Option<&char> {
         self.lexer.chars.get(self.index + 1)
     }
@@ -233,6 +233,11 @@ fn parse_term<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
     todo!()
 }
 
+fn parse_optional_quantifier<'a>(parser: &mut Parser<'a>) -> (bool, Option<Element<'a>>) {
+    let (_, node) = parse_quantifier(parser, None);
+    (true, node)
+}
+
 fn parse_assertion<'a>(parser: &mut Parser<'a>) -> (bool, Option<Assertion<'a>>) {
     let start = parser.index;
     parser.last_assertion_is_quantifiable = false;
@@ -377,7 +382,10 @@ fn parse_quantifier<'a>(
             // https://github.com/eslint-community/regexpp/blob/2e8f1af992fb12eae46a446253e8fa3f6cede92a/src/parser.ts#L269-L275
             // it can't be null, or the program will panic, so we put a dummy element, and parent
             // should replace it
-            element: QuantifiableElement::Character(Character { span: Span::default(), value: 0 }),
+            element: QuantifiableElement::Character(Character {
+                span: Span::default(),
+                value: ' ',
+            }),
         });
 
         element = Some(Element::Quantifier(quantifier))
@@ -408,6 +416,447 @@ fn parse_braced_quantifier<'a>(parser: &mut Parser<'a>, no_error: bool) -> bool
     false
 }
 
+fn parse_atom<'a>(parser: &mut Parser<'a>) {
+    todo!()
+}
+
+fn parse_dot<'a>(parser: &mut Parser<'a>) -> (bool, Option<Character>) {
+    let start = parser.index;
+    if parser.eat('.') {
+        (true, Some(Character { span: Span::new(start as u32, parser.index as u32), value: '.' }))
+    } else {
+        (false, None)
+    }
+}
+
+fn parse_reverse_solidus_atom_escape<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    if parser.eat('\\') {
+        if parse_atom_escape(parser) {
+            return true;
+        }
+        parser.rewind(start);
+    }
+    false
+}
+
+fn parse_atom_escape<'a>(parser: &mut Parser<'a>) -> bool {
+    if parse_backreference(parser)
+        || parser.consume_character_class_escape()
+        || parser.consume_character_escape()
+        || (parser._n_flag && parser.consume_k_group_name())
+    {
+        true
+    } else {
+        if parser.strict || parser._unicode_mode {
+            parser.raise("Invalid escape");
+        }
+        false
+    }
+}
+
+fn parse_backreference<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    if parser.eat_decimal_escape() {
+        let n = parser.last_int_value;
+        if n <= parser.num_capturing_parens {
+            parser.on_backreference(start - 1, parser.index, n);
+            true
+        } else {
+            if parser.context.strict || parser.context.unicode_mode {
+                panic!("Invalid escape");
+            }
+            parser.rewind(start);
+        }
+    } else {
+        false
+    }
+}
+
+fn consume_character_class_escape<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
+    let start = parser.index;
+
+    if parser.eat(LATIN_SMALL_LETTER_D) {
+        parser.last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "digit", false);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    if parser.eat(LATIN_CAPITAL_LETTER_D) {
+        parser._last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "digit", true);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    if parser.eat(LATIN_SMALL_LETTER_S) {
+        parser._last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "space", false);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    if parser.eat(LATIN_CAPITAL_LETTER_S) {
+        parser._last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "space", true);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    if parser.eat(LATIN_SMALL_LETTER_W) {
+        parser._last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "word", false);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    if parser.eat(LATIN_CAPITAL_LETTER_W) {
+        parser._last_int_value = -1;
+        parser.on_escape_character_set(start - 1, parser.index, "word", true);
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+    }
+
+    let mut negate = false;
+    if parser._unicode_mode
+        && parser.ecma_version >= 2018
+        && (parser.eat(LATIN_SMALL_LETTER_P) || (negate = parser.eat(LATIN_CAPITAL_LETTER_P)))
+    {
+        parser._last_int_value = -1;
+        if parser.eat(LEFT_CURLY_BRACKET) {
+            if let Some(result) = parser.eat_unicode_property_value_expression() {
+                if parser.eat(RIGHT_CURLY_BRACKET) {
+                    if negate && result.strings.is_some() {
+                        parser.raise("Invalid property name");
+                    }
+
+                    parser.on_unicode_property_character_set(
+                        start - 1,
+                        parser.index,
+                        "property",
+                        &result.key,
+                        &result.value,
+                        negate,
+                        result.strings.unwrap_or(false),
+                    );
+
+                    return Some(UnicodeSetsConsumeResult {
+                        may_contain_strings: result.strings.unwrap_or(false),
+                    });
+                }
+            }
+        }
+        panic!("Invalid property name");
+    }
+
+    None
+}
+
+fn consume_k_group_name<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+
+    if parser.eat(LATIN_SMALL_LETTER_K) {
+        if parser.eat_group_name() {
+            let group_name = parser._last_str_value.clone();
+            parser._backreference_names.insert(group_name.clone());
+            parser.on_backreference(start - 1, parser.index, group_name);
+            return true;
+        }
+        parser.raise("Invalid named reference");
+    }
+
+    false
+}
+
+fn consume_character_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
+    let start = parser.index;
+
+    if parser.eat(LEFT_SQUARE_BRACKET) {
+        let negate = parser.eat(CIRCUMFLEX_ACCENT);
+        parser.on_character_class_enter(start, negate, parser._unicode_sets_mode);
+        let result = parser.consume_class_contents()?;
+        if !parser.eat(RIGHT_SQUARE_BRACKET) {
+            if parser.current_code_point == -1 {
+                parser.raise("Unterminated character class");
+            }
+            parser.raise("Invalid character in character class");
+        }
+        if negate && result.may_contain_strings {
+            parser.raise("Negated character class may contain strings");
+        }
+
+        parser.on_character_class_leave(start, parser.index, negate);
+
+        // * Static Semantics: MayContainStrings
+        // CharacterClass[UnicodeMode, UnicodeSetsMode] ::
+        //         [ ^ ClassContents[?UnicodeMode, ?UnicodeSetsMode] ]
+        //     1. Return false.
+        // CharacterClass :: [ ClassContents ]
+        //     1. Return MayContainStrings of the ClassContents.
+        Some(result)
+    } else {
+        None
+    }
+}
+
+/**
+ * Consume ClassContents in a character class.
+ * @returns `UnicodeSetsConsumeResult`.
+ */
+fn consume_class_contents(&mut self) -> UnicodeSetsConsumeResult {
+    if self._unicode_sets_mode {
+        if self.current_code_point == RIGHT_SQUARE_BRACKET {
+            // [empty]
+
+            // * Static Semantics: MayContainStrings
+            // ClassContents[UnicodeMode, UnicodeSetsMode] ::
+            //         [empty]
+            //     1. Return false.
+            return UnicodeSetsConsumeResult { may_contain_strings: false };
+        }
+        let result = self.consume_class_set_expression();
+
+        // * Static Semantics: MayContainStrings
+        // ClassContents :: ClassSetExpression
+        //     1. Return MayContainStrings of the ClassSetExpression.
+        return result;
+    }
+
+    let strict = self.strict || self._unicode_mode;
+    loop {
+        // Consume the first ClassAtom
+        let range_start = self.index;
+        if !self.consume_class_atom() {
+            break;
+        }
+        let min = self._last_int_value;
+
+        // Consume `-`
+        if !self.eat(HYPHEN_MINUS) {
+            continue;
+        }
+        self.on_character(range_start - 1, self.index, HYPHEN_MINUS);
+
+        // Consume the second ClassAtom
+        if !self.consume_class_atom() {
+            break;
+        }
+        let max = self._last_int_value;
+
+        // Validate
+        if min == -1 || max == -1 {
+            if strict {
+                self.raise("Invalid character class");
+            }
+            continue;
+        }
+        if min > max {
+            self.raise("Range out of order in character class");
+        }
+
+        self.on_character_class_range(range_start, self.index, min, max);
+    }
+
+    // * Static Semantics: MayContainStrings
+    // ClassContents[UnicodeMode, UnicodeSetsMode] ::
+    //         NonemptyClassRanges[?UnicodeMode]
+    //     1. Return false.
+    return UnicodeSetsConsumeResult { may_contain_strings: false };
+}
+
+/**
+ * Consume ClassAtom in a character class.
+ * @returns `true` if it consumed the next characters successfully.
+ */
+fn consume_class_atom(&mut self) -> bool {
+    let start = self.index;
+    let cp = self.current_code_point;
+
+    if cp != -1 && cp != REVERSE_SOLIDUS && cp != RIGHT_SQUARE_BRACKET {
+        self.advance();
+        self._last_int_value = cp;
+        self.on_character(start, self.index, self._last_int_value);
+        return true;
+    }
+
+    if self.eat(REVERSE_SOLIDUS) {
+        if self.consume_class_escape() {
+            return true;
+        }
+        if !self.strict && self.current_code_point == LATIN_SMALL_LETTER_C {
+            self._last_int_value = REVERSE_SOLIDUS;
+            self.on_character(start, self.index, self._last_int_value);
+            return true;
+        }
+        if self.strict || self._unicode_mode {
+            self.raise("Invalid escape");
+        }
+        self.rewind(start);
+    }
+
+    return false;
+}
+
+/**
+ * Consume ClassEscape in a character class.
+ * @returns `true` if it consumed the next characters successfully.
+ */
+fn consume_class_escape(&mut self) -> bool {
+    let start = self.index;
+
+    // `b`
+    if self.eat(LATIN_SMALL_LETTER_B) {
+        self._last_int_value = BACKSPACE;
+        self.on_character(start - 1, self.index, self._last_int_value);
+        return true;
+    }
+
+    // [+UnicodeMode] `-`
+    if self._unicode_mode && self.eat(HYPHEN_MINUS) {
+        self._last_int_value = HYPHEN_MINUS;
+        self.on_character(start - 1, self.index, self._last_int_value);
+        return true;
+    }
+
+    // [annexB][~UnicodeMode] `c` ClassControlLetter
+    let cp = 0;
+    if !self.strict
+        && !self._unicode_mode
+        && self.current_code_point == LATIN_SMALL_LETTER_C
+        && (is_decimal_digit((cp = self.next_code_point)) || cp == LOW_LINE)
+    {
+        self.advance();
+        self.advance();
+        self._last_int_value = cp % 0x20;
+        self.on_character(start - 1, self.index, self._last_int_value);
+        return true;
+    }
+
+    return self.consume_character_class_escape() || self.consume_character_escape();
+}
+
+/**
+ * Consume ClassSetExpression in a character class.
+ * @returns `UnicodeSetsConsumeResult`.
+ */
+fn consume_class_set_expression(&mut self) -> UnicodeSetsConsumeResult {
+    let start = self.index;
+    let mut may_contain_strings: Option<bool> = None;
+    let mut result: Option<UnicodeSetsConsumeResult> = None;
+
+    if self.consume_class_set_character() {
+        if self.consume_class_set_range_from_operator(start) {
+            // ClassUnion
+            self.consume_class_union_right(UnicodeSetsConsumeResult { may_contain_strings: None });
+            return UnicodeSetsConsumeResult { may_contain_strings: false };
+        }
+        // ClassSetOperand
+
+        // * Static Semantics: MayContainStrings
+        // ClassSetOperand ::
+        //         ClassSetCharacter
+        //     1. Return false.
+        may_contain_strings = Some(false);
+    } else if let Some(res) = self.consume_class_set_operand() {
+        may_contain_strings = Some(res.may_contain_strings);
+    } else {
+        let cp = self.current_code_point;
+        if cp == REVERSE_SOLIDUS {
+            self.advance();
+            self.raise("Invalid escape");
+        }
+        if cp == self.next_code_point && is_class_set_reserved_double_punctuator_character(cp) {
+            self.raise("Invalid set operation in character class");
+        }
+        self.raise("Invalid character in character class");
+    }
+
+    if self.eat2(AMPERSAND, AMPERSAND) {
+        // ClassIntersection
+        while self.current_code_point != AMPERSAND
+            && (result = self.consume_class_set_operand()).is_some()
+        {
+            self.on_class_intersection(start, self.index);
+            if !result.as_ref().unwrap().may_contain_strings.unwrap_or(false) {
+                may_contain_strings = Some(false);
+            }
+            if self.eat2(AMPERSAND, AMPERSAND) {
+                continue;
+            }
+
+            // * Static Semantics: MayContainStrings
+            // ClassSetExpression :: ClassIntersection
+            //     1. Return MayContainStrings of the ClassIntersection.
+            // ClassIntersection :: ClassSetOperand && ClassSetOperand
+            //     1. If MayContainStrings of the first ClassSetOperand is false, return false.
+            //     2. If MayContainStrings of the second ClassSetOperand is false, return false.
+            //     3. Return true.
+            // ClassIntersection :: ClassIntersection && ClassSetOperand
+            //     1. If MayContainStrings of the ClassIntersection is false, return false.
+            //     2. If MayContainStrings of the ClassSetOperand is false, return false.
+            //     3. Return true.
+            return UnicodeSetsConsumeResult { may_contain_strings };
+        }
+
+        self.raise("Invalid character in character class");
+    }
+    if self.eat2(HYPHEN_MINUS, HYPHEN_MINUS) {
+        // ClassSubtraction
+        while self.consume_class_set_operand() {
+            self.on_class_subtraction(start, self.index);
+            if self.eat2(HYPHEN_MINUS, HYPHEN_MINUS) {
+                continue;
+            }
+            // * Static Semantics: MayContainStrings
+            // ClassSetExpression :: ClassSubtraction
+            //     1. Return MayContainStrings of the ClassSubtraction.
+            // ClassSubtraction :: ClassSetOperand -- ClassSetOperand
+            //     1. Return MayContainStrings of the first ClassSetOperand.
+            // ClassSubtraction :: ClassSubtraction -- ClassSetOperand
+            //     1. Return MayContainStrings of the ClassSubtraction.
+            return UnicodeSetsConsumeResult { may_contain_strings };
+        }
+
+        self.raise("Invalid character in character class");
+    }
+    // ClassUnion
+    return self.consume_class_union_right(UnicodeSetsConsumeResult { may_contain_strings });
+}
+
+/**
+ * Consume the right operand of a ClassUnion in a character class.
+ * @param left_result The result information for the left ClassSetRange or ClassSetOperand.
+ * @returns `UnicodeSetsConsumeResult`.
+ */
+fn consume_class_union_right(
+    &mut self,
+    left_result: UnicodeSetsConsumeResult,
+) -> UnicodeSetsConsumeResult {
+    // ClassUnion
+    let mut may_contain_strings = left_result.may_contain_strings.unwrap_or(false);
+    loop {
+        let start = self.index;
+        if self.consume_class_set_character() {
+            self.consume_class_set_range_from_operator(start);
+            continue;
+        }
+        if let Some(result) = self.consume_class_set_operand() {
+            if result.may_contain_strings.unwrap_or(false) {
+                may_contain_strings = true;
+            }
+            continue;
+        }
+        break;
+    }
+
+    // * Static Semantics: MayContainStrings
+    // ClassSetExpression :: ClassUnion
+    //     1. Return MayContainStrings of the ClassUnion.
+    // ClassUnion :: ClassSetRange ClassUnion(opt)
+    //     1. If the ClassUnion is present, return MayContainStrings of the ClassUnion.
+    //     2. Return false.
+    // ClassUnion :: ClassSetOperand ClassUnion(opt)
+    //     1. If MayContainStrings of the ClassSetOperand is true, return true.
+    //     2. If ClassUnion is present, return MayContainStrings of the ClassUnion.
+    //     3. Return false.
+    return UnicodeSetsConsumeResult { may_contain_strings };
+}
+
 fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
     let start = parser.index;
     parser.last_int_value = 0;
@@ -438,7 +887,7 @@ fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
                 in_class = false;
             }
             '(' if !in_class => {
-                if parser.next() != Some(&'?')
+                if parser.nth(1) != Some(&'?')
                     || (parser.nth(2) == Some(&'<')
                         && !matches!(parser.nth(3), Some(&'=') | Some(&'!')))
                 {
@@ -452,3 +901,668 @@ fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
     parser.rewind(start);
     count
 }
+
+/**
+ * Consume NestedClass in a character class: a bracketed class or `\` CharacterClassEscape.
+ * @returns `Some(UnicodeSetsConsumeResult)` if either form was consumed, otherwise `None`.
+ */
+fn consume_nested_class(&mut self) -> Option<UnicodeSetsConsumeResult> {
+    let start = self.index;
+    if self.eat(LEFT_SQUARE_BRACKET) {
+        let negate = self.eat(CIRCUMFLEX_ACCENT); // an optional `^` right after `[`
+        self.on_character_class_enter(start, negate, true);
+        let result = self.consume_class_contents();
+        if !self.eat(RIGHT_SQUARE_BRACKET) {
+            self.raise("Unterminated character class");
+        }
+        if negate && result.may_contain_strings.unwrap_or(false) {
+            self.raise("Negated character class may contain strings");
+        }
+        self.on_character_class_leave(start, self.index, negate);
+        // NOTE(review): `result` is returned as-is even when `negate` is set; that agrees with
+        // "Return false" below only if `raise` above never returns -- confirm.
+        // * Static Semantics: MayContainStrings
+        // NestedClass :: [ ^ ClassContents[+UnicodeMode, +UnicodeSetsMode] ]
+        //     1. Return false.
+        // NestedClass :: [ ClassContents ]
+        //     1. Return MayContainStrings of the ClassContents.
+        return Some(result);
+    }
+    if self.eat(REVERSE_SOLIDUS) {
+        if let Some(result) = self.consume_character_class_escape() {
+            // * Static Semantics: MayContainStrings
+            // NestedClass :: \ CharacterClassEscape
+            //     1. Return MayContainStrings of the CharacterClassEscape.
+            return Some(result);
+        }
+        self.rewind(start); // no class escape followed: put the `\` back
+    }
+    None
+}
+
+/**
+ * Consume ClassStringDisjunction, i.e. `\q{` ClassString (`|` ClassString)* `}`.
+ * @returns `Some(UnicodeSetsConsumeResult)` once the closing `}` is eaten, otherwise `None`.
+ */
+fn consume_class_string_disjunction(&mut self) -> Option<UnicodeSetsConsumeResult> {
+    let start = self.index;
+    if self.eat3(REVERSE_SOLIDUS, LATIN_SMALL_LETTER_Q, LEFT_CURLY_BRACKET) {
+        self.on_class_string_disjunction_enter(start);
+
+        let mut may_contain_strings = false;
+        // Keep going while `|` follows, whatever the previous alternative's flag was.
+        for i in 0.. {
+            let alternative = self.consume_class_string(i);
+            may_contain_strings |= alternative.may_contain_strings.unwrap_or(false);
+            if !self.eat(VERTICAL_LINE) {
+                break;
+            }
+        }
+
+        if self.eat(RIGHT_CURLY_BRACKET) {
+            self.on_class_string_disjunction_leave(start, self.index);
+
+            // * Static Semantics: MayContainStrings
+            // ClassStringDisjunction :: \q{ ClassStringDisjunctionContents }
+            //     1. Return MayContainStrings of the ClassStringDisjunctionContents.
+            // ClassStringDisjunctionContents :: ClassString
+            //     1. Return MayContainStrings of the ClassString.
+            // ClassStringDisjunctionContents :: ClassString | ClassStringDisjunctionContents
+            //     1. If MayContainStrings of the ClassString is true, return true.
+            //     2. Return MayContainStrings of the ClassStringDisjunctionContents.
+            return Some(UnicodeSetsConsumeResult { may_contain_strings: Some(may_contain_strings) });
+        }
+        self.raise("Unterminated class string disjunction");
+    }
+    None
+}
+
+/**
+ * Consume ClassString: zero or more ClassSetCharacters forming one `\q{...}` alternative.
+ * @param i - The index of the string alternative, forwarded to `on_string_alternative_*`.
+ * @returns `UnicodeSetsConsumeResult` whose flag is set unless exactly one character was read.
+ */
+fn consume_class_string(&mut self, i: usize) -> UnicodeSetsConsumeResult {
+    let begin = self.index;
+    self.on_string_alternative_enter(begin, i);
+    let mut consumed = 0;
+    loop {
+        if self.current_code_point == -1 || !self.consume_class_set_character() {
+            break;
+        }
+        consumed += 1;
+    }
+    self.on_string_alternative_leave(begin, self.index, i);
+
+    // * Static Semantics: MayContainStrings
+    // ClassString :: [empty]
+    //     1. Return true.
+    // ClassString :: NonEmptyClassString
+    //     1. Return MayContainStrings of the NonEmptyClassString.
+    // NonEmptyClassString :: ClassSetCharacter NonEmptyClassString(opt)
+    //     1. If NonEmptyClassString is present, return true.
+    //     2. Return false.
+    UnicodeSetsConsumeResult { may_contain_strings: Some(consumed != 1) }
+}
+
+/**
+ * Consume ClassSetCharacter: a plain character, or `\` + escape / reserved punctuator / `b`.
+ * Set `self._last_int_value` if it consumed the next characters successfully.
+ * @returns `true` if it ate the next characters successfully; `false` leaves nothing consumed.
+ */
+fn consume_class_set_character(&mut self) -> bool {
+    let start = self.index;
+    let cp = self.current_code_point;
+    // Plain-character path. `&&` binds tighter than `||`, so this reads `(a && b) || c`.
+    if cp != -1 && cp != self.next_code_point
+        || !is_class_set_reserved_double_punctuator_character(cp)
+    {
+        if cp != -1 && !is_class_set_syntax_character(cp) {
+            self._last_int_value = cp;
+            self.advance();
+            self.on_character(start, self.index, self._last_int_value);
+            return true;
+        }
+    }
+    // Escaped forms, tried in order: character escape, reserved punctuator, then `b`.
+    if self.eat(REVERSE_SOLIDUS) {
+        if self.consume_character_escape() {
+            return true;
+        }
+        if is_class_set_reserved_punctuator(self.current_code_point) {
+            self._last_int_value = self.current_code_point;
+            self.advance();
+            self.on_character(start, self.index, self._last_int_value);
+            return true;
+        }
+        if self.eat(LATIN_SMALL_LETTER_B) {
+            self._last_int_value = BACKSPACE; // `\b` yields the BACKSPACE constant here
+            self.on_character(start, self.index, self._last_int_value);
+            return true;
+        }
+        self.rewind(start); // nothing acceptable followed the `\`: put it back
+    }
+
+    false
+}
+
+/**
+ * Eat a RegExp `GroupName` (`<` RegExpIdentifierName `>`) if the next character is `<`.
+ * @returns `true` if the whole name was eaten; `false` without `<`, or after raising on a bad name.
+ */
+fn eat_group_name(&mut self) -> bool {
+    if !self.eat(LESS_THAN_SIGN) {
+        return false;
+    }
+    let named = self.eat_reg_exp_identifier_name() && self.eat(GREATER_THAN_SIGN);
+    if !named {
+        self.raise("Invalid capture group name");
+    }
+    named
+}
+
+/**
+ * Eat the next characters as a RegExp `RegExpIdentifierName` production if
+ * possible, storing the name's characters (not their numeric code points) in
+ * `self._last_str_value`. Values that `char::from_u32` rejects are left out.
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_reg_exp_identifier_name(&mut self) -> bool {
+    if self.eat_reg_exp_identifier_start() {
+        self._last_str_value.clear();
+        self._last_str_value.extend(char::from_u32(self._last_int_value as u32));
+        while self.eat_reg_exp_identifier_part() {
+            self._last_str_value.extend(char::from_u32(self._last_int_value as u32));
+        }
+
+        return true;
+    }
+    false
+}
+
+/**
+ * Eat the next characters as a RegExp `RegExpIdentifierStart` production if possible:
+ * a single code point, a `\u` escape, or (when forced) a lead/trail surrogate pair.
+ * When not in unicode mode and `ecma_version >= 2020`, `u`-flag handling is forced.
+ * @returns `true` with the code point in `self._last_int_value`, else rewinds and returns `false`.
+ */
+fn eat_reg_exp_identifier_start(&mut self) -> bool {
+    let begin = self.index;
+    let forced_u = self.ecma_version >= 2020 && !self._unicode_mode;
+    let mut point = self.current_code_point;
+    self.advance();
+
+    if point == REVERSE_SOLIDUS && self.eat_reg_exp_unicode_escape_sequence(forced_u) {
+        point = self._last_int_value;
+    } else if forced_u && is_lead_surrogate(point) && is_trail_surrogate(self.current_code_point) {
+        let trail = self.current_code_point;
+        self.advance();
+        point = combine_surrogate_pair(point, trail);
+    }
+
+    if !is_identifier_start_char(point) {
+        if self.index != begin {
+            self.rewind(begin);
+        }
+        return false;
+    }
+    self._last_int_value = point;
+    true
+}
+
+/**
+ * Eat the next characters as a RegExp `RegExpIdentifierPart` production if possible.
+ * Same steps as `eat_reg_exp_identifier_start`, but checked with `is_identifier_part_char`.
+ * ```
+ * RegExpIdentifierPart[UnicodeMode]::
+ *      RegExpIdentifierStart[?UnicodeMode]
+ *      DecimalDigit
+ *      \ UnicodeEscapeSequence[+UnicodeMode]
+ * ```
+ * @returns `true` with the code point in `self._last_int_value`, else rewinds and returns `false`.
+ */
+fn eat_reg_exp_identifier_part(&mut self) -> bool {
+    let begin = self.index;
+    let forced_u = self.ecma_version >= 2020 && !self._unicode_mode;
+    let mut point = self.current_code_point;
+    self.advance();
+
+    if point == REVERSE_SOLIDUS && self.eat_reg_exp_unicode_escape_sequence(forced_u) {
+        point = self._last_int_value;
+    } else if forced_u && is_lead_surrogate(point) && is_trail_surrogate(self.current_code_point) {
+        let trail = self.current_code_point;
+        self.advance();
+        point = combine_surrogate_pair(point, trail);
+    }
+
+    if !is_identifier_part_char(point) {
+        if self.index != begin {
+            self.rewind(begin);
+        }
+        return false;
+    }
+    self._last_int_value = point;
+    true
+}
+
+/**
+ * Eat `c` followed by a ControlLetter; if the letter is missing the position is restored.
+ * ```
+ *      `c` ControlLetter
+ * ```
+ * @returns `true` if both were eaten (`eat_control_letter` then set `self._last_int_value`).
+ */
+fn eat_c_control_letter(&mut self) -> bool {
+    let begin = self.index;
+    if !self.eat(LATIN_SMALL_LETTER_C) {
+        return false;
+    }
+    let eaten = self.eat_control_letter();
+    if !eaten {
+        self.rewind(begin);
+    }
+    eaten
+}
+
+/**
+ * Eat a `0` that is not followed by another decimal digit.
+ * On success `self._last_int_value` is set to 0 and the `0` is consumed.
+ * ```
+ *      `0` [lookahead ∉ DecimalDigit]
+ * ```
+ * @returns `true` if it ate the `0`.
+ */
+fn eat_zero(&mut self) -> bool {
+    let lone_zero = self.current_code_point == DIGIT_ZERO && !is_decimal_digit(self.next_code_point);
+    if lone_zero {
+        self._last_int_value = 0;
+        self.advance();
+    }
+    lone_zero
+}
+
+/**
+ * Eat the next characters as a RegExp `ControlEscape` production if
+ * possible.
+ * Each letter is tried in the order f, n, r, t, v with `self.eat`; on the
+ * first one that is eaten, `self._last_int_value` receives the paired
+ * constant (`FORM_FEED`, `LINE_FEED`, `CARRIAGE_RETURN`,
+ * `CHARACTER_TABULATION` or `LINE_TABULATION`).
+ * Nothing is assigned when no letter matches.
+ * ```
+ * ControlEscape:: one of
+ *      f n r t v
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_control_escape(&mut self) -> bool {
+    // Escape letter paired with the value stored for it.
+    let escapes = [
+        (LATIN_SMALL_LETTER_F, FORM_FEED),
+        (LATIN_SMALL_LETTER_N, LINE_FEED),
+        (LATIN_SMALL_LETTER_R, CARRIAGE_RETURN),
+        (LATIN_SMALL_LETTER_T, CHARACTER_TABULATION),
+        (LATIN_SMALL_LETTER_V, LINE_TABULATION),
+    ];
+
+    // Probe in the listed order and stop at the first letter that is eaten.
+    for (letter, value) in escapes {
+        if self.eat(letter) {
+            self._last_int_value = value;
+            return true;
+        }
+    }
+    false
+}
+
+/**
+ * Eat one Latin letter as a RegExp `ControlLetter` production if possible.
+ * On success the letter is consumed and `self._last_int_value` becomes the
+ * letter's code point modulo 0x20.
+ * ```
+ * ControlLetter:: one of
+ *      a b c d e f g h i j k l m n o p q r s t u v w x y z
+ *      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_control_letter(&mut self) -> bool {
+    let letter = self.current_code_point;
+    if !is_latin_letter(letter) {
+        return false;
+    }
+    self.advance();
+    self._last_int_value = letter % 0x20;
+    true
+}
+
+/**
+ * Eat `u` plus one of the escape forms below, if possible. `force_u_flag` enables the
+ * unicode-only forms even when `self._unicode_mode` is off. A `u` with no valid form after
+ * it raises "Invalid unicode escape" when `self.strict` or the effective flag is set.
+ * ```
+ * RegExpUnicodeEscapeSequence[UnicodeMode]::
+ *      [+UnicodeMode] `u` HexLeadSurrogate `\u` HexTrailSurrogate
+ *      [+UnicodeMode] `u` HexLeadSurrogate
+ *      [+UnicodeMode] `u` HexTrailSurrogate
+ *      [+UnicodeMode] `u` HexNonSurrogate
+ *      [~UnicodeMode] `u` Hex4Digits
+ *      [+UnicodeMode] `u{` CodePoint `}`
+ * ```
+ * @returns `true` if it ate the next characters successfully; otherwise rewinds past the `u`.
+ */
+fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
+    let begin = self.index;
+    let unicode = force_u_flag || self._unicode_mode;
+
+    if !self.eat(LATIN_SMALL_LETTER_U) {
+        return false;
+    }
+    let matched = (unicode && self.eat_reg_exp_unicode_surrogate_pair_escape())
+        || self.eat_fixed_hex_digits(4)
+        || (unicode && self.eat_reg_exp_unicode_code_point_escape());
+    if matched {
+        return true;
+    }
+    if self.strict || unicode {
+        self.raise("Invalid unicode escape");
+    }
+    self.rewind(begin);
+    false
+}
+
+/**
+ * Eat four hex digits forming a lead surrogate, then `\u` and four hex digits forming a
+ * trail surrogate. On success `self._last_int_value` holds the combined pair.
+ * ```
+ *      HexLeadSurrogate `\u` HexTrailSurrogate
+ * ```
+ * @returns `true` if the whole pair was eaten; a partial match is rewound.
+ */
+fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
+    let begin = self.index;
+
+    if !self.eat_fixed_hex_digits(4) {
+        return false;
+    }
+    // The value just read must be a lead surrogate followed by `\u` and four more digits.
+    let lead = self._last_int_value;
+    let has_second_escape = is_lead_surrogate(lead)
+        && self.eat(REVERSE_SOLIDUS)
+        && self.eat(LATIN_SMALL_LETTER_U)
+        && self.eat_fixed_hex_digits(4);
+    if has_second_escape {
+        let trail = self._last_int_value;
+        if is_trail_surrogate(trail) {
+            self._last_int_value = combine_surrogate_pair(lead, trail);
+            return true;
+        }
+    }
+    self.rewind(begin);
+    false
+}
+
+/**
+ * Eat `{`, hex digits and `}`, and accept them only if `is_valid_unicode` approves the
+ * value left in `self._last_int_value`.
+ * ```
+ *      `{` CodePoint `}`
+ * ```
+ * @returns `true` if it ate the next characters successfully; otherwise rewinds.
+ */
+fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
+    let begin = self.index;
+
+    // All four checks must hold; they run left to right and stop at the first failure.
+    let accepted = self.eat(LEFT_CURLY_BRACKET)
+        && self.eat_hex_digits()
+        && self.eat(RIGHT_CURLY_BRACKET)
+        && is_valid_unicode(self._last_int_value);
+    if !accepted {
+        // Put the position back where this call started.
+        self.rewind(begin);
+    }
+    accepted
+}
+
+/**
+ * Eat the current code point as a RegExp `IdentityEscape` production if
+ * `is_valid_identity_escape` accepts it.
+ * On success the code point is stored in `self._last_int_value` and consumed.
+ * ```
+ * IdentityEscape[UnicodeMode, N]::
+ *      [+UnicodeMode] SyntaxCharacter
+ *      [+UnicodeMode] `/`
+ *      [strict][~UnicodeMode] SourceCharacter but not UnicodeIDContinue
+ *      [annexB][~UnicodeMode] SourceCharacterIdentityEscape[?N]
+ * SourceCharacterIdentityEscape[N]::
+ *      [~N] SourceCharacter but not c
+ *      [+N] SourceCharacter but not one of c k
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_identity_escape(&mut self) -> bool {
+    let candidate = self.current_code_point;
+    if !self.is_valid_identity_escape(candidate) {
+        return false;
+    }
+    self._last_int_value = candidate;
+    self.advance();
+    true
+}
+
+/** Decide whether `cp` is acceptable as an IdentityEscape. Checks run in priority order:
+ * the `-1` sentinel, `_unicode_mode`, `strict`, `_n_flag`, and finally the default rule. */
+fn is_valid_identity_escape(&self, cp: i32) -> bool {
+    if cp == -1 {
+        false
+    } else if self._unicode_mode {
+        is_syntax_character(cp) || cp == SOLIDUS
+    } else if self.strict {
+        !is_id_continue(cp)
+    } else if self._n_flag {
+        cp != LATIN_SMALL_LETTER_C && cp != LATIN_SMALL_LETTER_K
+    } else {
+        cp != LATIN_SMALL_LETTER_C
+    }
+}
+
+/**
+ * Eat the next characters as a RegExp `DecimalEscape` production if possible.
+ * Resets `self._last_int_value` to 0, then accumulates the digits' value with saturating math.
+ * ```
+ * DecimalEscape::
+ *      NonZeroDigit DecimalDigits(opt) [lookahead ∉ DecimalDigit]
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_decimal_escape(&mut self) -> bool {
+    self._last_int_value = 0;
+    let mut cp = self.current_code_point;
+    if cp >= DIGIT_ONE && cp <= DIGIT_NINE {
+        while cp >= DIGIT_ZERO && cp <= DIGIT_NINE {
+            let digit = cp - DIGIT_ZERO;
+            self._last_int_value = self._last_int_value.saturating_mul(10).saturating_add(digit);
+            self.advance();
+            cp = self.current_code_point;
+        }
+        return true;
+    }
+    false
+}
+
+/**
+ * Eat the next characters as a RegExp `ControlLetter` production if possible.
+ * Set `self._last_int_value` to `cp % 0x20` if it ate the next character successfully.
+ * NOTE(review): `eat_control_letter` is also defined earlier in this diff -- keep only one.
+ * ```
+ * ControlLetter:: one of
+ *      a b c d e f g h i j k l m n o p q r s t u v w x y z
+ *      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_control_letter(&mut self) -> bool {
+    let cp = self.current_code_point;
+    if is_latin_letter(cp) {
+        self.advance();
+        self._last_int_value = cp % 0x20;
+        return true;
+    }
+    false
+}
+
+/**
+ * Eat the next characters as a RegExp `RegExpUnicodeEscapeSequence` production if possible.
+ * Set `self._last_int_value` if it ate the next characters successfully.
+ * NOTE(review): a function with this name is also defined earlier in this diff -- keep only one.
+ * ```
+ * RegExpUnicodeEscapeSequence[UnicodeMode]::
+ *      [+UnicodeMode] `u` HexLeadSurrogate `\u` HexTrailSurrogate
+ *      [+UnicodeMode] `u` HexLeadSurrogate
+ *      [+UnicodeMode] `u` HexTrailSurrogate
+ *      [+UnicodeMode] `u` HexNonSurrogate
+ *      [~UnicodeMode] `u` Hex4Digits
+ *      [+UnicodeMode] `u{` CodePoint `}`
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
+    let start = self.index;
+    let u_flag = force_u_flag || self._unicode_mode; // caller may force unicode-only forms
+
+    if self.eat(LATIN_SMALL_LETTER_U) {
+        if (u_flag && self.eat_reg_exp_unicode_surrogate_pair_escape())
+            || self.eat_fixed_hex_digits(4)
+            || (u_flag && self.eat_reg_exp_unicode_code_point_escape())
+        {
+            return true;
+        }
+        if self.strict || u_flag {
+            self.raise("Invalid unicode escape");
+        }
+        self.rewind(start); // no valid form followed the `u`: put it back
+    }
+
+    false
+}
+
+/**
+ * Eat a lead-surrogate hex escape followed by `\u` and a trail-surrogate hex escape, if possible.
+ * Sets `self._last_int_value` to the combined pair. NOTE(review): also defined earlier in this diff.
+ * ```
+ *      HexLeadSurrogate `\u` HexTrailSurrogate
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
+    let start = self.index;
+
+    if self.eat_fixed_hex_digits(4) {
+        let lead = self._last_int_value;
+        if is_lead_surrogate(lead)
+            && self.eat(REVERSE_SOLIDUS)
+            && self.eat(LATIN_SMALL_LETTER_U)
+            && self.eat_fixed_hex_digits(4)
+        {
+            let trail = self._last_int_value;
+            if is_trail_surrogate(trail) {
+                self._last_int_value = combine_surrogate_pair(lead, trail);
+                return true;
+            }
+        }
+
+        self.rewind(start); // not a complete lead/trail pair: restore the position
+    }
+
+    false
+}
+
+/**
+ * Eat `{` CodePoint `}` if possible; the value is checked with `is_valid_unicode`.
+ * NOTE(review): a function with this name is also defined earlier in this diff -- keep only one.
+ * ```
+ *      `{` CodePoint `}`
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
+    let start = self.index;
+
+    if self.eat(LEFT_CURLY_BRACKET)
+        && self.eat_hex_digits()
+        && self.eat(RIGHT_CURLY_BRACKET)
+        && is_valid_unicode(self._last_int_value)
+    {
+        return true;
+    }
+
+    self.rewind(start); // reached on every failure path, including when `{` was not eaten
+    false
+}
+
+/**
+ * Eat the next characters as a RegExp `IdentityEscape` production if possible.
+ * Set `self._last_int_value` if it ate the next characters successfully.
+ * NOTE(review): `eat_identity_escape` is also defined earlier in this diff -- keep only one.
+ * ```
+ * IdentityEscape[UnicodeMode, N]::
+ *      [+UnicodeMode] SyntaxCharacter
+ *      [+UnicodeMode] `/`
+ *      [strict][~UnicodeMode] SourceCharacter but not UnicodeIDContinue
+ *      [annexB][~UnicodeMode] SourceCharacterIdentityEscape[?N]
+ * SourceCharacterIdentityEscape[N]::
+ *      [~N] SourceCharacter but not c
+ *      [+N] SourceCharacter but not one of c k
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_identity_escape(&mut self) -> bool {
+    let cp = self.current_code_point;
+    if self.is_valid_identity_escape(cp) {
+        self._last_int_value = cp; // record the escaped code point before consuming it
+        self.advance();
+        return true;
+    }
+    false
+}
+
+fn is_valid_identity_escape(&self, cp: i32) -> bool { // NOTE(review): also defined earlier in this diff
+    if cp == -1 {
+        return false; // -1 sentinel is never escapable (presumably "no code point" -- confirm)
+    }
+    if self._unicode_mode {
+        return is_syntax_character(cp) || cp == SOLIDUS; // [+UnicodeMode] SyntaxCharacter or `/`
+    }
+    if self.strict {
+        return !is_id_continue(cp); // [strict] anything that is not ID_Continue
+    }
+    if self._n_flag {
+        return !(cp == LATIN_SMALL_LETTER_C || cp == LATIN_SMALL_LETTER_K); // [+N] not `c`, `k`
+    }
+    cp != LATIN_SMALL_LETTER_C // [~N] anything but `c`
+}
+
+/**
+ * Eat the next characters as a RegExp `DecimalEscape` production if possible.
+ * Resets `self._last_int_value` to 0, then accumulates the digits' value with saturating math.
+ * ```
+ * DecimalEscape::
+ *      NonZeroDigit DecimalDigits(opt) [lookahead ∉ DecimalDigit]
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_decimal_escape(&mut self) -> bool { // NOTE(review): also defined earlier in this diff
+    self._last_int_value = 0;
+    let mut cp = self.current_code_point;
+    if cp >= DIGIT_ONE && cp <= DIGIT_NINE {
+        while cp >= DIGIT_ZERO && cp <= DIGIT_NINE {
+            let digit = cp - DIGIT_ZERO;
+            self._last_int_value = self._last_int_value.saturating_mul(10).saturating_add(digit);
+            self.advance();
+            cp = self.current_code_point;
+        }
+        return true;
+    }
+    false
+}

From 15582e874d83e81ec6d0d0400fb7147719cce6a0 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 21:58:49 +0800
Subject: [PATCH 09/19] =?UTF-8?q?fix:=20=F0=9F=90=9B=20syntax=20err?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 87 ++++++++++++++++---------------
 1 file changed, 45 insertions(+), 42 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 4678ea0d8e710..c61291b9f6a1d 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -133,7 +133,7 @@ impl<'a> Parser<'a> {
         }
     }
 
-    pub fn rewind(&mut self, start: usize) {
+    pub fn rewind<'a>(parser: &mut Parser<'a>, start: usize) {
         self.index = start;
     }
 }
@@ -444,7 +444,7 @@ fn parse_atom_escape<'a>(parser: &mut Parser<'a>) -> bool {
     if parse_backreference(parser)
         || parser.consume_character_class_escape()
         || parser.consume_character_escape()
-        || (parser._n_flag && parser.consume_k_group_name())
+        || (parser.context.nflag && parser.consume_k_group_name())
     {
         true
     } else {
@@ -467,6 +467,7 @@ fn parse_backreference<'a>(parser: &mut Parser<'a>) -> bool {
                 panic!("Invalid escape");
             }
             parser.rewind(start);
+            true
         }
     } else {
         false
@@ -598,7 +599,7 @@ fn consume_character_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsCon
  * Consume ClassContents in a character class.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_contents(&mut self) -> UnicodeSetsConsumeResult {
+fn consume_class_contents<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResult {
     if self._unicode_sets_mode {
         if self.current_code_point == RIGHT_SQUARE_BRACKET {
             // [empty]
@@ -663,7 +664,7 @@ fn consume_class_contents(&mut self) -> UnicodeSetsConsumeResult {
  * Consume ClassAtom in a character class.
  * @returns `true` if it consumed the next characters successfully.
  */
-fn consume_class_atom(&mut self) -> bool {
+fn consume_class_atom<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
     let cp = self.current_code_point;
 
@@ -696,7 +697,7 @@ fn consume_class_atom(&mut self) -> bool {
  * Consume ClassEscape in a character class.
  * @returns `true` if it consumed the next characters successfully.
  */
-fn consume_class_escape(&mut self) -> bool {
+fn consume_class_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
 
     // `b`
@@ -734,7 +735,7 @@ fn consume_class_escape(&mut self) -> bool {
  * Consume ClassSetExpression in a character class.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_set_expression(&mut self) -> UnicodeSetsConsumeResult {
+fn consume_class_set_expression<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResult {
     let start = self.index;
     let mut may_contain_strings: Option<bool> = None;
     let mut result: Option<UnicodeSetsConsumeResult> = None;
@@ -823,8 +824,8 @@ fn consume_class_set_expression(&mut self) -> UnicodeSetsConsumeResult {
  * @param left_result The result information for the left ClassSetRange or ClassSetOperand.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_union_right(
-    &mut self,
+fn consume_class_union_right<'a>(
+    parser: &mut Parser<'a>,
     left_result: UnicodeSetsConsumeResult,
 ) -> UnicodeSetsConsumeResult {
     // ClassUnion
@@ -906,7 +907,7 @@ fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
  * Consume NestedClass in a character class.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_nested_class(&mut self) -> Option<UnicodeSetsConsumeResult> {
+fn consume_nested_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
     let start = self.index;
     if self.eat(LEFT_SQUARE_BRACKET) {
         let negate = self.eat(CIRCUMFLEX_ACCENT);
@@ -944,7 +945,9 @@ fn consume_nested_class(&mut self) -> Option<UnicodeSetsConsumeResult> {
  * Consume ClassStringDisjunction in a character class.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_string_disjunction(&mut self) -> Option<UnicodeSetsConsumeResult> {
+fn consume_class_string_disjunction<'a>(
+    parser: &mut Parser<'a>,
+) -> Option<UnicodeSetsConsumeResult> {
     let start = self.index;
     if self.eat3(REVERSE_SOLIDUS, LATIN_SMALL_LETTER_Q, LEFT_CURLY_BRACKET) {
         self.on_class_string_disjunction_enter(start);
@@ -982,7 +985,7 @@ fn consume_class_string_disjunction(&mut self) -> Option<UnicodeSetsConsumeResul
  * @param i - The index of the string alternative.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_string(&mut self, i: usize) -> UnicodeSetsConsumeResult {
+fn consume_class_string<'a>(parser: &mut Parser<'a>, i: usize) -> UnicodeSetsConsumeResult {
     let start = self.index;
 
     let mut count = 0;
@@ -1010,7 +1013,7 @@ fn consume_class_string(&mut self, i: usize) -> UnicodeSetsConsumeResult {
  * Set `self._last_int_value` if it consumed the next characters successfully.
  * @returns `true` if it ate the next characters successfully.
  */
-fn consume_class_set_character(&mut self) -> bool {
+fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
     let cp = self.current_code_point;
 
@@ -1051,7 +1054,7 @@ fn consume_class_set_character(&mut self) -> bool {
  * Set `self._last_str_value` if the group name existed.
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_group_name(&mut self) -> bool {
+fn eat_group_name<'a>(parser: &mut Parser<'a>) -> bool {
     if self.eat(LESS_THAN_SIGN) {
         if self.eat_reg_exp_identifier_name() && self.eat(GREATER_THAN_SIGN) {
             return true;
@@ -1067,7 +1070,7 @@ fn eat_group_name(&mut self) -> bool {
  * Set `self._last_str_value` if the identifier name existed.
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_identifier_name(&mut self) -> bool {
+fn eat_reg_exp_identifier_name<'a>(parser: &mut Parser<'a>) -> bool {
     if self.eat_reg_exp_identifier_start() {
         self._last_str_value = self._last_int_value.to_string();
 
@@ -1086,7 +1089,7 @@ fn eat_reg_exp_identifier_name(&mut self) -> bool {
  * Set `self._last_int_value` if the identifier start existed.
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_identifier_start(&mut self) -> bool {
+fn eat_reg_exp_identifier_start<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
     let force_u_flag = !self._unicode_mode && self.ecma_version >= 2020;
     let mut cp = self.current_code_point;
@@ -1121,7 +1124,7 @@ fn eat_reg_exp_identifier_start(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_identifier_part(&mut self) -> bool {
+fn eat_reg_exp_identifier_part<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
     let force_u_flag = !self._unicode_mode && self.ecma_version >= 2020;
     let mut cp = self.current_code_point;
@@ -1153,7 +1156,7 @@ fn eat_reg_exp_identifier_part(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_c_control_letter(&mut self) -> bool {
+fn eat_c_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
     if self.eat(LATIN_SMALL_LETTER_C) {
         if self.eat_control_letter() {
@@ -1172,7 +1175,7 @@ fn eat_c_control_letter(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_zero(&mut self) -> bool {
+fn eat_zero<'a>(parser: &mut Parser<'a>) -> bool {
     if self.current_code_point == DIGIT_ZERO && !is_decimal_digit(self.next_code_point) {
         self._last_int_value = 0;
         self.advance();
@@ -1191,7 +1194,7 @@ fn eat_zero(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_control_escape(&mut self) -> bool {
+fn eat_control_escape<'a>(parser: &mut Parser<'a>) -> bool {
     if self.eat(LATIN_SMALL_LETTER_F) {
         self._last_int_value = FORM_FEED;
         return true;
@@ -1226,7 +1229,7 @@ fn eat_control_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_control_letter(&mut self) -> bool {
+fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
     let cp = self.current_code_point;
     if is_latin_letter(cp) {
         self.advance();
@@ -1251,7 +1254,7 @@ fn eat_control_letter(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
+fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag: bool) -> bool {
     let start = self.index;
     let u_flag = force_u_flag || self._unicode_mode;
 
@@ -1279,7 +1282,7 @@ fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
+fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
 
     if self.eat_fixed_hex_digits(4) {
@@ -1310,7 +1313,7 @@ fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
+fn eat_reg_exp_unicode_code_point_escape(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
 
     if self.eat(LEFT_CURLY_BRACKET)
@@ -1341,7 +1344,7 @@ fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_identity_escape(&mut self) -> bool {
+fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let cp = self.current_code_point;
     if self.is_valid_identity_escape(cp) {
         self._last_int_value = cp;
@@ -1351,7 +1354,7 @@ fn eat_identity_escape(&mut self) -> bool {
     false
 }
 
-fn is_valid_identity_escape(&self, cp: i32) -> bool {
+fn is_valid_identity_escape<'a>(parser: &mut Parser<'a>, cp: i32) -> bool {
     if cp == -1 {
         return false;
     }
@@ -1377,14 +1380,14 @@ fn is_valid_identity_escape(&self, cp: i32) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_decimal_escape(&mut self) -> bool {
-    self._last_int_value = 0;
-    let mut cp = self.current_code_point;
-    if cp >= DIGIT_ONE && cp <= DIGIT_NINE {
-        while cp >= DIGIT_ZERO && cp <= DIGIT_NINE {
-            self._last_int_value = 10 * self._last_int_value + (cp - DIGIT_ZERO);
-            self.advance();
-            cp = self.current_code_point;
+fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
+    parser.last_int_value = 0;
+    let mut cp = parser.current();
+    if cp >= Some(&'1') && cp <= Some(&'9') {
+        while cp >= Some(&'1') && cp <= Some(&'9') {
+            parser.last_int_value = 10 * parser.last_int_value + (cp - DIGIT_ZERO);
+            parser.advance();
+            cp = parser.current();
         }
         return true;
     }
@@ -1402,11 +1405,11 @@ fn eat_decimal_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_control_letter(&mut self) -> bool {
-    let cp = self.current_code_point;
+fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
+    let cp = parser.current();
     if is_latin_letter(cp) {
-        self.advance();
-        self._last_int_value = cp % 0x20;
+        parser.advance();
+        parser.last_int_value = cp % 0x20;
         return true;
     }
     false
@@ -1427,7 +1430,7 @@ fn eat_control_letter(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
+fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag: bool) -> bool {
     let start = self.index;
     let u_flag = force_u_flag || self._unicode_mode;
 
@@ -1455,7 +1458,7 @@ fn eat_reg_exp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
+fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
 
     if self.eat_fixed_hex_digits(4) {
@@ -1486,7 +1489,7 @@ fn eat_reg_exp_unicode_surrogate_pair_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
+fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let start = self.index;
 
     if self.eat(LEFT_CURLY_BRACKET)
@@ -1517,7 +1520,7 @@ fn eat_reg_exp_unicode_code_point_escape(&mut self) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_identity_escape(&mut self) -> bool {
+fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let cp = self.current_code_point;
     if self.is_valid_identity_escape(cp) {
         self._last_int_value = cp;
@@ -1553,7 +1556,7 @@ fn is_valid_identity_escape(&self, cp: i32) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_decimal_escape(&mut self) -> bool {
+fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
     self._last_int_value = 0;
     let mut cp = self.current_code_point;
     if cp >= DIGIT_ONE && cp <= DIGIT_NINE {

From 5a5b523ed438d1ccc16cb1d61fb8dd915511e6cf Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 22:32:20 +0800
Subject: [PATCH 10/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/ast.rs    |  2 +-
 crates/oxc_js_regex/src/parser.rs | 78 +++++++++++++++++++------------
 2 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/crates/oxc_js_regex/src/ast.rs b/crates/oxc_js_regex/src/ast.rs
index ff0f940eb7408..f4e5900b8da32 100644
--- a/crates/oxc_js_regex/src/ast.rs
+++ b/crates/oxc_js_regex/src/ast.rs
@@ -119,7 +119,7 @@ pub struct Group<'a> {
 
 /// The capturing group.
 /// E.g. `(ab)`, `(?<name>ab)`
-#[derive(Debug)]
+#[derive(Debug, Default)]
 pub struct CapturingGroup<'a> {
     pub span: Span,
     pub name: Option<Atom>,
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index c61291b9f6a1d..061906951b33d 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -9,9 +9,10 @@ use oxc_diagnostics::Error;
 use oxc_span::Span;
 
 use crate::ast::{
-    Alternative, Assertion, BoundaryAssertion, Branch, Character, EdgeAssertion, EdgeAssertionKind,
-    Element, LookaheadAssertion, LookaroundAssertion, LookbehindAssertion, Pattern,
-    QuantifiableElement, Quantifier, RegExpLiteral, WordBoundaryAssertion,
+    Alternative, Assertion, Backreference, BackreferenceRef, BoundaryAssertion, Branch,
+    CapturingGroup, Character, EdgeAssertion, EdgeAssertionKind, Element, LookaheadAssertion,
+    LookaroundAssertion, LookbehindAssertion, Pattern, QuantifiableElement, Quantifier,
+    RegExpLiteral, WordBoundaryAssertion,
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
@@ -49,6 +50,7 @@ pub struct Parser<'a> {
     back_reference_names: HashSet<String>,
     last_assertion_is_quantifiable: bool,
     last_range: Range<usize>,
+    last_str_value: Stirng,
 }
 
 #[derive(Default, Copy, Clone)]
@@ -77,6 +79,7 @@ impl<'a> Parser<'a> {
             last_range: 0..0,
             last_assertion_is_quantifiable: false,
             builder: AstBuilder::new(allocator),
+            last_str_value: String::default(),
         }
     }
 
@@ -92,6 +95,9 @@ impl<'a> Parser<'a> {
             false
         }
     }
+    pub fn span_with_start(&self, start: u32) -> Span {
+        Span::new(start, self.index as u32)
+    }
 
     pub fn eat2(&mut self, first: char, second: char) -> bool {
         if self.is(first) && self.nth(1) == Some(&second) {
@@ -455,62 +461,70 @@ fn parse_atom_escape<'a>(parser: &mut Parser<'a>) -> bool {
     }
 }
 
-fn parse_backreference<'a>(parser: &mut Parser<'a>) -> bool {
+/// TODO: resolve when pattern leave
+fn parse_backreference<'a>(parser: &mut Parser<'a>) -> Option<Backreference<'a>> {
     let start = parser.index;
     if parser.eat_decimal_escape() {
         let n = parser.last_int_value;
         if n <= parser.num_capturing_parens {
-            parser.on_backreference(start - 1, parser.index, n);
-            true
+            Some(Backreference {
+                span: Span::new(start as u32, parser.index as u32),
+                reference: BackreferenceRef::Number(n as usize),
+                resolved: CapturingGroup::default(),
+            })
         } else {
             if parser.context.strict || parser.context.unicode_mode {
                 panic!("Invalid escape");
             }
             parser.rewind(start);
-            true
+            None
         }
     } else {
-        false
+        None
     }
 }
 
+struct UnicodeSetsConsumeResult {
+    may_contain_strings: Option<bool>,
+}
+
 fn consume_character_class_escape<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
     let start = parser.index;
 
     if parser.eat(LATIN_SMALL_LETTER_D) {
         parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "digit", false);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     if parser.eat(LATIN_CAPITAL_LETTER_D) {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "digit", true);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     if parser.eat(LATIN_SMALL_LETTER_S) {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "space", false);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     if parser.eat(LATIN_CAPITAL_LETTER_S) {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "space", true);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     if parser.eat(LATIN_SMALL_LETTER_W) {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "word", false);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     if parser.eat(LATIN_CAPITAL_LETTER_W) {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         parser.on_escape_character_set(start - 1, parser.index, "word", true);
-        return Some(UnicodeSetsConsumeResult { may_contain_strings: false });
+        return Some(UnicodeSetsConsumeResult { may_contain_strings: None });
     }
 
     let mut negate = false;
@@ -518,7 +532,7 @@ fn consume_character_class_escape<'a>(parser: &mut Parser<'a>) -> Option<Unicode
         && parser.ecma_version >= 2018
         && (parser.eat(LATIN_SMALL_LETTER_P) || (negate = parser.eat(LATIN_CAPITAL_LETTER_P)))
     {
-        parser._last_int_value = -1;
+        parser.last_int_value = -1;
         if parser.eat(LEFT_CURLY_BRACKET) {
             if let Some(result) = parser.eat_unicode_property_value_expression() {
                 if parser.eat(RIGHT_CURLY_BRACKET) {
@@ -548,20 +562,24 @@ fn consume_character_class_escape<'a>(parser: &mut Parser<'a>) -> Option<Unicode
     None
 }
 
-fn consume_k_group_name<'a>(parser: &mut Parser<'a>) -> bool {
+fn consume_k_group_name<'a>(parser: &mut Parser<'a>) -> Option<Backreference<'a>> {
     let start = parser.index;
 
-    if parser.eat(LATIN_SMALL_LETTER_K) {
+    if parser.eat('k') {
         if parser.eat_group_name() {
-            let group_name = parser._last_str_value.clone();
-            parser._backreference_names.insert(group_name.clone());
-            parser.on_backreference(start - 1, parser.index, group_name);
-            return true;
+            let group_name: String = parser.last_str_value.clone();
+            parser.back_reference_names.insert(group_name.clone());
+            return Some(Backreference {
+                span: parser.span_with_start(start),
+                reference: BackreferenceRef::Atom(group_name.as_str().into()),
+                // dummy resolved
+                resolved: CapturingGroup::default(),
+            });
         }
-        parser.raise("Invalid named reference");
+        panic!("Invalid named reference");
     }
 
-    false
+    None
 }
 
 fn consume_character_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
@@ -570,7 +588,7 @@ fn consume_character_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsCon
     if parser.eat(LEFT_SQUARE_BRACKET) {
         let negate = parser.eat(CIRCUMFLEX_ACCENT);
         parser.on_character_class_enter(start, negate, parser._unicode_sets_mode);
-        let result = parser.consume_class_contents()?;
+        let result = consume_class_contents(parser);
         if !parser.eat(RIGHT_SQUARE_BRACKET) {
             if parser.current_code_point == -1 {
                 parser.raise("Unterminated character class");
@@ -608,7 +626,7 @@ fn consume_class_contents<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResu
             // ClassContents[UnicodeMode, UnicodeSetsMode] ::
             //         [empty]
             //     1. Return false.
-            return UnicodeSetsConsumeResult { may_contain_strings: false };
+            return UnicodeSetsConsumeResult { may_contain_strings: None };
         }
         let result = self.consume_class_set_expression();
 

From dedd6d69af6f44a31ad396ef7f502f66dc372d36 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 22:38:21 +0800
Subject: [PATCH 11/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20oct?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 061906951b33d..ca5eeccbf2dce 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -1587,3 +1587,49 @@ fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
     }
     false
 }
+
+/**
+ * Eat the next characters as a `OctalDigit` production if possible.
+ * Set `self._last_int_value` if it ate the next characters successfully.
+ * ```
+ * OctalDigit:: one of
+ *      0 1 2 3 4 5 6 7
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_octal_digit<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    let cp = parser.current()?;
+    if cp.is_digit(8) {
+        parser.advance();
+        parser.last_int_value = cp.to_digit(8)?;
+        Some(())
+    } else {
+        parser.last_int_value = 0;
+        None
+    }
+}
+
+/**
+ * Eat the next characters as the given number of `HexDigit` productions if
+ * possible.
+ * Set `self._last_int_value` if it ate the next characters successfully.
+ * ```
+ * HexDigit:: one of
+ *      0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
+ * ```
+ * @returns `true` if it ate the next characters successfully.
+ */
+fn eat_fixed_hex_digits<'a>(parser: &mut Parser<'a>, length: usize) -> Option<()> {
+    let start = parser.index;
+    parser.last_int_value = 0;
+    for _ in 0..length {
+        let cp = parser.current()?;
+        if !cp.is_ascii_hexdigit() {
+            parser.rewind(start);
+            return None;
+        }
+        parser.last_int_value = 16 * parser.last_int_value + cp.to_digit(16)? as usize;
+        parser.advance();
+    }
+    Some(())
+}

From 970d9de4ae827628f5a198c26194d2e983036ac1 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 23:16:34 +0800
Subject: [PATCH 12/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.lock                        |   2 +
 crates/oxc_js_regex/Cargo.toml    |   6 +-
 crates/oxc_js_regex/src/lib.rs    |   1 +
 crates/oxc_js_regex/src/parser.rs | 147 ++++++++++++++----------------
 crates/oxc_js_regex/src/util.rs   |   7 ++
 5 files changed, 81 insertions(+), 82 deletions(-)
 create mode 100644 crates/oxc_js_regex/src/util.rs

diff --git a/Cargo.lock b/Cargo.lock
index b054d73d70750..1d13b2a27b61d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1602,6 +1602,8 @@ dependencies = [
  "oxc_allocator",
  "oxc_diagnostics",
  "oxc_span",
+ "oxc_syntax",
+ "phf",
 ]
 
 [[package]]
diff --git a/crates/oxc_js_regex/Cargo.toml b/crates/oxc_js_regex/Cargo.toml
index 3f65315f2e836..e473738bc7bbe 100644
--- a/crates/oxc_js_regex/Cargo.toml
+++ b/crates/oxc_js_regex/Cargo.toml
@@ -19,6 +19,8 @@ workspace = true
 doctest = false
 
 [dependencies]
-oxc_allocator = { workspace = true }
-oxc_span      = { workspace = true }
+phf             = { workspace = true }
+oxc_allocator   = { workspace = true }
+oxc_span        = { workspace = true }
 oxc_diagnostics = { workspace = true }
+oxc_syntax.workspace = true
diff --git a/crates/oxc_js_regex/src/lib.rs b/crates/oxc_js_regex/src/lib.rs
index 515c301327a72..5062aac9f87e6 100644
--- a/crates/oxc_js_regex/src/lib.rs
+++ b/crates/oxc_js_regex/src/lib.rs
@@ -4,5 +4,6 @@ mod ast_kind;
 mod ecma_version;
 mod lexer;
 pub mod parser;
+mod util;
 pub mod validator;
 pub mod visitor;
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index ca5eeccbf2dce..2de56395648e8 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -7,6 +7,7 @@ use std::str::{CharIndices, Chars, Matches};
 
 use oxc_diagnostics::Error;
 use oxc_span::Span;
+use oxc_syntax::unicode_id_start::is_id_continue;
 
 use crate::ast::{
     Alternative, Assertion, Backreference, BackreferenceRef, BoundaryAssertion, Branch,
@@ -16,6 +17,7 @@ use crate::ast::{
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
+use crate::util::is_syntax_character;
 
 pub struct Lexer<'a> {
     source: &'a str,
@@ -873,7 +875,7 @@ fn consume_class_union_right<'a>(
     //     1. If MayContainStrings of the ClassSetOperand is true, return true.
     //     2. If ClassUnion is present, return MayContainStrings of the ClassUnion.
     //     3. Return false.
-    return UnicodeSetsConsumeResult { may_contain_strings };
+    return UnicodeSetsConsumeResult { may_contain_strings: Some(may_contain_strings) };
 }
 
 fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
@@ -889,6 +891,22 @@ fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
     parser.index != start
 }
 
+fn eat_hex_digits(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    parser.last_int_value = 0;
+
+    while let Some(ch) = parser.current() {
+        if !ch.is_ascii_hexdigit() {
+            break;
+        }
+        parser.last_int_value =
+            16 * parser.last_int_value + ch.to_digit(16).expect("should convert successfully");
+        parser.advance();
+    }
+
+    parser.index != start
+}
+
 fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
     let start = parser.index;
     let mut in_class = false;
@@ -1346,48 +1364,6 @@ fn eat_reg_exp_unicode_code_point_escape(parser: &mut Parser<'a>) -> bool {
     false
 }
 
-/**
- * Eat the next characters as a RegExp `IdentityEscape` production if
- * possible.
- * Set `self._last_int_value` if it ate the next characters successfully.
- * ```
- * IdentityEscape[UnicodeMode, N]::
- *      [+UnicodeMode] SyntaxCharacter
- *      [+UnicodeMode] `/`
- *      [strict][~UnicodeMode] SourceCharacter but not UnicodeIDContinue
- *      [annexB][~UnicodeMode] SourceCharacterIdentityEscape[?N]
- * SourceCharacterIdentityEscape[N]::
- *      [~N] SourceCharacter but not c
- *      [+N] SourceCharacter but not one of c k
- * ```
- * @returns `true` if it ate the next characters successfully.
- */
-fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let cp = self.current_code_point;
-    if self.is_valid_identity_escape(cp) {
-        self._last_int_value = cp;
-        self.advance();
-        return true;
-    }
-    false
-}
-
-fn is_valid_identity_escape<'a>(parser: &mut Parser<'a>, cp: i32) -> bool {
-    if cp == -1 {
-        return false;
-    }
-    if self._unicode_mode {
-        return is_syntax_character(cp) || cp == SOLIDUS;
-    }
-    if self.strict {
-        return !is_id_continue(cp);
-    }
-    if self._n_flag {
-        return !(cp == LATIN_SMALL_LETTER_C || cp == LATIN_SMALL_LETTER_K);
-    }
-    cp != LATIN_SMALL_LETTER_C
-}
-
 /**
  * Eat the next characters as a RegExp `DecimalEscape` production if
  * possible.
@@ -1423,14 +1399,14 @@ fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
-    let cp = parser.current();
-    if is_latin_letter(cp) {
+fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    let cp = parser.current()?;
+    if cp.is_ascii_alphabetic() {
         parser.advance();
-        parser.last_int_value = cp % 0x20;
-        return true;
+        parser.last_int_value = (cp as usize) % 0x20;
+        return Some(());
     }
-    false
+    None
 }
 
 /**
@@ -1508,17 +1484,17 @@ fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> boo
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
+    let start = parser.index;
 
-    if self.eat(LEFT_CURLY_BRACKET)
-        && self.eat_hex_digits()
-        && self.eat(RIGHT_CURLY_BRACKET)
-        && is_valid_unicode(self._last_int_value)
+    if parser.eat('{')
+        && eat_hex_digits(parser)
+        && parser.eat('}')
+        && is_valid_unicode(parser.last_int_value)
     {
         return true;
     }
 
-    self.rewind(start);
+    parser.rewind(start);
     false
 }
 
@@ -1538,30 +1514,31 @@ fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let cp = self.current_code_point;
-    if self.is_valid_identity_escape(cp) {
-        self._last_int_value = cp;
-        self.advance();
+fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    let cp = parser.current();
+    if parser.is_valid_identity_escape(cp.cloned()) {
+        parser.last_int_value = cp.unwrap() as usize;
+        parser.advance();
         return true;
     }
-    false
+    None
 }
 
-fn is_valid_identity_escape(&self, cp: i32) -> bool {
-    if cp == -1 {
+fn is_valid_identity_escape(parser: &mut Parser<'a>, cp: Option<char>) -> bool {
+    if cp.is_none() {
         return false;
     }
-    if self._unicode_mode {
-        return is_syntax_character(cp) || cp == SOLIDUS;
+    let cp = cp.unwrap();
+    if parser.context.unicode_mode {
+        return is_syntax_character(cp) || cp == '/';
     }
-    if self.strict {
+    if parser.context.strict {
         return !is_id_continue(cp);
     }
-    if self._n_flag {
-        return !(cp == LATIN_SMALL_LETTER_C || cp == LATIN_SMALL_LETTER_K);
+    if parser.context.nflag {
+        return !(cp == 'c' || cp == 'k');
     }
-    cp != LATIN_SMALL_LETTER_C
+    cp != 'c'
 }
 
 /**
@@ -1574,18 +1551,21 @@ fn is_valid_identity_escape(&self, cp: i32) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    self._last_int_value = 0;
-    let mut cp = self.current_code_point;
-    if cp >= DIGIT_ONE && cp <= DIGIT_NINE {
-        while cp >= DIGIT_ZERO && cp <= DIGIT_NINE {
-            self._last_int_value = 10 * self._last_int_value + (cp - DIGIT_ZERO);
-            self.advance();
-            cp = self.current_code_point;
+fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    parser.last_int_value = 0;
+    let mut cp = parser.current()?;
+    if cp.is_ascii_digit() {
+        while cp.is_ascii_digit() {
+            parser.last_int_value = 10 * parser.last_int_value + cp.to_digit(10)?;
+            parser.advance();
+            cp = match parser.current() {
+                Some(char) => char,
+                None => break,
+            };
         }
-        return true;
+        return Some(());
     }
-    false
+    None
 }
 
 /**
@@ -1601,7 +1581,7 @@ fn eat_octal_digit<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let cp = parser.current()?;
     if cp.is_digit(8) {
         parser.advance();
-        parser.last_int_value = cp.to_digit(8)?;
+        parser.last_int_value = cp.to_digit(8)? as usize;
         Some(())
     } else {
         parser.last_int_value = 0;
@@ -1633,3 +1613,10 @@ fn eat_fixed_hex_digits<'a>(parser: &mut Parser<'a>, length: usize) -> Option<()
     }
     Some(())
 }
+
+const MIN_CODE_POINT: u32 = 0;
+const MAX_CODE_POINT: u32 = 0x10FFFF;
+
+fn is_valid_unicode(code: u32) -> bool {
+    code >= MIN_CODE_POINT && code <= MAX_CODE_POINT
+}
diff --git a/crates/oxc_js_regex/src/util.rs b/crates/oxc_js_regex/src/util.rs
new file mode 100644
index 0000000000000..c89e651da867e
--- /dev/null
+++ b/crates/oxc_js_regex/src/util.rs
@@ -0,0 +1,7 @@
+use phf::phf_set;
+
+const SYNTAX_CHARACTERS: phf::Set<char> = phf_set!['(', ')', '[', ']', '{', '}', '|', '-'];
+#[inline]
+pub fn is_syntax_character(cp: char) -> bool {
+    SYNTAX_CHARACTERS.contains(&cp)
+}

From 408e3026d9ec0573f425569d84f95cafe2af4647 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 23:20:06 +0800
Subject: [PATCH 13/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 26 ++++++++++++++------------
 crates/oxc_js_regex/src/util.rs   | 12 ++++++++++++
 2 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 2de56395648e8..8cd5c42c3362f 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -17,7 +17,9 @@ use crate::ast::{
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
-use crate::util::is_syntax_character;
+use crate::util::{
+    combine_surrogate_pair, is_lead_surrogate, is_syntax_character, is_trail_surrogate,
+};
 
 pub struct Lexer<'a> {
     source: &'a str,
@@ -1453,23 +1455,23 @@ fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
+    let start = parser.index;
 
-    if self.eat_fixed_hex_digits(4) {
-        let lead = self._last_int_value;
-        if is_lead_surrogate(lead)
-            && self.eat(REVERSE_SOLIDUS)
-            && self.eat(LATIN_SMALL_LETTER_U)
-            && self.eat_fixed_hex_digits(4)
+    if parser.eat_fixed_hex_digits(4) {
+        let lead = parser.last_int_value;
+        if is_lead_surrogate(lead as u32)
+            && parser.eat('\\')
+            && parser.eat('u')
+            && parser.eat_fixed_hex_digits(4)
         {
-            let trail = self._last_int_value;
-            if is_trail_surrogate(trail) {
-                self._last_int_value = combine_surrogate_pair(lead, trail);
+            let trail = parser.last_int_value;
+            if is_trail_surrogate(trail as u32) {
+                parser.last_int_value = combine_surrogate_pair(lead, trail) as usize;
                 return true;
             }
         }
 
-        self.rewind(start);
+        parser.rewind(start);
     }
 
     false
diff --git a/crates/oxc_js_regex/src/util.rs b/crates/oxc_js_regex/src/util.rs
index c89e651da867e..9abd39e754b66 100644
--- a/crates/oxc_js_regex/src/util.rs
+++ b/crates/oxc_js_regex/src/util.rs
@@ -5,3 +5,15 @@ const SYNTAX_CHARACTERS: phf::Set<char> = phf_set!['(', ')', '[', ']', '{', '}',
 pub fn is_syntax_character(cp: char) -> bool {
     SYNTAX_CHARACTERS.contains(&cp)
 }
+
+pub fn is_lead_surrogate(code: char) -> bool {
+    code >= 0xd800 && code <= 0xdbff
+}
+
+pub fn is_trail_surrogate(code: u32) -> bool {
+    code >= 0xdc00 && code <= 0xdfff
+}
+
+pub fn combine_surrogate_pair(lead: u32, trail: u32) -> u32 {
+    (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000
+}

From 907d7c04c71b24b9f148ef30ef396b613741debd Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 23:24:49 +0800
Subject: [PATCH 14/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 38 +++++++++++++++++--------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 8cd5c42c3362f..212c87cd720f2 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -1376,18 +1376,22 @@ fn eat_reg_exp_unicode_code_point_escape(parser: &mut Parser<'a>) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> bool {
+fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> Option<()> {
     parser.last_int_value = 0;
-    let mut cp = parser.current();
-    if cp >= Some(&'1') && cp <= Some(&'9') {
-        while cp >= Some(&'1') && cp <= Some(&'9') {
-            parser.last_int_value = 10 * parser.last_int_value + (cp - DIGIT_ZERO);
+    let mut cp = parser.current()?;
+    if cp >= &'1' && cp <= &'9' {
+        while cp >= &'1' && cp <= &'9' {
+            parser.last_int_value = 10 * parser.last_int_value
+                + cp.to_digit(10).expect("should convert successfully") as usize;
             parser.advance();
-            cp = parser.current();
+            cp = match parser.current() {
+                Some(ch) => ch,
+                None => break,
+            };
         }
-        return true;
+        return Some(());
     }
-    false
+    None
 }
 
 /**
@@ -1427,20 +1431,20 @@ fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> Option<()> {
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag: bool) -> bool {
-    let start = self.index;
-    let u_flag = force_u_flag || self._unicode_mode;
+    let start = parser.index;
+    let u_flag = force_u_flag || parser.context.unicode_mode;
 
-    if self.eat(LATIN_SMALL_LETTER_U) {
-        if (u_flag && self.eat_reg_exp_unicode_surrogate_pair_escape())
-            || self.eat_fixed_hex_digits(4)
-            || (u_flag && self.eat_reg_exp_unicode_code_point_escape())
+    if parser.eat('u') {
+        if (u_flag && eat_reg_exp_unicode_surrogate_pair_escape(parser))
+            || eat_fixed_hex_digits(parser, 4).is_some()
+            || (u_flag && eat_reg_exp_unicode_code_point_escape(parser))
         {
             return true;
         }
-        if self.strict || u_flag {
-            self.raise("Invalid unicode escape");
+        if parser.context.strict || u_flag {
+            panic!("Invalid unicode escape");
         }
-        self.rewind(start);
+        parser.rewind(start);
     }
 
     false

From 852691da079ae788e4f13553f66d7a43d945148d Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 23:36:04 +0800
Subject: [PATCH 15/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20control=20escape?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 144 ++++--------------------------
 1 file changed, 17 insertions(+), 127 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 212c87cd720f2..8af85e936d56d 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -1233,116 +1233,29 @@ fn eat_zero<'a>(parser: &mut Parser<'a>) -> bool {
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_control_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    if self.eat(LATIN_SMALL_LETTER_F) {
-        self._last_int_value = FORM_FEED;
+    if parser.eat('f') {
+        parser.last_int_value = 12;
         return true;
     }
-    if self.eat(LATIN_SMALL_LETTER_N) {
-        self._last_int_value = LINE_FEED;
+    if parser.eat('n') {
+        parser.last_int_value = 10;
         return true;
     }
-    if self.eat(LATIN_SMALL_LETTER_R) {
-        self._last_int_value = CARRIAGE_RETURN;
+    if parser.eat('r') {
+        parser.last_int_value = 13;
         return true;
     }
-    if self.eat(LATIN_SMALL_LETTER_T) {
-        self._last_int_value = CHARACTER_TABULATION;
+    if parser.eat('t') {
+        parser.last_int_value = 9;
         return true;
     }
-    if self.eat(LATIN_SMALL_LETTER_V) {
-        self._last_int_value = LINE_TABULATION;
+    if parser.eat('v') {
+        parser.last_int_value = 11;
         return true;
     }
     false
 }
 
-/**
- * Eat the next characters as a RegExp `ControlLetter` production if
- * possible.
- * Set `self._last_int_value` if it ate the next characters successfully.
- * ```
- * ControlLetter:: one of
- *      a b c d e f g h i j k l m n o p q r s t u v w x y z
- *      A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
- * ```
- * @returns `true` if it ate the next characters successfully.
- */
-fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
-    let cp = self.current_code_point;
-    if is_latin_letter(cp) {
-        self.advance();
-        self._last_int_value = cp % 0x20;
-        return true;
-    }
-    false
-}
-
-/**
- * Eat the next characters as a RegExp `RegExpUnicodeEscapeSequence`
- * production if possible.
- * Set `self._last_int_value` if it ate the next characters successfully.
- * ```
- * RegExpUnicodeEscapeSequence[UnicodeMode]::
- *      [+UnicodeMode] `u` HexLeadSurrogate `\u` HexTrailSurrogate
- *      [+UnicodeMode] `u` HexLeadSurrogate
- *      [+UnicodeMode] `u` HexTrailSurrogate
- *      [+UnicodeMode] `u` HexNonSurrogate
- *      [~UnicodeMode] `u` Hex4Digits
- *      [+UnicodeMode] `u{` CodePoint `}`
- * ```
- * @returns `true` if it ate the next characters successfully.
- */
-fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag: bool) -> bool {
-    let start = self.index;
-    let u_flag = force_u_flag || self._unicode_mode;
-
-    if self.eat(LATIN_SMALL_LETTER_U) {
-        if (u_flag && self.eat_reg_exp_unicode_surrogate_pair_escape())
-            || self.eat_fixed_hex_digits(4)
-            || (u_flag && self.eat_reg_exp_unicode_code_point_escape())
-        {
-            return true;
-        }
-        if self.strict || u_flag {
-            self.raise("Invalid unicode escape");
-        }
-        self.rewind(start);
-    }
-
-    false
-}
-
-/**
- * Eat the next characters as the following alternatives if possible.
- * Set `self._last_int_value` if it ate the next characters successfully.
- * ```
- *      HexLeadSurrogate `\u` HexTrailSurrogate
- * ```
- * @returns `true` if it ate the next characters successfully.
- */
-fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
-
-    if self.eat_fixed_hex_digits(4) {
-        let lead = self._last_int_value;
-        if is_lead_surrogate(lead)
-            && self.eat(REVERSE_SOLIDUS)
-            && self.eat(LATIN_SMALL_LETTER_U)
-            && self.eat_fixed_hex_digits(4)
-        {
-            let trail = self._last_int_value;
-            if is_trail_surrogate(trail) {
-                self._last_int_value = combine_surrogate_pair(lead, trail);
-                return true;
-            }
-        }
-
-        self.rewind(start);
-    }
-
-    false
-}
-
 /**
  * Eat the next characters as the following alternatives if possible.
  * Set `self._last_int_value` if it ate the next characters successfully.
@@ -1351,18 +1264,18 @@ fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> boo
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_unicode_code_point_escape(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
+fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
 
-    if self.eat(LEFT_CURLY_BRACKET)
-        && self.eat_hex_digits()
-        && self.eat(RIGHT_CURLY_BRACKET)
-        && is_valid_unicode(self._last_int_value)
+    if parser.eat('{')
+        && eat_hex_digits(parser)
+        && parser.eat('}')
+        && is_valid_unicode(parser.last_int_value as u32)
     {
         return true;
     }
 
-    self.rewind(start);
+    parser.rewind(start);
     false
 }
 
@@ -1481,29 +1394,6 @@ fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> boo
     false
 }
 
-/**
- * Eat the next characters as the following alternatives if possible.
- * Set `self._last_int_value` if it ate the next characters successfully.
- * ```
- *      `{` CodePoint `}`
- * ```
- * @returns `true` if it ate the next characters successfully.
- */
-fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = parser.index;
-
-    if parser.eat('{')
-        && eat_hex_digits(parser)
-        && parser.eat('}')
-        && is_valid_unicode(parser.last_int_value)
-    {
-        return true;
-    }
-
-    parser.rewind(start);
-    false
-}
-
 /**
  * Eat the next characters as a RegExp `IdentityEscape` production if
  * possible.

From 4a39f2e7d2b480173e3c9096867de5a5aa5ab652 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Sun, 14 Jan 2024 23:49:08 +0800
Subject: [PATCH 16/19] =?UTF-8?q?chore:=20=F0=9F=A4=96=20ck=20point?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 93 ++++++++++++++++---------------
 crates/oxc_js_regex/src/util.rs   |  2 +-
 2 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 8af85e936d56d..b4fbb6dfc46be 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -7,6 +7,7 @@ use std::str::{CharIndices, Chars, Matches};
 
 use oxc_diagnostics::Error;
 use oxc_span::Span;
+use oxc_syntax::identifier::is_identifier_part;
 use oxc_syntax::unicode_id_start::is_id_continue;
 
 use crate::ast::{
@@ -130,8 +131,8 @@ impl<'a> Parser<'a> {
         self.lexer.chars.get(self.index + range.start..(self.index + range.end))
     }
 
-    pub fn current(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index)
+    pub fn current(&self) -> Option<char> {
+        self.lexer.chars.get(self.index).copied()
     }
 
     pub fn advance(&mut self) -> bool {
@@ -143,7 +144,7 @@ impl<'a> Parser<'a> {
         }
     }
 
-    pub fn rewind<'a>(parser: &mut Parser<'a>, start: usize) {
+    pub fn rewind<'a>(&mut self, start: usize) {
         self.index = start;
     }
 }
@@ -1127,26 +1128,28 @@ fn eat_reg_exp_identifier_name<'a>(parser: &mut Parser<'a>) -> bool {
  * Set `self._last_int_value` if the identifier start existed.
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_identifier_start<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
-    let force_u_flag = !self._unicode_mode && self.ecma_version >= 2020;
-    let mut cp = self.current_code_point;
-    self.advance();
-
-    if cp == REVERSE_SOLIDUS && self.eat_reg_exp_unicode_escape_sequence(force_u_flag) {
-        cp = self._last_int_value;
-    } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(self.current_code_point) {
-        cp = combine_surrogate_pair(cp, self.current_code_point);
-        self.advance();
+fn eat_reg_exp_identifier_start<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    let start = parser.index;
+    let force_u_flag =
+        !parser.context.unicode_mode && parser.context.ecma_version >= EcmaVersion::V2020;
+    let mut cp = *parser.current()?;
+    parser.advance();
+
+    if cp == '\\' && eat_reg_exp_unicode_escape_sequence(parser, force_u_flag) {
+        cp = parser.last_int_value as u32 as char;
+    } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(parser.current()? as u32)
+    {
+        cp = combine_surrogate_pair(cp, parser.current() as u32);
+        parser.advance();
     }
 
     if is_identifier_start_char(cp) {
-        self._last_int_value = cp;
+        parser.last_int_value = cp;
         return true;
     }
 
-    if self.index != start {
-        self.rewind(start);
+    if parser.index != start {
+        parser.rewind(start);
     }
     false
 }
@@ -1162,28 +1165,30 @@ fn eat_reg_exp_identifier_start<'a>(parser: &mut Parser<'a>) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_reg_exp_identifier_part<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
-    let force_u_flag = !self._unicode_mode && self.ecma_version >= 2020;
-    let mut cp = self.current_code_point;
-    self.advance();
-
-    if cp == REVERSE_SOLIDUS && self.eat_reg_exp_unicode_escape_sequence(force_u_flag) {
-        cp = self._last_int_value;
-    } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(self.current_code_point) {
-        cp = combine_surrogate_pair(cp, self.current_code_point);
-        self.advance();
+fn eat_reg_exp_identifier_part<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    let start = parser.index;
+    let force_u_flag =
+        !parser.context.unicode_mode && parser.context.ecma_version >= EcmaVersion::V2020;
+    let mut cp = *parser.current()?;
+    parser.advance();
+
+    if cp == '\\' && eat_reg_exp_unicode_escape_sequence(parser, force_u_flag) {
+        cp = parser.last_int_value as u32 as char;
+    } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(parser.current()? as u32)
+    {
+        cp = combine_surrogate_pair(cp, parser.current()? as u32);
+        parser.advance();
     }
 
-    if is_identifier_part_char(cp) {
-        self._last_int_value = cp;
-        return true;
+    if is_identifier_part(cp) {
+        parser.last_int_value = cp as usize;
+        return Some(());
     }
 
-    if self.index != start {
-        self.rewind(start);
+    if parser.index != start {
+        parser.rewind(start);
     }
-    false
+    None
 }
 
 /**
@@ -1195,12 +1200,12 @@ fn eat_reg_exp_identifier_part<'a>(parser: &mut Parser<'a>) -> bool {
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_c_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
-    if self.eat(LATIN_SMALL_LETTER_C) {
-        if self.eat_control_letter() {
+    let start = parser.index;
+    if parser.eat('c') {
+        if eat_control_letter(parser).is_some() {
             return true;
         }
-        self.rewind(start);
+        parser.rewind(start);
     }
     false
 }
@@ -1213,13 +1218,13 @@ fn eat_c_control_letter<'a>(parser: &mut Parser<'a>) -> bool {
  * ```
  * @returns `true` if it ate the next characters successfully.
  */
-fn eat_zero<'a>(parser: &mut Parser<'a>) -> bool {
-    if self.current_code_point == DIGIT_ZERO && !is_decimal_digit(self.next_code_point) {
-        self._last_int_value = 0;
-        self.advance();
-        return true;
+fn eat_zero<'a>(parser: &mut Parser<'a>) -> Option<()> {
+    if parser.current()? == '0' && parser.nth(1).map(|ch| ch.is_ascii_digit()) == Some(false) {
+        parser.last_int_value = 0;
+        parser.advance();
+        return Some(());
     }
-    false
+    None
 }
 
 /**
diff --git a/crates/oxc_js_regex/src/util.rs b/crates/oxc_js_regex/src/util.rs
index 9abd39e754b66..d30f37eebc99e 100644
--- a/crates/oxc_js_regex/src/util.rs
+++ b/crates/oxc_js_regex/src/util.rs
@@ -6,7 +6,7 @@ pub fn is_syntax_character(cp: char) -> bool {
     SYNTAX_CHARACTERS.contains(&cp)
 }
 
-pub fn is_lead_surrogate(code: char) -> bool {
+pub fn is_lead_surrogate(code: u32) -> bool {
     code >= 0xd800 && code <= 0xdbff
 }
 

From 0c5e0ff7bb57b4c3bf5f74781a2a976902631409 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Mon, 15 Jan 2024 00:15:10 +0800
Subject: [PATCH 17/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20consume=5Fclass=5F?=
 =?UTF-8?q?set=5Fcharacter?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 '                                 | 46 +++++++++++++++++++
 crates/oxc_js_regex/src/parser.rs | 74 +++++++++++++++++--------------
 crates/oxc_js_regex/src/util.rs   | 65 +++++++++++++++++++++++++++
 3 files changed, 151 insertions(+), 34 deletions(-)
 create mode 100644 '

diff --git a/' b/'
new file mode 100644
index 0000000000000..53a6359f76054
--- /dev/null
+++ b/'
@@ -0,0 +1,46 @@
+use phf::phf_set;
+
+const SYNTAX_CHARACTERS: phf::Set<char> = phf_set!['(', ')', '[', ']', '{', '}', '|', '-'];
+
+const CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR_CHARACTER: phf::Set<char> = phf_set! {
+    '&' => AMPERSAND,
+    '!' => EXCLAMATION_MARK,
+    '#' => NUMBER_SIGN,
+    '$' => DOLLAR_SIGN,
+    '%' => PERCENT_SIGN,
+    '*' => ASTERISK,
+    '+' => PLUS_SIGN,
+    ',' => COMMA,
+    '.' => FULL_STOP,
+    ':' => COLON,
+    ';' => SEMICOLON,
+    '<' => LESS_THAN_SIGN,
+    '=' => EQUALS_SIGN,
+    '>' => GREATER_THAN_SIGN,
+    '?' => QUESTION_MARK,
+    '@' => COMMERCIAL_AT,
+    '^' => CIRCUMFLEX_ACCENT,
+    '`' => GRAVE_ACCENT,
+    '~' => TILDE,
+};
+
+#[inline]
+pub fn is_syntax_character(cp: char) -> bool {
+    SYNTAX_CHARACTERS.contains(&cp)
+}
+
+pub fn is_lead_surrogate(code: u32) -> bool {
+    code >= 0xd800 && code <= 0xdbff
+}
+
+pub fn is_trail_surrogate(code: u32) -> bool {
+    code >= 0xdc00 && code <= 0xdfff
+}
+
+pub fn combine_surrogate_pair(lead: u32, trail: u32) -> u32 {
+    (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000
+}
+
+pub fn is_class_set_reserved_double_punctuator_character(cp: char) -> bool {
+    CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR_CHARACTER.contains(&cp)
+}
diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index b4fbb6dfc46be..18b24aeb93ae7 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -19,7 +19,9 @@ use crate::ast::{
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
 use crate::util::{
-    combine_surrogate_pair, is_lead_surrogate, is_syntax_character, is_trail_surrogate,
+    combine_surrogate_pair, is_class_set_reserved_double_punctuator_character,
+    is_class_set_reserved_punctuator, is_class_set_syntax_character, is_lead_surrogate,
+    is_syntax_character, is_trail_surrogate,
 };
 
 pub struct Lexer<'a> {
@@ -51,7 +53,7 @@ pub struct Parser<'a> {
     index: usize,
     group_names: HashSet<String>,
     num_capturing_parens: usize,
-    last_int_value: usize,
+    last_int_value: u32,
     back_reference_names: HashSet<String>,
     last_assertion_is_quantifiable: bool,
     last_range: Range<usize>,
@@ -123,7 +125,7 @@ impl<'a> Parser<'a> {
 
     /// by default next means `nth(1)`
     pub fn next(&self) -> Option<&char> {
-        self.lexer.chars.get(self.index + 1)
+        self.lexer.chars.get(self.index + 1).copied()
     }
 
     /// get a range chars relative from current cursor
@@ -1052,40 +1054,44 @@ fn consume_class_string<'a>(parser: &mut Parser<'a>, i: usize) -> UnicodeSetsCon
  * Set `self._last_int_value` if it consumed the next characters successfully.
  * @returns `true` if it ate the next characters successfully.
  */
-fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> bool {
-    let start = self.index;
-    let cp = self.current_code_point;
+fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> Option<Character> {
+    let start = parser.index;
+    let cp = parser.current()?;
 
-    if cp != -1 && cp != self.next_code_point
-        || !is_class_set_reserved_double_punctuator_character(cp)
-    {
-        if cp != -1 && !is_class_set_syntax_character(cp) {
-            self._last_int_value = cp;
-            self.advance();
-            self.on_character(start, self.index, self._last_int_value);
-            return true;
+    if Some(cp) != parser.next() || !is_class_set_reserved_double_punctuator_character(cp) {
+        if !is_class_set_syntax_character(cp) {
+            parser.last_int_value = cp as u32;
+            parser.advance();
+            Some(Character { span: parser.span_with_start(start), value: cp })
         }
     }
 
-    if self.eat(REVERSE_SOLIDUS) {
-        if self.consume_character_escape() {
+    if parser.eat('\\') {
+        if consume_character_escape() {
             return true;
         }
-        if is_class_set_reserved_punctuator(self.current_code_point) {
-            self._last_int_value = self.current_code_point;
-            self.advance();
-            self.on_character(start, self.index, self._last_int_value);
-            return true;
+        if let Some(ch) = parser.current()
+            && is_class_set_reserved_punctuator(ch)
+        {
+            parser.last_int_value = parser.current()?;
+            parser.advance();
+
+            Some(Character {
+                span: parser.span_with_start(start),
+                value: parser.last_int_value as char,
+            })
         }
-        if self.eat(LATIN_SMALL_LETTER_B) {
-            self._last_int_value = BACKSPACE;
-            self.on_character(start, self.index, self._last_int_value);
-            return true;
+        if parser.eat('b') {
+            parser.last_int_value = 8;
+            Some(Character {
+                span: parser.span_with_start(start),
+                value: parser.last_int_value as char,
+            })
         }
-        self.rewind(start);
+        parser.rewind(start);
     }
 
-    false
+    None
 }
 
 /**
@@ -1094,11 +1100,11 @@ fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> bool {
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_group_name<'a>(parser: &mut Parser<'a>) -> bool {
-    if self.eat(LESS_THAN_SIGN) {
-        if self.eat_reg_exp_identifier_name() && self.eat(GREATER_THAN_SIGN) {
+    if parser.eat('<') {
+        if eat_reg_exp_identifier_name(parser) && parser.eat('>') {
             return true;
         }
-        self.raise("Invalid capture group name");
+        panic!("Invalid capture group name");
     }
     false
 }
@@ -1110,11 +1116,11 @@ fn eat_group_name<'a>(parser: &mut Parser<'a>) -> bool {
  * @returns `true` if it ate the next characters successfully.
  */
 fn eat_reg_exp_identifier_name<'a>(parser: &mut Parser<'a>) -> bool {
-    if self.eat_reg_exp_identifier_start() {
-        self._last_str_value = self._last_int_value.to_string();
+    if eat_reg_exp_identifier_start(parser).is_some() {
+        parser.last_str_value = (parser.last_int_value as char).to_string();
 
-        while self.eat_reg_exp_identifier_part() {
-            self._last_str_value.push_str(&self._last_int_value.to_string());
+        while eat_reg_exp_identifier_part(parser) {
+            parser.last_str_value.push(parser.last_int_value as char);
         }
 
         return true;
diff --git a/crates/oxc_js_regex/src/util.rs b/crates/oxc_js_regex/src/util.rs
index d30f37eebc99e..79bea7181163d 100644
--- a/crates/oxc_js_regex/src/util.rs
+++ b/crates/oxc_js_regex/src/util.rs
@@ -1,6 +1,59 @@
 use phf::phf_set;
 
 const SYNTAX_CHARACTERS: phf::Set<char> = phf_set!['(', ')', '[', ']', '{', '}', '|', '-'];
+
+const CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR_CHARACTER: phf::Set<char> = phf_set! {
+    '&' => AMPERSAND,
+    '!' => EXCLAMATION_MARK,
+    '#' => NUMBER_SIGN,
+    '$' => DOLLAR_SIGN,
+    '%' => PERCENT_SIGN,
+    '*' => ASTERISK,
+    '+' => PLUS_SIGN,
+    ',' => COMMA,
+    '.' => FULL_STOP,
+    ':' => COLON,
+    ';' => SEMICOLON,
+    '<' => LESS_THAN_SIGN,
+    '=' => EQUALS_SIGN,
+    '>' => GREATER_THAN_SIGN,
+    '?' => QUESTION_MARK,
+    '@' => COMMERCIAL_AT,
+    '^' => CIRCUMFLEX_ACCENT,
+    '`' => GRAVE_ACCENT,
+    '~' => TILDE,
+};
+
+const CLASS_SET_SYNTAX_CHARACTER: phf::Set<char> = phf_set! {
+    '(' => LEFT_PARENTHESIS,
+    ')' => RIGHT_PARENTHESIS,
+    '[' => LEFT_SQUARE_BRACKET,
+    ']' => RIGHT_SQUARE_BRACKET,
+    '{' => LEFT_CURLY_BRACKET,
+    '}' => RIGHT_CURLY_BRACKET,
+    '/' => SOLIDUS,
+    '-' => HYPHEN_MINUS,
+    '\\' => REVERSE_SOLIDUS,
+    '|' => VERTICAL_LINE,
+};
+
+const CLASS_SET_RESERVED_PUNCTUATOR: phf::Set<char> = phf_set! {
+    '&' => AMPERSAND,
+    '-' => HYPHEN_MINUS,
+    '!' => EXCLAMATION_MARK,
+    '#' => NUMBER_SIGN,
+    '%' => PERCENT_SIGN,
+    ',' => COMMA,
+    ':' => COLON,
+    ';' => SEMICOLON,
+    '<' => LESS_THAN_SIGN,
+    '=' => EQUALS_SIGN,
+    '>' => GREATER_THAN_SIGN,
+    '@' => COMMERCIAL_AT,
+    '`' => GRAVE_ACCENT,
+    '~' => TILDE,
+};
+
 #[inline]
 pub fn is_syntax_character(cp: char) -> bool {
     SYNTAX_CHARACTERS.contains(&cp)
@@ -17,3 +70,15 @@ pub fn is_trail_surrogate(code: u32) -> bool {
 pub fn combine_surrogate_pair(lead: u32, trail: u32) -> u32 {
     (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000
 }
+
+pub fn is_class_set_reserved_double_punctuator_character(cp: char) -> bool {
+    CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR_CHARACTER.contains(&cp)
+}
+
+pub fn is_class_set_syntax_character(cp: u32) -> bool {
+    CLASS_SET_SYNTAX_CHARACTER.contains(&cp)
+}
+
+pub fn is_class_set_reserved_punctuator(cp: u32) -> bool {
+    CLASS_SET_RESERVED_PUNCTUATOR.contains(&cp)
+}

From 306be96b2dbd11bb2c8586167a0d1391b660427e Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Mon, 15 Jan 2024 00:34:22 +0800
Subject: [PATCH 18/19] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20consume=20string?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 84 ++++++++++++++++++++++++++-----
 crates/oxc_js_regex/src/util.rs   |  4 +-
 2 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 18b24aeb93ae7..724ac3c6fdbcc 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -14,7 +14,7 @@ use crate::ast::{
     Alternative, Assertion, Backreference, BackreferenceRef, BoundaryAssertion, Branch,
     CapturingGroup, Character, EdgeAssertion, EdgeAssertionKind, Element, LookaheadAssertion,
     LookaroundAssertion, LookbehindAssertion, Pattern, QuantifiableElement, Quantifier,
-    RegExpLiteral, WordBoundaryAssertion,
+    RegExpLiteral, StringAlternative, WordBoundaryAssertion,
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
@@ -1026,18 +1026,23 @@ fn consume_class_string_disjunction<'a>(
  * @param i - The index of the string alternative.
  * @returns `UnicodeSetsConsumeResult`.
  */
-fn consume_class_string<'a>(parser: &mut Parser<'a>, i: usize) -> UnicodeSetsConsumeResult {
-    let start = self.index;
+fn consume_class_string<'a>(
+    parser: &mut Parser<'a>,
+    i: usize,
+) -> (UnicodeSetsConsumeResult, Option<StringAlternative<'a>>) {
+    let start = parser.index;
 
     let mut count = 0;
-    self.on_string_alternative_enter(start, i);
-
-    while self.current_code_point != -1 && self.consume_class_set_character() {
-        count += 1;
+    let mut arr = parser.builder.new_vec();
+    while !parser.eof() {
+        if let Some(character) = consume_class_set_character(parser) {
+            arr.push(character);
+            count += 1;
+        } else {
+            break;
+        }
     }
 
-    self.on_string_alternative_leave(start, self.index, i);
-
     // * Static Semantics: MayContainStrings
     // ClassString :: [empty]
     //     1. Return true.
@@ -1046,7 +1051,10 @@ fn consume_class_string<'a>(parser: &mut Parser<'a>, i: usize) -> UnicodeSetsCon
     // NonEmptyClassString :: ClassSetCharacter NonEmptyClassString(opt)
     //     1. If NonEmptyClassString is present, return true.
     //     2. Return false.
-    return UnicodeSetsConsumeResult { may_contain_strings: Some(count != 1) };
+    (
+        UnicodeSetsConsumeResult { may_contain_strings: Some(count != 1) },
+        Some(StringAlternative { span: parser.span_with_start(start), elements: arr }),
+    )
 }
 
 /**
@@ -1067,13 +1075,13 @@ fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> Option<Character>
     }
 
     if parser.eat('\\') {
-        if consume_character_escape() {
+        if consume_character_escape(parser) {
             return true;
         }
         if let Some(ch) = parser.current()
             && is_class_set_reserved_punctuator(ch)
         {
-            parser.last_int_value = parser.current()?;
+            parser.last_int_value = parser.current()? as u32;
             parser.advance();
 
             Some(Character {
@@ -1094,6 +1102,58 @@ fn consume_class_set_character<'a>(parser: &mut Parser<'a>) -> Option<Character>
     None
 }
 
+fn consume_character_escape<'a>(parser: &mut Parser<'a>) -> Option<Character> {
+    let start = parser.index;
+    if eat_control_escape(parser)
+        || eat_c_control_letter(parser)
+        || eat_zero(parser).is_some()
+        || eat_hex_escape_sequence(parser)
+        || eat_reg_exp_unicode_escape_sequence(parser, false)
+        || (!parser.context.strict
+            && !parser.context.unicode_mode
+            && eat_legacy_octal_escape_sequence(parser))
+        || eat_identity_escape(parser).is_some()
+    {
+        Some(Character {
+            span: parser.span_with_start(start - 1),
+            value: parser.last_int_value as char,
+        })
+    }
+    None
+}
+
+fn eat_hex_escape_sequence<'a>(parser: &mut Parser<'a>) -> bool {
+    let start = parser.index;
+    if parser.eat('x') {
+        if eat_fixed_hex_digits(parser, 2) {
+            return true;
+        }
+        if parser.context.unicode_mode || parser.context.strict {
+            panic!("Invalid escape");
+        }
+        parser.rewind(start);
+    }
+    false
+}
+
+fn eat_legacy_octal_escape_sequence<'a>(parser: &mut Parser<'a>) -> bool {
+    if eat_octal_digit(parser).is_some() {
+        let n1 = parser.last_int_value;
+        if eat_octal_digit(parser).is_some() {
+            let n2 = parser.last_int_value;
+            if n1 <= 3 && eat_octal_digit(parser).is_some() {
+                parser.last_int_value = n1 * 64 + n2 * 8 + parser.last_int_value;
+            } else {
+                parser.last_int_value = n1 * 8 + n2;
+            }
+        } else {
+            parser.last_int_value = n1;
+        }
+        return true;
+    }
+    false
+}
+
 /**
  * Eat the next characters as a RegExp `GroupName` production if possible.
  * Set `self._last_str_value` if the group name existed.
diff --git a/crates/oxc_js_regex/src/util.rs b/crates/oxc_js_regex/src/util.rs
index 79bea7181163d..454ea5fb88bc9 100644
--- a/crates/oxc_js_regex/src/util.rs
+++ b/crates/oxc_js_regex/src/util.rs
@@ -75,10 +75,10 @@ pub fn is_class_set_reserved_double_punctuator_character(cp: char) -> bool {
     CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR_CHARACTER.contains(&cp)
 }
 
-pub fn is_class_set_syntax_character(cp: u32) -> bool {
+pub fn is_class_set_syntax_character(cp: char) -> bool {
     CLASS_SET_SYNTAX_CHARACTER.contains(&cp)
 }
 
-pub fn is_class_set_reserved_punctuator(cp: u32) -> bool {
+pub fn is_class_set_reserved_punctuator(cp: char) -> bool {
     CLASS_SET_RESERVED_PUNCTUATOR.contains(&cp)
 }

From 5bb90f589fdf2b0d2cd713e175b3d7c3b29ccd03 Mon Sep 17 00:00:00 2001
From: IWANABETHATGUY <iwanabethatguy@qq.com>
Date: Mon, 15 Jan 2024 01:19:04 +0800
Subject: [PATCH 19/19] =?UTF-8?q?fix:=20=F0=9F=90=9B=20error?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/oxc_js_regex/src/parser.rs | 151 ++++++++++++++++--------------
 1 file changed, 83 insertions(+), 68 deletions(-)

diff --git a/crates/oxc_js_regex/src/parser.rs b/crates/oxc_js_regex/src/parser.rs
index 724ac3c6fdbcc..bca2c289ec990 100644
--- a/crates/oxc_js_regex/src/parser.rs
+++ b/crates/oxc_js_regex/src/parser.rs
@@ -12,9 +12,10 @@ use oxc_syntax::unicode_id_start::is_id_continue;
 
 use crate::ast::{
     Alternative, Assertion, Backreference, BackreferenceRef, BoundaryAssertion, Branch,
-    CapturingGroup, Character, EdgeAssertion, EdgeAssertionKind, Element, LookaheadAssertion,
-    LookaroundAssertion, LookbehindAssertion, Pattern, QuantifiableElement, Quantifier,
-    RegExpLiteral, StringAlternative, WordBoundaryAssertion,
+    CapturingGroup, Character, CharacterClass, ClassStringDisjunction, EdgeAssertion,
+    EdgeAssertionKind, Element, LookaheadAssertion, LookaroundAssertion, LookbehindAssertion,
+    Pattern, QuantifiableElement, Quantifier, RegExpLiteral, StringAlternative,
+    WordBoundaryAssertion,
 };
 use crate::ast_builder::AstBuilder;
 use crate::ecma_version::EcmaVersion;
@@ -149,6 +150,15 @@ impl<'a> Parser<'a> {
     pub fn rewind<'a>(&mut self, start: usize) {
         self.index = start;
     }
+
+    fn eat3(&self, first: char, second: char, third: char) -> bool {
+        if self.is(first) && self.nth(1) == Some(&second) && self.nth(2) == Some(&third) {
+            self.index += 3;
+            true
+        } else {
+            false
+        }
+    }
 }
 
 #[derive(Default, Clone, Copy)]
@@ -620,13 +630,11 @@ fn consume_character_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsCon
     }
 }
 
-/**
- * Consume ClassContents in a character class.
- * @returns `UnicodeSetsConsumeResult`.
- */
+/// Consume ClassContents in a character class.
+/// Returns a `UnicodeSetsConsumeResult`.
 fn consume_class_contents<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResult {
-    if self._unicode_sets_mode {
-        if self.current_code_point == RIGHT_SQUARE_BRACKET {
+    if parser._unicode_sets_mode {
+        if parser.current_code_point == RIGHT_SQUARE_BRACKET {
             // [empty]
 
             // * Static Semantics: MayContainStrings
@@ -635,7 +643,7 @@ fn consume_class_contents<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResu
             //     1. Return false.
             return UnicodeSetsConsumeResult { may_contain_strings: None };
         }
-        let result = self.consume_class_set_expression();
+        let result = parser.consume_class_set_expression();
 
         // * Static Semantics: MayContainStrings
         // ClassContents :: ClassSetExpression
@@ -643,39 +651,39 @@ fn consume_class_contents<'a>(parser: &mut Parser<'a>) -> UnicodeSetsConsumeResu
         return result;
     }
 
-    let strict = self.strict || self._unicode_mode;
+    let strict = parser.strict || parser._unicode_mode;
     loop {
         // Consume the first ClassAtom
-        let range_start = self.index;
-        if !self.consume_class_atom() {
+        let range_start = parser.index;
+        if !parser.consume_class_atom() {
             break;
         }
-        let min = self._last_int_value;
+        let min = parser._last_int_value;
 
         // Consume `-`
-        if !self.eat(HYPHEN_MINUS) {
+        if !parser.eat(HYPHEN_MINUS) {
             continue;
         }
-        self.on_character(range_start - 1, self.index, HYPHEN_MINUS);
+        parser.on_character(range_start - 1, parser.index, HYPHEN_MINUS);
 
         // Consume the second ClassAtom
-        if !self.consume_class_atom() {
+        if !parser.consume_class_atom() {
             break;
         }
-        let max = self._last_int_value;
+        let max = parser._last_int_value;
 
         // Validate
         if min == -1 || max == -1 {
             if strict {
-                self.raise("Invalid character class");
+                parser.raise("Invalid character class");
             }
             continue;
         }
         if min > max {
-            self.raise("Range out of order in character class");
+            parser.raise("Range out of order in character class");
         }
 
-        self.on_character_class_range(range_start, self.index, min, max);
+        parser.on_character_class_range(range_start, self.index, min, max);
     }
 
     // * Static Semantics: MayContainStrings
@@ -701,7 +709,7 @@ fn consume_class_atom<'a>(parser: &mut Parser<'a>) -> bool {
     }
 
     if self.eat(REVERSE_SOLIDUS) {
-        if self.consume_class_escape() {
+        if consume_class_escape(parser) {
             return true;
         }
         if !self.strict && self.current_code_point == LATIN_SMALL_LETTER_C {
@@ -753,7 +761,7 @@ fn consume_class_escape<'a>(parser: &mut Parser<'a>) -> bool {
         return true;
     }
 
-    return self.consume_character_class_escape() || self.consume_character_escape();
+    return consume_character_class_escape(parser) || consume_character_escape(parser);
 }
 
 /**
@@ -890,7 +898,7 @@ fn eat_decimal_digits<'a>(parser: &mut Parser<'a>) -> bool {
         let Some(d) = ch.to_digit(10) else {
             break;
         };
-        parser.last_int_value = 10 * parser.last_int_value + d as usize;
+        parser.last_int_value = 10 * parser.last_int_value + d;
         parser.advance();
     }
     parser.index != start
@@ -944,16 +952,15 @@ fn count_capturing_parens<'a>(parser: &mut Parser<'a>) -> usize {
     count
 }
 
-/**
- * Consume NestedClass in a character class.
- * @returns `UnicodeSetsConsumeResult`.
- */
-fn consume_nested_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsumeResult> {
+/// * Consume NestedClass in a character class.
+///  * @returns `UnicodeSetsConsumeResult`.
+///  TODO:
+fn consume_nested_class<'a>(parser: &mut Parser<'a>) -> Option<CharacterClass> {
     let start = self.index;
     if self.eat(LEFT_SQUARE_BRACKET) {
         let negate = self.eat(CIRCUMFLEX_ACCENT);
         self.on_character_class_enter(start, negate, true);
-        let result = self.consume_class_contents();
+        let result = consume_class_contents(parser);
         if !self.eat(RIGHT_SQUARE_BRACKET) {
             self.raise("Unterminated character class");
         }
@@ -988,24 +995,27 @@ fn consume_nested_class<'a>(parser: &mut Parser<'a>) -> Option<UnicodeSetsConsum
  */
 fn consume_class_string_disjunction<'a>(
     parser: &mut Parser<'a>,
-) -> Option<UnicodeSetsConsumeResult> {
-    let start = self.index;
-    if self.eat3(REVERSE_SOLIDUS, LATIN_SMALL_LETTER_Q, LEFT_CURLY_BRACKET) {
-        self.on_class_string_disjunction_enter(start);
-
+) -> (Option<UnicodeSetsConsumeResult>, Option<ClassStringDisjunction<'a>>) {
+    let start = parser.index;
+    if parser.eat3('\\', 'q', '{') {
         let mut i = 0;
         let mut may_contain_strings = false;
-        while self.consume_class_string(i).may_contain_strings.unwrap_or(false) {
-            may_contain_strings = true;
+        let mut alternatives = parser.builder.new_vec();
+        loop {
+            let (consume_res, node) = consume_class_string(parser, i);
+            if consume_res.may_contain_strings.unwrap_or_default() {
+                may_contain_strings = true;
+            }
+            if let Some(node) = node {
+                alternatives.push(node);
+            }
             i += 1;
-            if !self.eat(VERTICAL_LINE) {
+            if !parser.eat('|') {
                 break;
             }
         }
 
-        if self.eat(RIGHT_CURLY_BRACKET) {
-            self.on_class_string_disjunction_leave(start, self.index);
-
+        if parser.eat('}') {
             // * Static Semantics: MayContainStrings
             // ClassStringDisjunction :: \q{ ClassStringDisjunctionContents }
             //     1. Return MayContainStrings of the ClassStringDisjunctionContents.
@@ -1014,9 +1024,12 @@ fn consume_class_string_disjunction<'a>(
             // ClassStringDisjunctionContents :: ClassString | ClassStringDisjunctionContents
             //     1. If MayContainStrings of the ClassString is true, return true.
             //     2. Return MayContainStrings of the ClassStringDisjunctionContents.
-            return Some(UnicodeSetsConsumeResult { may_contain_strings });
+            return (
+                Some(UnicodeSetsConsumeResult { may_contain_strings: Some(may_contain_strings) }),
+                Some(ClassStringDisjunction { span: parser.span_with_start(start), alternatives }),
+            );
         }
-        self.raise("Unterminated class string disjunction");
+        panic!("Unterminated class string disjunction");
     }
     None
 }
@@ -1198,20 +1211,20 @@ fn eat_reg_exp_identifier_start<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let start = parser.index;
     let force_u_flag =
         !parser.context.unicode_mode && parser.context.ecma_version >= EcmaVersion::V2020;
-    let mut cp = *parser.current()?;
+    let mut cp = parser.current()?;
     parser.advance();
 
     if cp == '\\' && eat_reg_exp_unicode_escape_sequence(parser, force_u_flag) {
-        cp = parser.last_int_value as u32 as char;
+        cp = char::from_u32(parser.last_int_value).expect("should convert to char");
     } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(parser.current()? as u32)
     {
-        cp = combine_surrogate_pair(cp, parser.current() as u32);
+        cp = combine_surrogate_pair(cp, parser.current().expect("should convert to u32") as u32);
         parser.advance();
     }
 
     if is_identifier_start_char(cp) {
-        parser.last_int_value = cp;
-        return true;
+        parser.last_int_value = cp as u32;
+        return Some(());
     }
 
     if parser.index != start {
@@ -1235,19 +1248,21 @@ fn eat_reg_exp_identifier_part<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let start = parser.index;
     let force_u_flag =
         !parser.context.unicode_mode && parser.context.ecma_version >= EcmaVersion::V2020;
-    let mut cp = *parser.current()?;
+    let mut cp = parser.current()?;
     parser.advance();
 
     if cp == '\\' && eat_reg_exp_unicode_escape_sequence(parser, force_u_flag) {
-        cp = parser.last_int_value as u32 as char;
-    } else if force_u_flag && is_lead_surrogate(cp) && is_trail_surrogate(parser.current()? as u32)
+        cp = char::from_u32(parser.last_int_value).expect("should convert to char");
+    } else if force_u_flag
+        && is_lead_surrogate(cp as u32)
+        && is_trail_surrogate(parser.current()? as u32)
     {
-        cp = combine_surrogate_pair(cp, parser.current()? as u32);
+        cp = combine_surrogate_pair(cp as u32, parser.current()? as u32);
         parser.advance();
     }
 
     if is_identifier_part(cp) {
-        parser.last_int_value = cp as usize;
+        parser.last_int_value = cp as u32;
         return Some(());
     }
 
@@ -1363,10 +1378,10 @@ fn eat_reg_exp_unicode_code_point_escape<'a>(parser: &mut Parser<'a>) -> bool {
 fn eat_decimal_escape<'a>(parser: &mut Parser<'a>) -> Option<()> {
     parser.last_int_value = 0;
     let mut cp = parser.current()?;
-    if cp >= &'1' && cp <= &'9' {
-        while cp >= &'1' && cp <= &'9' {
-            parser.last_int_value = 10 * parser.last_int_value
-                + cp.to_digit(10).expect("should convert successfully") as usize;
+    if cp >= '1' && cp <= '9' {
+        while cp >= '1' && cp <= '9' {
+            parser.last_int_value =
+                10 * parser.last_int_value + cp.to_digit(10).expect("should convert successfully");
             parser.advance();
             cp = match parser.current() {
                 Some(ch) => ch,
@@ -1393,7 +1408,7 @@ fn eat_control_letter<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let cp = parser.current()?;
     if cp.is_ascii_alphabetic() {
         parser.advance();
-        parser.last_int_value = (cp as usize) % 0x20;
+        parser.last_int_value = (cp as u32) % 0x20;
         return Some(());
     }
     None
@@ -1445,16 +1460,16 @@ fn eat_reg_exp_unicode_escape_sequence<'a>(parser: &mut Parser<'a>, force_u_flag
 fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> bool {
     let start = parser.index;
 
-    if parser.eat_fixed_hex_digits(4) {
+    if eat_fixed_hex_digits(parser, 4).is_some() {
         let lead = parser.last_int_value;
-        if is_lead_surrogate(lead as u32)
+        if is_lead_surrogate(lead)
             && parser.eat('\\')
             && parser.eat('u')
-            && parser.eat_fixed_hex_digits(4)
+            && eat_fixed_hex_digits(parser, 4).is_some()
         {
             let trail = parser.last_int_value;
-            if is_trail_surrogate(trail as u32) {
-                parser.last_int_value = combine_surrogate_pair(lead, trail) as usize;
+            if is_trail_surrogate(trail) {
+                parser.last_int_value = combine_surrogate_pair(lead, trail);
                 return true;
             }
         }
@@ -1483,10 +1498,10 @@ fn eat_reg_exp_unicode_surrogate_pair_escape<'a>(parser: &mut Parser<'a>) -> boo
  */
 fn eat_identity_escape<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let cp = parser.current();
-    if parser.is_valid_identity_escape(cp.cloned()) {
-        parser.last_int_value = cp.unwrap() as usize;
+    if is_valid_identity_escape(parser, cp) {
+        parser.last_int_value = cp.unwrap() as u32;
         parser.advance();
-        return true;
+        return Some(());
     }
     None
 }
@@ -1548,7 +1563,7 @@ fn eat_octal_digit<'a>(parser: &mut Parser<'a>) -> Option<()> {
     let cp = parser.current()?;
     if cp.is_digit(8) {
         parser.advance();
-        parser.last_int_value = cp.to_digit(8)? as usize;
+        parser.last_int_value = cp.to_digit(8)?;
         Some(())
     } else {
         parser.last_int_value = 0;
@@ -1575,7 +1590,7 @@ fn eat_fixed_hex_digits<'a>(parser: &mut Parser<'a>, length: usize) -> Option<()
             parser.rewind(start);
             return None;
         }
-        parser.last_int_value = 16 * parser.last_int_value + cp.to_digit(16)? as usize;
+        parser.last_int_value = 16 * parser.last_int_value + cp.to_digit(16)?;
         parser.advance();
     }
     Some(())