diff --git a/crates/swc_ecma_lexer/src/common/lexer/mod.rs b/crates/swc_ecma_lexer/src/common/lexer/mod.rs
index e32c94edb80f..cc757e103a13 100644
--- a/crates/swc_ecma_lexer/src/common/lexer/mod.rs
+++ b/crates/swc_ecma_lexer/src/common/lexer/mod.rs
@@ -2110,20 +2110,16 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized {
         }
     }
 
-    /// This can be used if there's no keyword starting with the first
-    /// character.
-    fn read_word_with(
+    fn read_keyword_with(
         &mut self,
         convert: &dyn Fn(&str) -> Option<Self::Token>,
     ) -> LexResult<Option<Self::Token>> {
         debug_assert!(self.cur().is_some());
         let start = self.cur_pos();
-        let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
-            if can_be_known {
-                if let Some(word) = convert(s) {
-                    return word;
-                }
-            }
+        let (word, has_escape) = self.read_keyword_as_str_with(|l, s, _, _| {
+            if let Some(word) = convert(s) {
+                return word;
+            }
             let atom = l.atom(s);
             Self::Token::unknown_ident(atom, l)
@@ -2133,7 +2129,6 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized {
         // 'await' and 'yield' may have semantic of reserved word, which means lexer
         // should know context or parser should handle this error. Our approach to this
        // problem is former one.
-
         if has_escape && word.is_reserved(self.ctx()) {
            let word = word.into_atom(self).unwrap();
            self.error(start, SyntaxError::EscapeInReservedWord { word })?
@@ -2141,6 +2136,58 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized {
 
         Ok(Some(word))
     }
+
+    /// This is a performant version of [Lexer::read_word_as_str_with] for
+    /// reading keywords. The caller must ensure that the first byte is a
+    /// valid ASCII identifier start.
+    fn read_keyword_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
+    where
+        F: FnOnce(&mut Self, &str, bool, bool) -> Ret,
+    {
+        let slice_start = self.cur_pos();
+        let has_escape = false;
+
+        // Fast path: scan the ASCII identifier with byte_search. The caller
+        // has already validated the first byte, so advance past it.
+        self.bump();
+
+        // Use byte_search to quickly scan to the end of the ASCII identifier.
+        let next_byte = byte_search! {
+            lexer: self,
+            table: NOT_ASCII_ID_CONTINUE_TABLE,
+            handle_eof: {
+                // Reached EOF; the entire remainder is the identifier.
+                let end = self.cur_pos();
+                let s = unsafe {
+                    // Safety: slice_start and end are valid positions because
+                    // we got them from `self.input`
+                    self.input_slice(slice_start, end)
+                };
+
+                return Ok((convert(self, s, false, true), false));
+            },
+        };
+
+        // Check if we hit the end of the identifier or need to fall back to
+        // the slow path.
+        if !next_byte.is_ascii() {
+            // Hit a Unicode character; fall back to the slow path from the
+            // current position.
+            self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
+        } else if next_byte == b'\\' {
+            // Hit an escape sequence; fall back to the slow path from the
+            // current position.
+            self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
+        } else {
+            // Hit the end of the identifier (a non-continue ASCII byte).
+            let end = self.cur_pos();
+            let s = unsafe {
+                // Safety: slice_start and end are valid positions because we
+                // got them from `self.input`
+                self.input_slice(slice_start, end)
+            };
+
+            Ok((convert(self, s, has_escape, true), has_escape))
+        }
+    }
 }
 
 pub fn pos_span(p: BytePos) -> Span {
diff --git a/crates/swc_ecma_lexer/src/lexer/table.rs b/crates/swc_ecma_lexer/src/lexer/table.rs
index 105478b1e189..876548cffc4b 100644
--- a/crates/swc_ecma_lexer/src/lexer/table.rs
+++ b/crates/swc_ecma_lexer/src/lexer/table.rs
@@ -64,7 +64,7 @@ const ERR: ByteHandler = Some(|lexer| {
 const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown().map(Some));
 
 const L_A: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "abstract" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Abstract,
         )))),
@@ -88,7 +88,7 @@ const L_A: ByteHandler = Some(|lexer| {
 });
 
 const L_B: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "break" => Some(Token::Word(Word::Keyword(Keyword::Break))),
         "boolean" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Boolean,
@@ -101,7 +101,7 @@ const L_B: ByteHandler = Some(|lexer| {
 });
 
 const L_C: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "case" => Some(Token::Word(Word::Keyword(Keyword::Case))),
         "catch" => Some(Token::Word(Word::Keyword(Keyword::Catch))),
         "class" => Some(Token::Word(Word::Keyword(Keyword::Class))),
@@ -112,7 +112,7 @@ const L_C: ByteHandler = Some(|lexer| {
 });
 
 const L_D: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "debugger" => Some(Token::Word(Word::Keyword(Keyword::Debugger))),
         "default" => Some(Token::Word(Word::Keyword(Keyword::Default_))),
         "delete" => Some(Token::Word(Word::Keyword(Keyword::Delete))),
@@ -125,7 +125,7 @@ const L_D: ByteHandler = Some(|lexer| {
 });
 
 const L_E: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "else" => Some(Token::Word(Word::Keyword(Keyword::Else))),
         "enum" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Enum)))),
         "export" => Some(Token::Word(Word::Keyword(Keyword::Export))),
@@ -135,7 +135,7 @@ const L_E: ByteHandler = Some(|lexer| {
 });
 
 const L_F: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "false" => Some(Token::Word(Word::False)),
         "finally" => Some(Token::Word(Word::Keyword(Keyword::Finally))),
         "for" => Some(Token::Word(Word::Keyword(Keyword::For))),
@@ -146,7 +146,7 @@ const L_F: ByteHandler = Some(|lexer| {
 });
 
 const L_G: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "global" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Global,
         )))),
@@ -158,7 +158,7 @@ const L_G: ByteHandler = Some(|lexer| {
 const L_H: ByteHandler = IDN;
 
 const L_I: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "if" => Some(Token::Word(Word::Keyword(Keyword::If))),
         "import" => Some(Token::Word(Word::Keyword(Keyword::Import))),
         "in" => Some(Token::Word(Word::Keyword(Keyword::In))),
@@ -183,7 +183,7 @@ const L_I: ByteHandler = Some(|lexer| {
 const L_J: ByteHandler = IDN;
 
 const L_K: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "keyof" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Keyof,
         )))),
@@ -192,21 +192,21 @@
 });
 
 const L_L: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "let" => Some(Token::Word(Word::Keyword(Keyword::Let))),
         _ => None,
     })
 });
 
 const L_M: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "meta" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Meta)))),
         _ => None,
     })
 });
 
 const L_N: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "new" => Some(Token::Word(Word::Keyword(Keyword::New))),
         "null" => Some(Token::Word(Word::Null)),
         "number" => Some(Token::Word(Word::Ident(IdentLike::Known(
@@ -223,7 +223,7 @@ const L_N: ByteHandler = Some(|lexer| {
 });
 
 const L_O: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "of" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Of)))),
         "object" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Object,
@@ -233,7 +233,7 @@ const L_O: ByteHandler = Some(|lexer| {
 });
 
 const L_P: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "public" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Public,
         )))),
@@ -253,7 +253,7 @@ const L_P: ByteHandler = Some(|lexer| {
 const L_Q: ByteHandler = IDN;
 
 const L_R: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "return" => Some(Token::Word(Word::Keyword(Keyword::Return))),
         "readonly" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Readonly,
@@ -266,7 +266,7 @@ const L_R: ByteHandler = Some(|lexer| {
 });
 
 const L_S: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "super" => Some(Token::Word(Word::Keyword(Keyword::Super))),
         "static" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Static,
@@ -287,7 +287,7 @@ const L_S: ByteHandler = Some(|lexer| {
 });
 
 const L_T: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "this" => Some(Token::Word(Word::Keyword(Keyword::This))),
         "throw" => Some(Token::Word(Word::Keyword(Keyword::Throw))),
         "true" => Some(Token::Word(Word::True)),
@@ -302,7 +302,7 @@ const L_T: ByteHandler = Some(|lexer| {
 });
 
 const L_U: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "using" => Some(Token::Word(Word::Ident(IdentLike::Known(
             KnownIdent::Using,
         )))),
@@ -320,7 +320,7 @@ const L_U: ByteHandler = Some(|lexer| {
 });
 
 const L_V: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "var" => Some(Token::Word(Word::Keyword(Keyword::Var))),
         "void" => Some(Token::Word(Word::Keyword(Keyword::Void))),
         _ => None,
@@ -328,7 +328,7 @@ const L_V: ByteHandler = Some(|lexer| {
 });
 
 const L_W: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "while" => Some(Token::Word(Word::Keyword(Keyword::While))),
         "with" => Some(Token::Word(Word::Keyword(Keyword::With))),
         _ => None,
@@ -338,7 +338,7 @@ const L_W: ByteHandler = Some(|lexer| {
 const L_X: ByteHandler = IDN;
 
 const L_Y: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "yield" => Some(Token::Word(Word::Keyword(Keyword::Yield))),
         _ => None,
     })
diff --git a/crates/swc_ecma_parser/src/lexer/table.rs b/crates/swc_ecma_parser/src/lexer/table.rs
index 03222c4aa7df..cb54621aee0f 100644
--- a/crates/swc_ecma_parser/src/lexer/table.rs
+++ b/crates/swc_ecma_parser/src/lexer/table.rs
@@ -63,7 +63,7 @@ const ERR: ByteHandler = Some(|lexer| {
 const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown().map(Some));
 
 const L_A: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "abstract" => Some(Token::Abstract),
         "as" => Some(Token::As),
         "await" => Some(Token::Await),
@@ -77,7 +77,7 @@ const L_A: ByteHandler = Some(|lexer| {
 });
 
 const L_B: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "break" => Some(Token::Break),
         "boolean" => Some(Token::Boolean),
         "bigint" => Some(Token::Bigint),
@@ -86,7 +86,7 @@ const L_B: ByteHandler = Some(|lexer| {
 });
 
 const L_C: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "case" => Some(Token::Case),
         "catch" => Some(Token::Catch),
         "class" => Some(Token::Class),
@@ -97,7 +97,7 @@ const L_C: ByteHandler = Some(|lexer| {
 });
 
 const L_D: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "debugger" => Some(Token::Debugger),
         "default" => Some(Token::Default),
         "delete" => Some(Token::Delete),
@@ -108,7 +108,7 @@ const L_D: ByteHandler = Some(|lexer| {
 });
 
 const L_E: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "else" => Some(Token::Else),
         "enum" => Some(Token::Enum),
         "export" => Some(Token::Export),
@@ -118,7 +118,7 @@ const L_E: ByteHandler = Some(|lexer| {
 });
 
 const L_F: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "false" => Some(Token::False),
         "finally" => Some(Token::Finally),
         "for" => Some(Token::For),
@@ -129,7 +129,7 @@ const L_F: ByteHandler = Some(|lexer| {
 });
 
 const L_G: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "global" => Some(Token::Global),
         "get" => Some(Token::Get),
         _ => None,
@@ -139,7 +139,7 @@ const L_G: ByteHandler = Some(|lexer| {
 const L_H: ByteHandler = IDN;
 
 const L_I: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "if" => Some(Token::If),
         "import" => Some(Token::Import),
         "in" => Some(Token::In),
@@ -156,28 +156,28 @@ const L_I: ByteHandler = Some(|lexer| {
 const L_J: ByteHandler = IDN;
 
 const L_K: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "keyof" => Some(Token::Keyof),
         _ => None,
     })
 });
 
 const L_L: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "let" => Some(Token::Let),
         _ => None,
     })
 });
 
 const L_M: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "meta" => Some(Token::Meta),
         _ => None,
     })
 });
 
 const L_N: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "new" => Some(Token::New),
         "null" => Some(Token::Null),
         "number" => Some(Token::Number),
@@ -188,7 +188,7 @@ const L_N: ByteHandler = Some(|lexer| {
 });
 
 const L_O: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "of" => Some(Token::Of),
         "object" => Some(Token::Object),
         "out" => Some(Token::Out),
@@ -198,7 +198,7 @@ const L_O: ByteHandler = Some(|lexer| {
 });
 
 const L_P: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "public" => Some(Token::Public),
         "package" => Some(Token::Package),
         "protected" => Some(Token::Protected),
@@ -210,7 +210,7 @@ const L_P: ByteHandler = Some(|lexer| {
 const L_Q: ByteHandler = IDN;
 
 const L_R: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "return" => Some(Token::Return),
         "readonly" => Some(Token::Readonly),
         "require" => Some(Token::Require),
@@ -219,7 +219,7 @@ const L_R: ByteHandler = Some(|lexer| {
 });
 
 const L_S: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "super" => Some(Token::Super),
         "static" => Some(Token::Static),
         "switch" => Some(Token::Switch),
@@ -232,7 +232,7 @@ const L_S: ByteHandler = Some(|lexer| {
 });
 
 const L_T: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "this" => Some(Token::This),
         "throw" => Some(Token::Throw),
         "true" => Some(Token::True),
@@ -245,7 +245,7 @@ const L_T: ByteHandler = Some(|lexer| {
 });
 
 const L_U: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "using" => Some(Token::Using),
         "unique" => Some(Token::Unique),
         "undefined" => Some(Token::Undefined),
@@ -255,7 +255,7 @@ const L_U: ByteHandler = Some(|lexer| {
 });
 
 const L_V: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "var" => Some(Token::Var),
         "void" => Some(Token::Void),
         _ => None,
@@ -263,7 +263,7 @@ const L_V: ByteHandler = Some(|lexer| {
 });
 
 const L_W: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "while" => Some(Token::While),
         "with" => Some(Token::With),
         _ => None,
@@ -273,7 +273,7 @@ const L_W: ByteHandler = Some(|lexer| {
 const L_X: ByteHandler = IDN;
 
 const L_Y: ByteHandler = Some(|lexer| {
-    lexer.read_word_with(&|s| match s {
+    lexer.read_keyword_with(&|s| match s {
         "yield" => Some(Token::Yield),
         _ => None,
     })
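
Note on the fast path added in `common/lexer/mod.rs`: `byte_search!` with `NOT_ASCII_ID_CONTINUE_TABLE` skips ahead while bytes can continue an ASCII identifier, so a keyword like `return` is sliced out of the input in a single pass; only a `\` escape or a non-ASCII byte forces the fall back to `read_word_as_str_with_slow_path`. The sketch below illustrates that table-driven scan in isolation. It is a minimal model under stated assumptions, not the actual macro: the table construction and the `scan_keyword` helper are invented for the example, and the table is assumed to mark every byte that cannot continue an ASCII identifier (everything outside `[A-Za-z0-9_$]`).

```rust
// Illustrative sketch of the table-driven keyword scan; not the actual swc
// macro. Assumption: the stop table marks every byte that cannot continue an
// ASCII identifier, so non-ASCII bytes and `\` also stop the fast scan,
// mirroring the fallback checks in the diff.

const fn build_table() -> [bool; 256] {
    let mut stop = [true; 256];
    let mut b = 0;
    while b < 256 {
        let c = b as u8;
        if c.is_ascii_alphanumeric() || c == b'_' || c == b'$' {
            stop[b] = false;
        }
        b += 1;
    }
    stop
}

const NOT_ASCII_ID_CONTINUE: [bool; 256] = build_table();

/// Scan an identifier starting at `start`; the caller has already validated
/// the first byte, so the scan begins at `start + 1` (like `self.bump()` in
/// the diff). Returns the identifier slice and the byte that ended the scan
/// (`None` at EOF, like `handle_eof`).
fn scan_keyword(src: &str, start: usize) -> (&str, Option<u8>) {
    let bytes = src.as_bytes();
    let mut pos = start + 1;
    while pos < bytes.len() && !NOT_ASCII_ID_CONTINUE[bytes[pos] as usize] {
        pos += 1;
    }
    if pos == bytes.len() {
        // EOF: the entire remainder is the identifier.
        (&src[start..], None)
    } else {
        // A non-ASCII byte or `\` here would send the real lexer down the
        // slow path; any other stop byte ends the keyword cleanly.
        (&src[start..pos], Some(bytes[pos]))
    }
}

fn main() {
    assert_eq!(scan_keyword("return 1;", 0), ("return", Some(b' ')));
    assert_eq!(scan_keyword("yield", 0), ("yield", None));
    // The escape in `b\u0072eak` stops the fast scan at the backslash.
    assert_eq!(scan_keyword("b\\u0072eak", 0), ("b", Some(b'\\')));
}
```

The per-byte work in the common all-ASCII case reduces to one table load and compare, with no escape or Unicode handling, which is presumably why this path is cheaper than the general `read_word_as_str_with` reader the handlers called before.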