65 changes: 56 additions & 9 deletions crates/swc_ecma_lexer/src/common/lexer/mod.rs
@@ -2110,20 +2110,16 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
}
}

-/// This can be used if there's no keyword starting with the first
-/// character.
-fn read_word_with(
+fn read_keyword_with(
&mut self,
convert: &dyn Fn(&str) -> Option<Self::Token>,
) -> LexResult<Option<Self::Token>> {
debug_assert!(self.cur().is_some());

let start = self.cur_pos();
-let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| {
-if can_be_known {
-if let Some(word) = convert(s) {
-return word;
-}
+let (word, has_escape) = self.read_keyword_as_str_with(|l, s, _, _| {
+if let Some(word) = convert(s) {
+return word;
}
let atom = l.atom(s);
Self::Token::unknown_ident(atom, l)
@@ -2133,14 +2129,65 @@ pub trait Lexer<'a, TokenAndSpan>: Tokens<TokenAndSpan> + Sized {
// 'await' and 'yield' may have the semantics of a reserved word, which means
// either the lexer should know the context or the parser should handle this
// error. Our approach to this problem is the former.
-
if has_escape && word.is_reserved(self.ctx()) {
let word = word.into_atom(self).unwrap();
self.error(start, SyntaxError::EscapeInReservedWord { word })?
} else {
Ok(Some(word))
}
}
+
+/// This is a performant version of [Lexer::read_word_as_str_with] for
+/// reading keywords. The caller must make sure the first byte is valid
+/// ASCII.
+fn read_keyword_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
+where
+F: FnOnce(&mut Self, &str, bool, bool) -> Ret,
+{
+let slice_start = self.cur_pos();
+let has_escape = false;
+
+// Fast path: try to scan an ASCII identifier using `byte_search`.
+// The dispatch table has already ensured the first byte can start a keyword.
+// Advance past the first byte.
+self.bump();
+
+// Use `byte_search` to quickly scan to the end of the ASCII identifier.
+let next_byte = byte_search! {
+lexer: self,
+table: NOT_ASCII_ID_CONTINUE_TABLE,
+handle_eof: {
+// Reached EOF: the entire remainder is the identifier.
+let end = self.cur_pos();
+let s = unsafe {
+// Safety: `slice_start` and `end` are valid positions because we
+// got them from `self.input`.
+self.input_slice(slice_start, end)
+};
+
+return Ok((convert(self, s, false, true), false));
+},
+};
+
+// Check whether we hit the end of the identifier or need to fall back
+// to the slow path.
+if !next_byte.is_ascii() {
+// Hit a Unicode character: fall back to the slow path from the current position.
+self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
+} else if next_byte == b'\\' {
+// Hit an escape sequence: fall back to the slow path from the current position.
+self.read_word_as_str_with_slow_path(convert, slice_start, has_escape, true)
+} else {
+// Hit the end of the identifier (a non-continue ASCII char).
+let end = self.cur_pos();
+let s = unsafe {
+// Safety: `slice_start` and `end` are valid positions because we
+// got them from `self.input`.
+self.input_slice(slice_start, end)
+};
+
+Ok((convert(self, s, has_escape, true), has_escape))
+}
+}
}

pub fn pos_span(p: BytePos) -> Span {
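The `has_escape && word.is_reserved(...)` check in `read_keyword_with` above is what rejects reserved words spelled with Unicode escapes (e.g. `\u0069f` for `if`). A minimal, runnable sketch of the rule, assuming a fixed reserved-word list in place of the context-sensitive `word.is_reserved(self.ctx())`:

```rust
// Sketch only: the real reserved-word set depends on lexer context
// (module vs. script, strict mode, and so on).
fn check_escaped_keyword(word: &str, has_escape: bool) -> Result<(), String> {
    const RESERVED: &[&str] = &["await", "yield", "if", "while", "return"];
    if has_escape && RESERVED.contains(&word) {
        // Corresponds to `SyntaxError::EscapeInReservedWord`.
        return Err(format!("reserved word `{word}` must not contain escapes"));
    }
    Ok(())
}

fn main() {
    // `\u0069f` reads back as the text "if", but with `has_escape == true`.
    assert!(check_escaped_keyword("if", true).is_err());
    assert!(check_escaped_keyword("if", false).is_ok());
    // Escapes are fine in ordinary identifiers.
    assert!(check_escaped_keyword("foo", true).is_ok());
}
```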
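The new fast path scans forward until it meets a byte that cannot continue an ASCII identifier, and bails out to the slow path on a non-ASCII byte or a `\` escape. A self-contained sketch of that control flow, without swc's `byte_search!` macro or `Lexer` plumbing (the names below are illustrative, not swc's API):

```rust
// Assumes the first byte was already validated as an ASCII identifier
// start, so the scan begins at index 1 (mirroring `self.bump()`).
enum Stop {
    End(usize),      // identifier ends at this byte offset
    NonAscii(usize), // non-ASCII byte: fall back to the slow path
    Escape(usize),   // backslash escape: fall back to the slow path
}

// Stand-in for the complement of NOT_ASCII_ID_CONTINUE_TABLE.
fn is_ascii_id_continue(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_' || b == b'$'
}

fn scan_ascii_ident(src: &[u8]) -> Stop {
    let mut i = 1;
    while i < src.len() {
        let b = src[i];
        if !b.is_ascii() {
            return Stop::NonAscii(i);
        }
        if b == b'\\' {
            return Stop::Escape(i);
        }
        if !is_ascii_id_continue(b) {
            return Stop::End(i);
        }
        i += 1;
    }
    Stop::End(src.len()) // EOF: the entire remainder is the identifier
}

fn main() {
    // "return 1": the space at offset 6 ends the keyword, so the
    // candidate slice passed to `convert` would be `&src[..6]`.
    assert!(matches!(scan_ascii_ident(b"return 1"), Stop::End(6)));
    // An escape forces the slow path, as in the `b'\\'` branch above.
    assert!(matches!(scan_ascii_ident(b"yi\\u0065ld"), Stop::Escape(2)));
}
```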
42 changes: 21 additions & 21 deletions crates/swc_ecma_lexer/src/lexer/table.rs
@@ -64,7 +64,7 @@ const ERR: ByteHandler = Some(|lexer| {
const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown().map(Some));

const L_A: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"abstract" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Abstract,
)))),
@@ -88,7 +88,7 @@ const L_A: ByteHandler = Some(|lexer| {
});

const L_B: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"break" => Some(Token::Word(Word::Keyword(Keyword::Break))),
"boolean" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Boolean,
@@ -101,7 +101,7 @@ const L_B: ByteHandler = Some(|lexer| {
});

const L_C: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"case" => Some(Token::Word(Word::Keyword(Keyword::Case))),
"catch" => Some(Token::Word(Word::Keyword(Keyword::Catch))),
"class" => Some(Token::Word(Word::Keyword(Keyword::Class))),
@@ -112,7 +112,7 @@ const L_C: ByteHandler = Some(|lexer| {
});

const L_D: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"debugger" => Some(Token::Word(Word::Keyword(Keyword::Debugger))),
"default" => Some(Token::Word(Word::Keyword(Keyword::Default_))),
"delete" => Some(Token::Word(Word::Keyword(Keyword::Delete))),
@@ -125,7 +125,7 @@ const L_D: ByteHandler = Some(|lexer| {
});

const L_E: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"else" => Some(Token::Word(Word::Keyword(Keyword::Else))),
"enum" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Enum)))),
"export" => Some(Token::Word(Word::Keyword(Keyword::Export))),
@@ -135,7 +135,7 @@ const L_E: ByteHandler = Some(|lexer| {
});

const L_F: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"false" => Some(Token::Word(Word::False)),
"finally" => Some(Token::Word(Word::Keyword(Keyword::Finally))),
"for" => Some(Token::Word(Word::Keyword(Keyword::For))),
@@ -146,7 +146,7 @@ const L_F: ByteHandler = Some(|lexer| {
});

const L_G: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"global" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Global,
)))),
@@ -158,7 +158,7 @@ const L_G: ByteHandler = Some(|lexer| {
const L_H: ByteHandler = IDN;

const L_I: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"if" => Some(Token::Word(Word::Keyword(Keyword::If))),
"import" => Some(Token::Word(Word::Keyword(Keyword::Import))),
"in" => Some(Token::Word(Word::Keyword(Keyword::In))),
@@ -183,7 +183,7 @@ const L_I: ByteHandler = Some(|lexer| {
const L_J: ByteHandler = IDN;

const L_K: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"keyof" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Keyof,
)))),
@@ -192,21 +192,21 @@ const L_K: ByteHandler = Some(|lexer| {
});

const L_L: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"let" => Some(Token::Word(Word::Keyword(Keyword::Let))),
_ => None,
})
});

const L_M: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"meta" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Meta)))),
_ => None,
})
});

const L_N: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"new" => Some(Token::Word(Word::Keyword(Keyword::New))),
"null" => Some(Token::Word(Word::Null)),
"number" => Some(Token::Word(Word::Ident(IdentLike::Known(
@@ -223,7 +223,7 @@ const L_N: ByteHandler = Some(|lexer| {
});

const L_O: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"of" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Of)))),
"object" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Object,
@@ -233,7 +233,7 @@ const L_O: ByteHandler = Some(|lexer| {
});

const L_P: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"public" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Public,
)))),
@@ -253,7 +253,7 @@ const L_P: ByteHandler = Some(|lexer| {
const L_Q: ByteHandler = IDN;

const L_R: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"return" => Some(Token::Word(Word::Keyword(Keyword::Return))),
"readonly" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Readonly,
@@ -266,7 +266,7 @@ const L_R: ByteHandler = Some(|lexer| {
});

const L_S: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"super" => Some(Token::Word(Word::Keyword(Keyword::Super))),
"static" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Static,
@@ -287,7 +287,7 @@ const L_S: ByteHandler = Some(|lexer| {
});

const L_T: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"this" => Some(Token::Word(Word::Keyword(Keyword::This))),
"throw" => Some(Token::Word(Word::Keyword(Keyword::Throw))),
"true" => Some(Token::Word(Word::True)),
@@ -302,7 +302,7 @@ const L_T: ByteHandler = Some(|lexer| {
});

const L_U: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"using" => Some(Token::Word(Word::Ident(IdentLike::Known(
KnownIdent::Using,
)))),
@@ -320,15 +320,15 @@ const L_U: ByteHandler = Some(|lexer| {
});

const L_V: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"var" => Some(Token::Word(Word::Keyword(Keyword::Var))),
"void" => Some(Token::Word(Word::Keyword(Keyword::Void))),
_ => None,
})
});

const L_W: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"while" => Some(Token::Word(Word::Keyword(Keyword::While))),
"with" => Some(Token::Word(Word::Keyword(Keyword::With))),
_ => None,
@@ -338,7 +338,7 @@ const L_W: ByteHandler = Some(|lexer| {
const L_X: ByteHandler = IDN;

const L_Y: ByteHandler = Some(|lexer| {
-lexer.read_word_with(&|s| match s {
+lexer.read_keyword_with(&|s| match s {
"yield" => Some(Token::Word(Word::Keyword(Keyword::Yield))),
_ => None,
})
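The handlers above implement a first-byte dispatch: each letter that can begin a keyword gets its own `read_keyword_with` closure, while letters that cannot (`h`, `j`, `q`, `x`) are wired straight to the generic identifier handler `IDN`. A rough sketch of the pattern with stand-in types (swc's real `ByteHandler` table indexes all 256 byte values):

```rust
// Stand-ins for swc's Token / Keyword machinery.
#[derive(Debug, PartialEq)]
enum Tok {
    Keyword(&'static str),
    Ident(String),
}

fn classify(word: &str) -> Tok {
    // The outer match plays the role of the byte-handler table: only
    // words whose first byte can start a keyword pay for comparisons.
    let keyword = match word.as_bytes().first() {
        Some(b'v') => match word {
            "var" => Some("var"),
            "void" => Some("void"),
            _ => None,
        },
        Some(b'w') => match word {
            "while" => Some("while"),
            "with" => Some("with"),
            _ => None,
        },
        // `h`, `j`, `q`, `x`, ... start no keyword, like L_H/L_J/L_Q/L_X.
        _ => None,
    };
    match keyword {
        Some(k) => Tok::Keyword(k),
        None => Tok::Ident(word.to_string()),
    }
}

fn main() {
    assert_eq!(classify("var"), Tok::Keyword("var"));
    assert_eq!(classify("value"), Tok::Ident("value".to_string()));
}
```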