diff --git a/.changeset/kind-weeks-study.md b/.changeset/kind-weeks-study.md new file mode 100644 index 000000000000..25e739fa1e9a --- /dev/null +++ b/.changeset/kind-weeks-study.md @@ -0,0 +1,5 @@ +--- +swc_common: major +--- + +refactor(es/parser): Split parser into also-lex/parse-only diff --git a/.changeset/twenty-rocks-deny.md b/.changeset/twenty-rocks-deny.md new file mode 100644 index 000000000000..8d3c26908988 --- /dev/null +++ b/.changeset/twenty-rocks-deny.md @@ -0,0 +1,6 @@ +--- +swc_ecma_lexer: major +swc_ecma_parser: major +--- + +split token value and token kind diff --git a/Cargo.lock b/Cargo.lock index f82a112c5416..2d10bc399081 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6215,7 +6215,7 @@ dependencies = [ "swc_common", "swc_ecma_ast", "swc_ecma_codegen", - "swc_ecma_parser", + "swc_ecma_lexer", "swc_ecma_transforms_base", "swc_ecma_transforms_typescript", "swc_ecma_visit", diff --git a/crates/swc_common/src/input.rs b/crates/swc_common/src/input.rs index a446712fa658..32cbad246972 100644 --- a/crates/swc_common/src/input.rs +++ b/crates/swc_common/src/input.rs @@ -72,7 +72,7 @@ impl<'a> From<&'a SourceFile> for StringInput<'a> { } } -impl Input for StringInput<'_> { +impl<'a> Input<'a> for StringInput<'a> { #[inline] fn cur(&self) -> Option { self.iter.clone().next() @@ -133,7 +133,7 @@ impl Input for StringInput<'_> { } #[inline] - unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str { + unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str { debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}"); let s = self.orig; @@ -151,7 +151,7 @@ impl Input for StringInput<'_> { } #[inline] - fn uncons_while(&mut self, mut pred: F) -> &str + fn uncons_while(&mut self, mut pred: F) -> &'a str where F: FnMut(char) -> bool, { @@ -237,7 +237,7 @@ impl Input for StringInput<'_> { } } -pub trait Input: Clone { +pub trait Input<'a>: Clone { fn cur(&self) -> Option; fn peek(&self) -> Option; fn peek_ahead(&self) -> Option; @@ 
-270,11 +270,11 @@ pub trait Input: Clone { /// /// - start should be less than or equal to end. /// - start and end should be in the valid range of input. - unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str; + unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str; /// Takes items from stream, testing each one with predicate. returns the /// range of items which passed predicate. - fn uncons_while(&mut self, f: F) -> &str + fn uncons_while(&mut self, f: F) -> &'a str where F: FnMut(char) -> bool; diff --git a/crates/swc_css_parser/src/lexer/mod.rs b/crates/swc_css_parser/src/lexer/mod.rs index 341669afa176..3ff7cc011cd6 100644 --- a/crates/swc_css_parser/src/lexer/mod.rs +++ b/crates/swc_css_parser/src/lexer/mod.rs @@ -21,7 +21,7 @@ pub(crate) type LexResult = Result; #[derive(Clone)] pub struct Lexer<'a, I> where - I: Input, + I: Input<'a>, { comments: Option<&'a dyn Comments>, pending_leading_comments: Vec, @@ -41,7 +41,7 @@ where impl<'a, I> Lexer<'a, I> where - I: Input, + I: Input<'a>, { pub fn new(input: I, comments: Option<&'a dyn Comments>, config: ParserConfig) -> Self { let start_pos = input.last_pos(); @@ -65,7 +65,7 @@ where fn with_buf(&mut self, op: F) -> LexResult where - F: for<'any> FnOnce(&mut Lexer, &mut String) -> LexResult, + F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String) -> LexResult, { let b = self.buf.clone(); let mut buf = b.borrow_mut(); @@ -77,7 +77,7 @@ where fn with_sub_buf(&mut self, op: F) -> LexResult where - F: for<'any> FnOnce(&mut Lexer, &mut String) -> LexResult, + F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String) -> LexResult, { let b = self.sub_buf.clone(); let mut sub_buf = b.borrow_mut(); @@ -89,7 +89,7 @@ where fn with_buf_and_raw_buf(&mut self, op: F) -> LexResult where - F: for<'any> FnOnce(&mut Lexer, &mut String, &mut String) -> LexResult, + F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String, &mut String) -> LexResult, { let b = self.buf.clone(); let r = self.raw_buf.clone(); @@ -103,7 
+103,7 @@ where } } -impl Iterator for Lexer<'_, I> { +impl<'a, I: Input<'a>> Iterator for Lexer<'a, I> { type Item = TokenAndSpan; fn next(&mut self) -> Option { @@ -133,9 +133,9 @@ pub struct LexerState { pos: BytePos, } -impl ParserInput for Lexer<'_, I> +impl<'a, I> ParserInput for Lexer<'a, I> where - I: Input, + I: Input<'a>, { type State = LexerState; @@ -185,9 +185,9 @@ where } } -impl Lexer<'_, I> +impl<'a, I> Lexer<'a, I> where - I: Input, + I: Input<'a>, { #[inline(always)] fn cur(&mut self) -> Option { diff --git a/crates/swc_css_parser/src/lib.rs b/crates/swc_css_parser/src/lib.rs index b8ff4b6ddd68..13ab7e459d10 100644 --- a/crates/swc_css_parser/src/lib.rs +++ b/crates/swc_css_parser/src/lib.rs @@ -41,14 +41,14 @@ where /// /// If there are syntax errors but if it was recoverable, it will be appended /// to `errors`. -pub fn parse_file<'a, 'b, T>( +pub fn parse_file<'a, T>( fm: &'a SourceFile, - comments: Option<&'b dyn Comments>, + comments: Option<&'a dyn Comments>, config: ParserConfig, errors: &mut Vec, ) -> PResult where - Parser>>: Parse, + Parser>>: Parse, { parse_string_input(StringInput::from(fm), comments, config, errors) } @@ -57,14 +57,14 @@ where /// /// If there are syntax errors but if it was recoverable, it will be appended /// to `errors`. 
-pub fn parse_string_input<'a, 'b, T>( +pub fn parse_string_input<'a, T>( input: StringInput<'a>, - comments: Option<&'b dyn Comments>, + comments: Option<&'a dyn Comments>, config: ParserConfig, errors: &mut Vec, ) -> PResult where - Parser>>: Parse, + Parser>>: Parse, { let lexer = Lexer::new(input, comments, config); let mut parser = Parser::new(lexer, config); diff --git a/crates/swc_ecma_lexer/Cargo.toml b/crates/swc_ecma_lexer/Cargo.toml index 486a17231030..58f03c769569 100644 --- a/crates/swc_ecma_lexer/Cargo.toml +++ b/crates/swc_ecma_lexer/Cargo.toml @@ -61,6 +61,9 @@ swc_ecma_visit = { version = "10.0.0", path = "../swc_ecma_visit" } swc_malloc = { version = "1.2.2", path = "../swc_malloc" } testing = { version = "11.0.0", path = "../testing" } +[[example]] +name = "lexer" + [[bench]] harness = false name = "lexer" diff --git a/crates/swc_ecma_parser/examples/lexer.rs b/crates/swc_ecma_lexer/examples/lexer.rs similarity index 61% rename from crates/swc_ecma_parser/examples/lexer.rs rename to crates/swc_ecma_lexer/examples/lexer.rs index e75f8b332bd0..63bc1cd8aabc 100644 --- a/crates/swc_ecma_parser/examples/lexer.rs +++ b/crates/swc_ecma_lexer/examples/lexer.rs @@ -1,9 +1,10 @@ use swc_common::{ errors::{ColorConfig, Handler}, + input::StringInput, sync::Lrc, FileName, SourceMap, }; -use swc_ecma_parser::{lexer::Lexer, Capturing, Parser, StringInput, Syntax}; +use swc_ecma_lexer::{lexer, lexer::Lexer, Syntax}; fn main() { let cm: Lrc = Default::default(); @@ -19,25 +20,15 @@ fn main() { "function foo() {}".into(), ); - let lexer = Lexer::new( + let l = Lexer::new( Syntax::Es(Default::default()), Default::default(), StringInput::from(&*fm), None, ); - let capturing = Capturing::new(lexer); - - let mut parser = Parser::new_from(capturing); - - for e in parser.take_errors() { - e.into_diagnostic(&handler).emit(); - } - - let _module = parser - .parse_module() + let tokens = lexer(l) .map_err(|e| e.into_diagnostic(&handler).emit()) - .expect("Failed to parse 
module."); - - println!("Tokens: {:?}", parser.input().take()); + .expect("Failed to lex."); + println!("Tokens: {tokens:?}",); } diff --git a/crates/swc_ecma_lexer/src/common/context.rs b/crates/swc_ecma_lexer/src/common/context.rs new file mode 100644 index 000000000000..3caced6e962d --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/context.rs @@ -0,0 +1,69 @@ +bitflags::bitflags! { + #[derive(Debug, Clone, Copy, Default)] + pub struct Context: u32 { + + /// `true` while backtracking + const IgnoreError = 1 << 0; + + /// Is in module code? + const Module = 1 << 1; + const CanBeModule = 1 << 2; + const Strict = 1 << 3; + + const ForLoopInit = 1 << 4; + const ForAwaitLoopInit = 1 << 5; + + const IncludeInExpr = 1 << 6; + /// If true, await expression is parsed, and "await" is treated as a + /// keyword. + const InAsync = 1 << 7; + /// If true, yield expression is parsed, and "yield" is treated as a + /// keyword. + const InGenerator = 1 << 8; + + /// If true, await is treated as a keyword. + const InStaticBlock = 1 << 9; + + const IsContinueAllowed = 1 << 10; + const IsBreakAllowed = 1 << 11; + + const InType = 1 << 12; + /// Typescript extension. + const ShouldNotLexLtOrGtAsType = 1 << 13; + /// Typescript extension. + const InDeclare = 1 << 14; + + /// If true, `:` should not be treated as a type annotation. + const InCondExpr = 1 << 15; + const WillExpectColonForCond = 1 << 16; + + const InClass = 1 << 17; + + const InClassField = 1 << 18; + + const InFunction = 1 << 19; + + /// This indicates current scope or the scope out of arrow function is + /// function declaration or function expression or not. 
+ const InsideNonArrowFunctionScope = 1 << 20; + + const InParameters = 1 << 21; + + const HasSuperClass = 1 << 22; + + const InPropertyName = 1 << 23; + + const InForcedJsxContext = 1 << 24; + + // If true, allow super.x and super[x] + const AllowDirectSuper = 1 << 25; + + const IgnoreElseClause = 1 << 26; + + const DisallowConditionalTypes = 1 << 27; + + const AllowUsingDecl = 1 << 28; + + const TopLevel = 1 << 29; + } +} diff --git a/crates/swc_ecma_lexer/src/common/input.rs b/crates/swc_ecma_lexer/src/common/input.rs new file mode 100644 index 000000000000..2f811e47747b --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/input.rs @@ -0,0 +1,48 @@ +use swc_common::BytePos; +use swc_ecma_ast::EsVersion; + +use super::{context::Context, syntax::Syntax}; +use crate::{error::Error, lexer}; + +/// Clone should be cheap if you are parsing typescript because typescript +/// syntax requires backtracking. +pub trait Tokens: Clone + Iterator { + fn set_ctx(&mut self, ctx: Context); + fn ctx(&self) -> Context; + fn syntax(&self) -> Syntax; + fn target(&self) -> EsVersion; + + fn start_pos(&self) -> BytePos { + BytePos(0) + } + + fn set_expr_allowed(&mut self, allow: bool); + fn set_next_regexp(&mut self, start: Option); + + fn token_context(&self) -> &lexer::TokenContexts; + fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; + fn set_token_context(&mut self, _c: lexer::TokenContexts); + + /// Implementors should use Rc>>. + /// + /// It is required because parser should backtrack while parsing typescript + /// code. + fn add_error(&self, error: Error); + + /// Add an error which is valid syntax in script mode. + /// + /// This errors should be dropped if it's not a module. + /// + /// Implementor should check for if [Context].module, and buffer errors if + /// module is false. Also, implementors should move errors to the error + /// buffer on set_ctx if the parser mode become module mode. 
+ fn add_module_mode_error(&self, error: Error); + + fn end_pos(&self) -> BytePos; + + fn take_errors(&mut self) -> Vec; + + /// If the program was parsed as a script, this contains the module + /// errors should the program be identified as a module in the future. + fn take_script_module_errors(&mut self) -> Vec; +} diff --git a/crates/swc_ecma_lexer/src/common/lexer/char.rs b/crates/swc_ecma_lexer/src/common/lexer/char.rs new file mode 100644 index 000000000000..53e433e4d7ee --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/char.rs @@ -0,0 +1,179 @@ +use std::iter::FusedIterator; + +use arrayvec::ArrayVec; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Char(u32); + +impl From for Char { + fn from(c: char) -> Self { + Char(c as u32) + } +} + +impl From for Char { + fn from(c: u32) -> Self { + Char(c) + } +} + +pub struct CharIter(ArrayVec); + +/// Ported from https://github.com/web-infra-dev/oxc/blob/99a4816ce7b6132b2667257984f9d92ae3768f03/crates/oxc_parser/src/lexer/mod.rs#L1349-L1374 +impl IntoIterator for Char { + type IntoIter = CharIter; + type Item = char; + + #[allow(unsafe_code)] + fn into_iter(self) -> Self::IntoIter { + // // TODO: Check if this is correct + // fn to_char(v: u8) -> char { + // char::from_digit(v as _, 16).unwrap_or('0') + // } + + CharIter(match char::from_u32(self.0) { + Some(c) => { + let mut buf = ArrayVec::new(); + // Safety: we can make sure that `buf` has enough capacity + unsafe { + buf.push_unchecked(c); + } + buf + } + None => { + let mut buf = ArrayVec::new(); + + let high = self.0 & 0xffff0000 >> 16; + + let low = self.0 & 0x0000ffff; + + // The second code unit of a surrogate pair is always in the range from 0xDC00 + // to 0xDFFF, and is called a low surrogate or a trail surrogate. 
+ if !(0xdc00..=0xdfff).contains(&low) { + // Safety: we can make sure that `buf` has enough capacity + unsafe { + buf.push_unchecked('\\'); + buf.push_unchecked('u'); + for c in format!("{high:x}").chars() { + buf.push_unchecked(c); + } + buf.push_unchecked('\\'); + buf.push_unchecked('u'); + for c in format!("{low:x}").chars() { + buf.push_unchecked(c); + } + } + } else { + // `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair` + let astral_code_point = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000; + + // Safety: we can make sure that `buf` has enough capacity + unsafe { + buf.push_unchecked('\\'); + buf.push_unchecked('u'); + for c in format!("{astral_code_point:x}").chars() { + buf.push_unchecked(c); + } + } + } + + buf + } + }) + } +} + +impl Iterator for CharIter { + type Item = char; + + fn next(&mut self) -> Option { + if self.0.is_empty() { + None + } else { + Some(self.0.remove(0)) + } + } +} + +impl FusedIterator for CharIter {} + +/// Implemented for `char`. +pub trait CharExt: Copy { + fn to_char(self) -> Option; + + /// Test whether a given character code starts an identifier. + /// + /// https://tc39.github.io/ecma262/#prod-IdentifierStart + #[inline] + fn is_ident_start(self) -> bool { + let c = match self.to_char() { + Some(c) => c, + None => return false, + }; + swc_ecma_ast::Ident::is_valid_start(c) + } + + /// Test whether a given character is part of an identifier. 
+ #[inline] + fn is_ident_part(self) -> bool { + let c = match self.to_char() { + Some(c) => c, + None => return false, + }; + swc_ecma_ast::Ident::is_valid_continue(c) + } + + /// See https://tc39.github.io/ecma262/#sec-line-terminators + #[inline] + fn is_line_terminator(self) -> bool { + let c = match self.to_char() { + Some(c) => c, + None => return false, + }; + matches!(c, '\r' | '\n' | '\u{2028}' | '\u{2029}') + } + + /// See https://tc39.github.io/ecma262/#sec-literals-string-literals + #[inline] + fn is_line_break(self) -> bool { + let c = match self.to_char() { + Some(c) => c, + None => return false, + }; + matches!(c, '\r' | '\n') + } + + /// See https://tc39.github.io/ecma262/#sec-white-space + #[inline] + fn is_ws(self) -> bool { + let c = match self.to_char() { + Some(c) => c, + None => return false, + }; + match c { + '\u{0009}' | '\u{000b}' | '\u{000c}' | '\u{0020}' | '\u{00a0}' | '\u{feff}' => true, + _ => { + if self.is_line_terminator() { + // NOTE: Line terminator is not whitespace. 
+ false + } else { + c.is_whitespace() + } + } + } + } +} + +impl CharExt for Char { + #[inline(always)] + fn to_char(self) -> Option { + char::from_u32(self.0) + } +} + +impl CharExt for char { + #[inline(always)] + fn to_char(self) -> Option { + Some(self) + } +} diff --git a/crates/swc_ecma_lexer/src/lexer/comments_buffer.rs b/crates/swc_ecma_lexer/src/common/lexer/comments_buffer.rs similarity index 92% rename from crates/swc_ecma_lexer/src/lexer/comments_buffer.rs rename to crates/swc_ecma_lexer/src/common/lexer/comments_buffer.rs index a68f0cca2667..471251f72601 100644 --- a/crates/swc_ecma_lexer/src/lexer/comments_buffer.rs +++ b/crates/swc_ecma_lexer/src/common/lexer/comments_buffer.rs @@ -3,24 +3,30 @@ use std::{iter::Rev, rc::Rc, vec::IntoIter}; use swc_common::{comments::Comment, BytePos}; #[derive(Clone)] -pub(crate) struct BufferedComment { +pub struct BufferedComment { pub kind: BufferedCommentKind, pub pos: BytePos, pub comment: Comment, } #[derive(Clone)] -pub(crate) enum BufferedCommentKind { +pub enum BufferedCommentKind { Leading, Trailing, } #[derive(Clone)] -pub(crate) struct CommentsBuffer { +pub struct CommentsBuffer { comments: OneDirectionalList, pending_leading: OneDirectionalList, } +impl Default for CommentsBuffer { + fn default() -> Self { + Self::new() + } +} + impl CommentsBuffer { pub fn new() -> Self { Self { diff --git a/crates/swc_ecma_lexer/src/common/lexer/jsx.rs b/crates/swc_ecma_lexer/src/common/lexer/jsx.rs new file mode 100644 index 000000000000..e43c83d35ccb --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/jsx.rs @@ -0,0 +1,270 @@ +macro_rules! 
xhtml { + ( + $( + $i:ident : $s:expr, + )* + ) => { + pub(super) fn xhtml(s: &str) -> Option { + match s{ + $(stringify!($i) => Some($s),)* + _ => None, + } + } + }; +} + +xhtml!( + quot: '\u{0022}', + amp: '&', + apos: '\u{0027}', + lt: '<', + gt: '>', + nbsp: '\u{00A0}', + iexcl: '\u{00A1}', + cent: '\u{00A2}', + pound: '\u{00A3}', + curren: '\u{00A4}', + yen: '\u{00A5}', + brvbar: '\u{00A6}', + sect: '\u{00A7}', + uml: '\u{00A8}', + copy: '\u{00A9}', + ordf: '\u{00AA}', + laquo: '\u{00AB}', + not: '\u{00AC}', + shy: '\u{00AD}', + reg: '\u{00AE}', + macr: '\u{00AF}', + deg: '\u{00B0}', + plusmn: '\u{00B1}', + sup2: '\u{00B2}', + sup3: '\u{00B3}', + acute: '\u{00B4}', + micro: '\u{00B5}', + para: '\u{00B6}', + middot: '\u{00B7}', + cedil: '\u{00B8}', + sup1: '\u{00B9}', + ordm: '\u{00BA}', + raquo: '\u{00BB}', + frac14: '\u{00BC}', + frac12: '\u{00BD}', + frac34: '\u{00BE}', + iquest: '\u{00BF}', + Agrave: '\u{00C0}', + Aacute: '\u{00C1}', + Acirc: '\u{00C2}', + Atilde: '\u{00C3}', + Auml: '\u{00C4}', + Aring: '\u{00C5}', + AElig: '\u{00C6}', + Ccedil: '\u{00C7}', + Egrave: '\u{00C8}', + Eacute: '\u{00C9}', + Ecirc: '\u{00CA}', + Euml: '\u{00CB}', + Igrave: '\u{00CC}', + Iacute: '\u{00CD}', + Icirc: '\u{00CE}', + Iuml: '\u{00CF}', + ETH: '\u{00D0}', + Ntilde: '\u{00D1}', + Ograve: '\u{00D2}', + Oacute: '\u{00D3}', + Ocirc: '\u{00D4}', + Otilde: '\u{00D5}', + Ouml: '\u{00D6}', + times: '\u{00D7}', + Oslash: '\u{00D8}', + Ugrave: '\u{00D9}', + Uacute: '\u{00DA}', + Ucirc: '\u{00DB}', + Uuml: '\u{00DC}', + Yacute: '\u{00DD}', + THORN: '\u{00DE}', + szlig: '\u{00DF}', + agrave: '\u{00E0}', + aacute: '\u{00E1}', + acirc: '\u{00E2}', + atilde: '\u{00E3}', + auml: '\u{00E4}', + aring: '\u{00E5}', + aelig: '\u{00E6}', + ccedil: '\u{00E7}', + egrave: '\u{00E8}', + eacute: '\u{00E9}', + ecirc: '\u{00EA}', + euml: '\u{00EB}', + igrave: '\u{00EC}', + iacute: '\u{00ED}', + icirc: '\u{00EE}', + iuml: '\u{00EF}', + eth: '\u{00F0}', + ntilde: '\u{00F1}', + ograve: '\u{00F2}', + 
oacute: '\u{00F3}', + ocirc: '\u{00F4}', + otilde: '\u{00F5}', + ouml: '\u{00F6}', + divide: '\u{00F7}', + oslash: '\u{00F8}', + ugrave: '\u{00F9}', + uacute: '\u{00FA}', + ucirc: '\u{00FB}', + uuml: '\u{00FC}', + yacute: '\u{00FD}', + thorn: '\u{00FE}', + yuml: '\u{00FF}', + OElig: '\u{0152}', + oelig: '\u{0153}', + Scaron: '\u{0160}', + scaron: '\u{0161}', + Yuml: '\u{0178}', + fnof: '\u{0192}', + circ: '\u{02C6}', + tilde: '\u{02DC}', + Alpha: '\u{0391}', + Beta: '\u{0392}', + Gamma: '\u{0393}', + Delta: '\u{0394}', + Epsilon: '\u{0395}', + Zeta: '\u{0396}', + Eta: '\u{0397}', + Theta: '\u{0398}', + Iota: '\u{0399}', + Kappa: '\u{039A}', + Lambda: '\u{039B}', + Mu: '\u{039C}', + Nu: '\u{039D}', + Xi: '\u{039E}', + Omicron: '\u{039F}', + Pi: '\u{03A0}', + Rho: '\u{03A1}', + Sigma: '\u{03A3}', + Tau: '\u{03A4}', + Upsilon: '\u{03A5}', + Phi: '\u{03A6}', + Chi: '\u{03A7}', + Psi: '\u{03A8}', + Omega: '\u{03A9}', + alpha: '\u{03B1}', + beta: '\u{03B2}', + gamma: '\u{03B3}', + delta: '\u{03B4}', + epsilon: '\u{03B5}', + zeta: '\u{03B6}', + eta: '\u{03B7}', + theta: '\u{03B8}', + iota: '\u{03B9}', + kappa: '\u{03BA}', + lambda: '\u{03BB}', + mu: '\u{03BC}', + nu: '\u{03BD}', + xi: '\u{03BE}', + omicron: '\u{03BF}', + pi: '\u{03C0}', + rho: '\u{03C1}', + sigmaf: '\u{03C2}', + sigma: '\u{03C3}', + tau: '\u{03C4}', + upsilon: '\u{03C5}', + phi: '\u{03C6}', + chi: '\u{03C7}', + psi: '\u{03C8}', + omega: '\u{03C9}', + thetasym: '\u{03D1}', + upsih: '\u{03D2}', + piv: '\u{03D6}', + ensp: '\u{2002}', + emsp: '\u{2003}', + thinsp: '\u{2009}', + zwnj: '\u{200C}', + zwj: '\u{200D}', + lrm: '\u{200E}', + rlm: '\u{200F}', + ndash: '\u{2013}', + mdash: '\u{2014}', + lsquo: '\u{2018}', + rsquo: '\u{2019}', + sbquo: '\u{201A}', + ldquo: '\u{201C}', + rdquo: '\u{201D}', + bdquo: '\u{201E}', + dagger: '\u{2020}', + Dagger: '\u{2021}', + bull: '\u{2022}', + hellip: '\u{2026}', + permil: '\u{2030}', + prime: '\u{2032}', + Prime: '\u{2033}', + lsaquo: '\u{2039}', + rsaquo: '\u{203A}', + 
oline: '\u{203E}', + frasl: '\u{2044}', + euro: '\u{20AC}', + image: '\u{2111}', + weierp: '\u{2118}', + real: '\u{211C}', + trade: '\u{2122}', + alefsym: '\u{2135}', + larr: '\u{2190}', + uarr: '\u{2191}', + rarr: '\u{2192}', + darr: '\u{2193}', + harr: '\u{2194}', + crarr: '\u{21B5}', + lArr: '\u{21D0}', + uArr: '\u{21D1}', + rArr: '\u{21D2}', + dArr: '\u{21D3}', + hArr: '\u{21D4}', + forall: '\u{2200}', + part: '\u{2202}', + exist: '\u{2203}', + empty: '\u{2205}', + nabla: '\u{2207}', + isin: '\u{2208}', + notin: '\u{2209}', + ni: '\u{220B}', + prod: '\u{220F}', + sum: '\u{2211}', + minus: '\u{2212}', + lowast: '\u{2217}', + radic: '\u{221A}', + prop: '\u{221D}', + infin: '\u{221E}', + ang: '\u{2220}', + and: '\u{2227}', + or: '\u{2228}', + cap: '\u{2229}', + cup: '\u{222A}', + int: '\u{222B}', + there4: '\u{2234}', + sim: '\u{223C}', + cong: '\u{2245}', + asymp: '\u{2248}', + ne: '\u{2260}', + equiv: '\u{2261}', + le: '\u{2264}', + ge: '\u{2265}', + sub: '\u{2282}', + sup: '\u{2283}', + nsub: '\u{2284}', + sube: '\u{2286}', + supe: '\u{2287}', + oplus: '\u{2295}', + otimes: '\u{2297}', + perp: '\u{22A5}', + sdot: '\u{22C5}', + lceil: '\u{2308}', + rceil: '\u{2309}', + lfloor: '\u{230A}', + rfloor: '\u{230B}', + lang: '\u{2329}', + rang: '\u{232A}', + loz: '\u{25CA}', + spades: '\u{2660}', + clubs: '\u{2663}', + hearts: '\u{2665}', + diams: '\u{2666}', +); diff --git a/crates/swc_ecma_lexer/src/common/lexer/mod.rs b/crates/swc_ecma_lexer/src/common/lexer/mod.rs new file mode 100644 index 000000000000..fb7efc8896eb --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/mod.rs @@ -0,0 +1,2024 @@ +use std::borrow::Cow; + +use char::{Char, CharExt}; +use either::Either::{self, Left, Right}; +use num_bigint::BigInt as BigIntValue; +use num_traits::{Num as NumTrait, ToPrimitive}; +use number::LazyBigInt; +use smartstring::{LazyCompact, SmartString}; +use state::State; +use swc_atoms::Atom; +use swc_common::{ + input::{Input, StringInput}, + BytePos, Span, +}; +use 
swc_ecma_ast::{EsVersion, Ident}; + +use self::jsx::xhtml; +use super::{context::Context, input::Tokens}; +use crate::{error::SyntaxError, token::BinOpToken}; + +pub mod char; +pub mod comments_buffer; +mod jsx; +pub mod number; +pub mod state; +pub mod token; +pub mod whitespace; + +use token::TokenFactory; + +pub type LexResult = Result; + +pub trait Lexer<'a, TokenAndSpan>: Tokens + Sized { + type State: self::state::State; + type Token: token::TokenFactory<'a, TokenAndSpan, Self, Lexer = Self>; + + fn input(&self) -> &StringInput<'a>; + fn input_mut(&mut self) -> &mut StringInput<'a>; + fn state(&self) -> &Self::State; + fn state_mut(&mut self) -> &mut Self::State; + fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments>; + fn comments_buffer(&self) -> Option<&self::comments_buffer::CommentsBuffer>; + fn comments_buffer_mut(&mut self) -> Option<&mut self::comments_buffer::CommentsBuffer>; + /// # Safety + /// + /// We know that the start and the end are valid + unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str; + fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str; + fn atom<'b>(&self, s: impl Into>) -> swc_atoms::Atom; + fn push_error(&self, error: crate::error::Error); + fn buf(&self) -> std::rc::Rc>; + // TODO: invest why there has regression if implement this by trait + fn skip_block_comment(&mut self); + + #[inline(always)] + #[allow(clippy::misnamed_getters)] + fn had_line_break_before_last(&self) -> bool { + self.state().had_line_break() + } + + #[inline(always)] + fn span(&self, start: BytePos) -> Span { + let end = self.last_pos(); + if cfg!(debug_assertions) && start > end { + unreachable!( + "assertion failed: (span.start <= span.end). + start = {}, end = {}", + start.0, end.0 + ) + } + Span { lo: start, hi: end } + } + + #[inline(always)] + fn bump(&mut self) { + unsafe { + // Safety: Actually this is not safe but this is an internal method. 
+ self.input_mut().bump() + } + } + + #[inline(always)] + fn is(&self, c: u8) -> bool { + self.input().is_byte(c) + } + + #[inline(always)] + fn is_str(&self, s: &str) -> bool { + self.input().is_str(s) + } + + #[inline(always)] + fn eat(&mut self, c: u8) -> bool { + self.input_mut().eat_byte(c) + } + + #[inline(always)] + fn cur(&self) -> Option { + self.input().cur() + } + + #[inline(always)] + fn peek(&self) -> Option { + self.input().peek() + } + + #[inline(always)] + fn peek_ahead(&self) -> Option { + self.input().peek_ahead() + } + + #[inline(always)] + fn cur_pos(&self) -> BytePos { + self.input().cur_pos() + } + + #[inline(always)] + fn last_pos(&self) -> BytePos { + self.input().last_pos() + } + + /// Shorthand for `let span = self.span(start); self.error_span(span)` + #[cold] + #[inline(never)] + fn error(&self, start: BytePos, kind: SyntaxError) -> LexResult { + let span = self.span(start); + self.error_span(span, kind) + } + + #[cold] + #[inline(never)] + fn error_span(&self, span: Span, kind: SyntaxError) -> LexResult { + Err(crate::error::Error::new(span, kind)) + } + + #[cold] + #[inline(never)] + fn emit_error(&mut self, start: BytePos, kind: SyntaxError) { + let span = self.span(start); + self.emit_error_span(span, kind) + } + + #[cold] + #[inline(never)] + fn emit_error_span(&mut self, span: Span, kind: SyntaxError) { + if self.ctx().contains(Context::IgnoreError) { + return; + } + tracing::warn!("Lexer error at {:?}", span); + let err = crate::error::Error::new(span, kind); + self.push_error(err); + } + + #[cold] + #[inline(never)] + fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) { + let span = self.span(start); + if self.ctx().contains(Context::Strict) { + self.emit_error_span(span, kind); + } else { + let err = crate::error::Error::new(span, kind); + self.add_module_mode_error(err); + } + } + + #[cold] + #[inline(never)] + fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) { + let span = 
self.span(start); + let err = crate::error::Error::new(span, kind); + self.add_module_mode_error(err); + } + + #[inline(never)] + fn skip_line_comment(&mut self, start_skip: usize) { + let start = self.cur_pos(); + self.input_mut().bump_bytes(start_skip); + let slice_start = self.cur_pos(); + + // foo // comment for foo + // bar + // + // foo + // // comment for bar + // bar + // + let is_for_next = + self.state().had_line_break() || !self.state().can_have_trailing_line_comment(); + + // Optimization: Performance improvement with byte-based termination character + // search + let input_str = self.input().as_str(); + let bytes = input_str.as_bytes(); + let mut idx = 0; + let len = bytes.len(); + + // Direct search for line termination characters (ASCII case optimization) + while idx < len { + let b = *unsafe { bytes.get_unchecked(idx) }; + if b == b'\r' || b == b'\n' { + self.state_mut().set_had_line_break(true); + break; + } else if b > 127 { + // non-ASCII case: Check for Unicode line termination characters + let s = unsafe { input_str.get_unchecked(idx..) 
}; + if let Some(first_char) = s.chars().next() { + if first_char == '\u{2028}' || first_char == '\u{2029}' { + self.state_mut().set_had_line_break(true); + break; + } + idx += first_char.len_utf8() - 1; // `-1` will incrumented + // below + } + } + idx += 1; + } + + self.input_mut().bump_bytes(idx); + let end = self.cur_pos(); + + // Create and process slice only if comments need to be stored + if self.comments_buffer().is_some() { + let s = unsafe { + // Safety: We know that the start and the end are valid + self.input_slice(slice_start, end) + }; + let cmt = swc_common::comments::Comment { + kind: swc_common::comments::CommentKind::Line, + span: Span::new(start, end), + text: self.atom(s), + }; + + if is_for_next { + let comments = self.comments_buffer_mut().unwrap(); + comments.push_pending_leading(cmt); + } else { + let pos = self.state().prev_hi(); + let comments = self.comments_buffer_mut().unwrap(); + comments.push(self::comments_buffer::BufferedComment { + kind: self::comments_buffer::BufferedCommentKind::Trailing, + pos, + comment: cmt, + }); + } + } + + unsafe { + // Safety: We got end from self.input + self.input_mut().reset_to(end); + } + } + + /// Skip comments or whitespaces. + /// + /// See https://tc39.github.io/ecma262/#sec-white-space + #[inline(never)] + fn skip_space(&mut self) { + loop { + let (offset, newline) = { + let mut skip = self::whitespace::SkipWhitespace { + input: self.input().as_str(), + newline: false, + offset: 0, + }; + + skip.scan(); + + (skip.offset, skip.newline) + }; + + self.input_mut().bump_bytes(offset as usize); + if newline { + self.state_mut().set_had_line_break(true); + } + + if LEX_COMMENTS && self.input().is_byte(b'/') { + if let Some(c) = self.peek() { + if c == '/' { + self.skip_line_comment(2); + continue; + } else if c == '*' { + self.skip_block_comment(); + continue; + } + } + } + + break; + } + } + + /// Ensure that ident cannot directly follow numbers. 
+ fn ensure_not_ident(&mut self) -> LexResult<()> { + match self.cur() { + Some(c) if c.is_ident_start() => { + let span = pos_span(self.cur_pos()); + self.error_span(span, SyntaxError::IdentAfterNum)? + } + _ => Ok(()), + } + } + + fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult { + self.ensure_not_ident()?; + if self.syntax().typescript() && self.target() >= EsVersion::Es5 { + self.emit_error(start, SyntaxError::TS1085); + } + self.emit_strict_mode_error(start, SyntaxError::LegacyOctal); + Ok(val) + } + + /// `op`- |total, radix, value| -> (total * radix + value, continue) + fn read_digits( + &mut self, + mut op: F, + allow_num_separator: bool, + ) -> LexResult + where + F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>, + Ret: Copy + Default, + { + debug_assert!( + RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, + "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}" + ); + + if cfg!(feature = "debug") { + tracing::trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur()); + } + + let start = self.cur_pos(); + let mut total: Ret = Default::default(); + let mut prev = None; + + while let Some(c) = self.cur() { + if allow_num_separator && c == '_' { + let is_allowed = |c: Option| { + if c.is_none() { + return false; + } + + let c = c.unwrap(); + + c.is_digit(RADIX as _) + }; + let is_forbidden = |c: Option| { + if c.is_none() { + return true; + } + + if RADIX == 16 { + matches!(c.unwrap(), '.' | 'X' | '_' | 'x') + } else { + matches!(c.unwrap(), '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o') + } + }; + + let next = self.input().peek(); + + if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) { + self.emit_error( + start, + SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits, + ); + } + + // Ignore this _ character + unsafe { + // Safety: cur() returns Some(c) where c is a valid char + self.input_mut().bump(); + } + + continue; + } + + // e.g. 
(val for a) = 10 where radix = 16 + let val = if let Some(val) = c.to_digit(RADIX as _) { + val + } else { + return Ok(total); + }; + + self.bump(); + + let (t, cont) = op(total, RADIX, val)?; + + total = t; + + if !cont { + return Ok(total); + } + + prev = Some(c); + } + + Ok(total) + } + + /// This can read long integers like + /// "13612536612375123612312312312312312312312". + fn read_number_no_dot(&mut self) -> LexResult { + debug_assert!( + RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, + "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}" + ); + let start = self.cur_pos(); + + let mut read_any = false; + + let res = self.read_digits::<_, f64, RADIX>( + |total, radix, v| { + read_any = true; + + Ok((f64::mul_add(total, radix as f64, v as f64), true)) + }, + true, + ); + + if !read_any { + self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?; + } + res + } + + /// This can read long integers like + /// "13612536612375123612312312312312312312312". + /// + /// - Returned `bool` is `true` is there was `8` or `9`. 
+ fn read_number_no_dot_as_str( + &mut self, + ) -> LexResult<(f64, LazyBigInt, bool)> { + debug_assert!( + RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, + "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}" + ); + let start = self.cur_pos(); + + let mut non_octal = false; + let mut read_any = false; + + self.read_digits::<_, f64, RADIX>( + |total, radix, v| { + read_any = true; + + if v == 8 || v == 9 { + non_octal = true; + } + + Ok((f64::mul_add(total, radix as f64, v as f64), true)) + }, + true, + )?; + + if !read_any { + self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?; + } + + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + // Remove number separator from number + let raw_number_str = raw.replace('_', ""); + let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32) + .expect("failed to parse float using BigInt") + .to_f64() + .expect("failed to parse float using BigInt"); + Ok((parsed_float, LazyBigInt::new(raw_number_str), non_octal)) + } + + /// Read an integer in the given radix. Return `None` if zero digits + /// were read, the integer value otherwise. + /// When `len` is not zero, this + /// will return `None` unless the integer has exactly `len` digits. 
+ fn read_int(&mut self, len: u8) -> LexResult> { + let mut count = 0u16; + let v = self.read_digits::<_, Option, RADIX>( + |opt: Option, radix, val| { + count += 1; + let total = opt.unwrap_or_default() * radix as f64 + val as f64; + + Ok((Some(total), count != len as u16)) + }, + true, + )?; + if len != 0 && count != len as u16 { + Ok(None) + } else { + Ok(v) + } + } + + /// Reads an integer, octal integer, or floating-point number + fn read_number( + &mut self, + starts_with_dot: bool, + ) -> LexResult, Atom)>> { + debug_assert!(self.cur().is_some()); + + if starts_with_dot { + debug_assert_eq!( + self.cur(), + Some('.'), + "read_number(starts_with_dot = true) expects current char to be '.'" + ); + } + + let start = self.cur_pos(); + + let val = if starts_with_dot { + // first char is '.' + 0f64 + } else { + let starts_with_zero = self.cur().unwrap() == '0'; + + // Use read_number_no_dot to support long numbers. + let (val, s, not_octal) = self.read_number_no_dot_as_str::<10>()?; + + if self.eat(b'n') { + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + + return Ok(Either::Right((Box::new(s.into_value()), self.atom(raw)))); + } + + if starts_with_zero { + // TODO: I guess it would be okay if I don't use -ffast-math + // (or something like that), but needs review. + if val == 0.0f64 { + // If only one zero is used, it's decimal. + // And if multiple zero is used, it's octal. + // + // e.g. `0` is decimal (so it can be part of float) + // + // e.g. 
`000` is octal + if start.0 != self.last_pos().0 - 1 { + // `-1` is utf 8 length of `0` + + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + let raw = self.atom(raw); + return self + .make_legacy_octal(start, 0f64) + .map(|value| Either::Left((value, raw))); + } + } else { + // strict mode hates non-zero decimals starting with zero. + // e.g. 08.1 is strict mode violation but 0.1 is valid float. + + if val.fract() == 0.0 { + let val_str = &s.value; + + // if it contains '8' or '9', it's decimal. + if not_octal { + // Continue parsing + self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal); + } else { + // It's Legacy octal, and we should reinterpret value. + let val = BigIntValue::from_str_radix(val_str, 8) + .unwrap_or_else(|err| { + panic!( + "failed to parse {val_str} using `from_str_radix`: {err:?}" + ) + }) + .to_f64() + .unwrap_or_else(|| { + panic!("failed to parse {val_str} into float using BigInt") + }); + + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + let raw = self.atom(raw); + + return self + .make_legacy_octal(start, val) + .map(|value| Either::Left((value, raw))); + } + } + } + } + + val + }; + + // At this point, number cannot be an octal literal. + + let mut val: f64 = val; + + // `0.a`, `08.a`, `102.a` are invalid. 
+ // + // `.1.a`, `.1e-4.a` are valid, + if self.cur() == Some('.') { + self.bump(); + + if starts_with_dot { + debug_assert!(self.cur().is_some()); + debug_assert!(self.cur().unwrap().is_ascii_digit()); + } + + // Read numbers after dot + self.read_int::<10>(0)?; + + val = { + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + + // Remove number separator from number + if raw.contains('_') { + Cow::Owned(raw.replace('_', "")) + } else { + Cow::Borrowed(raw) + } + .parse() + .expect("failed to parse float using rust's impl") + }; + } + + // Handle 'e' and 'E' + // + // .5e1 = 5 + // 1e2 = 100 + // 1e+2 = 100 + // 1e-2 = 0.01 + match self.cur() { + Some('e') | Some('E') => { + self.bump(); + + let next = match self.cur() { + Some(next) => next, + None => { + let pos = self.cur_pos(); + self.error(pos, SyntaxError::NumLitTerminatedWithExp)? + } + }; + + let positive = if next == '+' || next == '-' { + self.bump(); // remove '+', '-' + + next == '+' + } else { + true + }; + + let exp = self.read_number_no_dot::<10>()?; + + val = if exp == f64::INFINITY { + if positive && val != 0.0 { + f64::INFINITY + } else { + 0.0 + } + } else { + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + + if raw.contains('_') { + Cow::Owned(raw.replace('_', "")) + } else { + Cow::Borrowed(raw) + } + .parse() + .expect("failed to parse float literal") + } + } + _ => {} + } + + self.ensure_not_ident()?; + + let end = self.cur_pos(); + let raw_str = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + Ok(Either::Left((val, raw_str.into()))) + } + + fn read_int_u32(&mut self, len: u8) -> LexResult> { + let start = self.state().start(); + + let mut count = 0; + let v = self.read_digits::<_, Option, RADIX>( + |opt: Option, radix, val| { + 
count += 1; + + let total = opt + .unwrap_or_default() + .checked_mul(radix as u32) + .and_then(|v| v.checked_add(val)) + .ok_or_else(|| { + let span = Span::new(start, start); + crate::error::Error::new(span, SyntaxError::InvalidUnicodeEscape) + })?; + + Ok((Some(total), count != len)) + }, + true, + )?; + if len != 0 && count != len { + Ok(None) + } else { + Ok(v) + } + } + + /// Returns `Left(value)` or `Right(BigInt)` + fn read_radix_number( + &mut self, + ) -> LexResult, Atom)>> { + debug_assert!( + RADIX == 2 || RADIX == 8 || RADIX == 16, + "radix should be one of 2, 8, 16, but got {RADIX}" + ); + debug_assert_eq!(self.cur(), Some('0')); + + let start = self.cur_pos(); + + self.bump(); + + match self.input().cur() { + Some(..) => { + self.bump(); + } + _ => { + unreachable!(); + } + } + + let (val, s, _) = self.read_number_no_dot_as_str::()?; + + if self.eat(b'n') { + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + + return Ok(Either::Right((Box::new(s.into_value()), self.atom(raw)))); + } + + self.ensure_not_ident()?; + + let end = self.cur_pos(); + let raw = unsafe { + // Safety: We got both start and end position from `self.input` + self.input_slice(start, end) + }; + + Ok(Either::Left((val, self.atom(raw)))) + } + + /// Consume pending comments. + /// + /// This is called when the input is exhausted. + #[cold] + #[inline(never)] + fn consume_pending_comments(&mut self) { + if let Some(comments) = self.comments() { + let last = self.state().prev_hi(); + let start_pos = self.start_pos(); + let comments_buffer = self.comments_buffer_mut().unwrap(); + + // move the pending to the leading or trailing + for c in comments_buffer.take_pending_leading() { + // if the file had no tokens and no shebang, then treat any + // comments in the leading comments buffer as leading. + // Otherwise treat them as trailing. 
+ if last == start_pos { + comments_buffer.push(self::comments_buffer::BufferedComment { + kind: self::comments_buffer::BufferedCommentKind::Leading, + pos: last, + comment: c, + }); + } else { + comments_buffer.push(self::comments_buffer::BufferedComment { + kind: self::comments_buffer::BufferedCommentKind::Trailing, + pos: last, + comment: c, + }); + } + } + + // now fill the user's passed in comments + for comment in comments_buffer.take_comments() { + match comment.kind { + self::comments_buffer::BufferedCommentKind::Leading => { + comments.add_leading(comment.pos, comment.comment); + } + self::comments_buffer::BufferedCommentKind::Trailing => { + comments.add_trailing(comment.pos, comment.comment); + } + } + } + } + } + + /// Read a JSX identifier (valid tag or attribute name). + /// + /// Optimized version since JSX identifiers can't contain + /// escape characters and so can be read as a single slice. + /// Also assumes that first character was already checked + /// by isIdentifierStart in readToken. 
+ fn read_jsx_word(&mut self) -> LexResult { + debug_assert!(self.syntax().jsx()); + debug_assert!(self.input().cur().is_some_and(|c| c.is_ident_start())); + + let mut first = true; + let slice = self.input_uncons_while(|c| { + if first { + first = false; + c.is_ident_start() + } else { + c.is_ident_part() || c == '-' + } + }); + + Ok(Self::Token::jsx_name(slice, self)) + } + + fn read_jsx_entity(&mut self) -> LexResult<(char, String)> { + debug_assert!(self.syntax().jsx()); + + fn from_code(s: &str, radix: u32) -> LexResult { + // TODO(kdy1): unwrap -> Err + let c = char::from_u32( + u32::from_str_radix(s, radix).expect("failed to parse string as number"), + ) + .expect("failed to parse number as char"); + + Ok(c) + } + + fn is_hex(s: &str) -> bool { + s.chars().all(|c| c.is_ascii_hexdigit()) + } + + fn is_dec(s: &str) -> bool { + s.chars().all(|c| c.is_ascii_digit()) + } + + let mut s = SmartString::::default(); + + let c = self.input().cur(); + debug_assert_eq!(c, Some('&')); + unsafe { + // Safety: cur() was Some('&') + self.input_mut().bump(); + } + + let start_pos = self.input().cur_pos(); + + for _ in 0..10 { + let c = match self.input().cur() { + Some(c) => c, + None => break, + }; + unsafe { + // Safety: cur() was Some(c) + self.input_mut().bump(); + } + + if c == ';' { + if let Some(stripped) = s.strip_prefix('#') { + if stripped.starts_with('x') { + if is_hex(&s[2..]) { + let value = from_code(&s[2..], 16)?; + + return Ok((value, format!("&{s};"))); + } + } else if is_dec(stripped) { + let value = from_code(stripped, 10)?; + + return Ok((value, format!("&{s};"))); + } + } else if let Some(entity) = xhtml(&s) { + return Ok((entity, format!("&{s};"))); + } + + break; + } + + s.push(c) + } + + unsafe { + // Safety: start_pos is a valid position because we got it from self.input + self.input_mut().reset_to(start_pos); + } + + Ok(('&', "&".to_string())) + } + + fn read_jsx_new_line(&mut self, normalize_crlf: bool) -> LexResult> { + 
debug_assert!(self.syntax().jsx()); + let ch = self.input().cur().unwrap(); + unsafe { + // Safety: cur() was Some(ch) + self.input_mut().bump(); + } + + let out = if ch == '\r' && self.input().cur() == Some('\n') { + unsafe { + // Safety: cur() was Some('\n') + self.input_mut().bump(); + } + Either::Left(if normalize_crlf { "\n" } else { "\r\n" }) + } else { + Either::Right(ch) + }; + let cur_pos = self.input().cur_pos(); + self.state_mut().add_current_line(1); + self.state_mut().set_line_start(cur_pos); + Ok(out) + } + + fn read_jsx_str(&mut self, quote: char) -> LexResult { + debug_assert!(self.syntax().jsx()); + let start = self.input().cur_pos(); + unsafe { + // Safety: cur() was Some(quote) + self.input_mut().bump(); // `quote` + } + let mut out = String::new(); + let mut chunk_start = self.input().cur_pos(); + loop { + let ch = match self.input().cur() { + Some(c) => c, + None => { + let start = self.state().start(); + self.emit_error(start, SyntaxError::UnterminatedStrLit); + break; + } + }; + let cur_pos = self.input().cur_pos(); + if ch == '\\' { + let value = unsafe { + // Safety: We already checked for the range + self.input_slice(chunk_start, cur_pos) + }; + + out.push_str(value); + out.push('\\'); + + self.bump(); + + chunk_start = self.input().cur_pos(); + + continue; + } + + if ch == quote { + break; + } + + if ch == '&' { + let value = unsafe { + // Safety: We already checked for the range + self.input_slice(chunk_start, cur_pos) + }; + + out.push_str(value); + + let jsx_entity = self.read_jsx_entity()?; + + out.push(jsx_entity.0); + + chunk_start = self.input().cur_pos(); + } else if ch.is_line_terminator() { + let value = unsafe { + // Safety: We already checked for the range + self.input_slice(chunk_start, cur_pos) + }; + + out.push_str(value); + + match self.read_jsx_new_line(false)? 
{ + Either::Left(s) => { + out.push_str(s); + } + Either::Right(c) => { + out.push(c); + } + } + + chunk_start = cur_pos + BytePos(ch.len_utf8() as _); + } else { + unsafe { + // Safety: cur() was Some(ch) + self.input_mut().bump(); + } + } + } + + let value = if out.is_empty() { + // Fast path: We don't need to allocate + + let cur_pos = self.input().cur_pos(); + let value = unsafe { + // Safety: We already checked for the range + self.input_slice(chunk_start, cur_pos) + }; + self.atom(value) + } else { + let cur_pos = self.input().cur_pos(); + let value = unsafe { + // Safety: We already checked for the range + self.input_slice(chunk_start, cur_pos) + }; + out.push_str(value); + self.atom(out) + }; + + // it might be at the end of the file when + // the string literal is unterminated + if self.input().peek_ahead().is_some() { + unsafe { + // Safety: We called peek_ahead() which means cur() was Some + self.input_mut().bump(); + } + } + + let end = self.input().cur_pos(); + let raw = unsafe { + // Safety: Both of `start` and `end` are generated from `cur_pos()` + self.input_slice(start, end) + }; + let raw = self.atom(raw); + Ok(Self::Token::str(value, raw, self)) + } + + /// Utility method to reuse buffer. 
+ fn with_buf(&mut self, op: F) -> LexResult + where + F: FnOnce(&mut Self, &mut String) -> LexResult, + { + let b = self.buf(); + let mut buf = b.borrow_mut(); + buf.clear(); + op(self, &mut buf) + } + + fn read_unicode_escape(&mut self) -> LexResult> { + debug_assert_eq!(self.cur(), Some('u')); + + let mut chars = Vec::with_capacity(4); + let mut is_curly = false; + + self.bump(); // 'u' + + if self.eat(b'{') { + is_curly = true; + } + + let state = self.input().cur_pos(); + let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) { + Ok(Some(val)) => { + if 0x0010_ffff >= val { + char::from_u32(val) + } else { + let start = self.cur_pos(); + + self.error( + start, + SyntaxError::BadCharacterEscapeSequence { + expected: if is_curly { + "1-6 hex characters in the range 0 to 10FFFF." + } else { + "4 hex characters" + }, + }, + )? + } + } + _ => { + let start = self.cur_pos(); + + self.error( + start, + SyntaxError::BadCharacterEscapeSequence { + expected: if is_curly { + "1-6 hex characters" + } else { + "4 hex characters" + }, + }, + )? + } + }; + + match c { + Some(c) => { + chars.push(c.into()); + } + _ => { + unsafe { + // Safety: state is valid position because we got it from cur_pos() + self.input_mut().reset_to(state); + } + + chars.push(Char::from('\\')); + chars.push(Char::from('u')); + + if is_curly { + chars.push(Char::from('{')); + + for _ in 0..6 { + if let Some(c) = self.input().cur() { + if c == '}' { + break; + } + + self.bump(); + + chars.push(Char::from(c)); + } else { + break; + } + } + + chars.push(Char::from('}')); + } else { + for _ in 0..4 { + if let Some(c) = self.input().cur() { + self.bump(); + + chars.push(Char::from(c)); + } + } + } + } + } + + if is_curly && !self.eat(b'}') { + self.error(state, SyntaxError::InvalidUnicodeEscape)? 
+ } + + Ok(chars) + } + + #[cold] + fn read_shebang(&mut self) -> LexResult> { + if self.input().cur() != Some('#') || self.input().peek() != Some('!') { + return Ok(None); + } + unsafe { + // Safety: cur() is Some('#') + self.input_mut().bump(); + // Safety: cur() is Some('!') + self.input_mut().bump(); + } + let s = self.input_uncons_while(|c| !c.is_line_terminator()); + Ok(Some(self.atom(s))) + } + + fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult { + let start = self.cur_pos(); + + let mut cooked = Ok(String::new()); + let mut cooked_slice_start = start; + let raw_slice_start = start; + + macro_rules! consume_cooked { + () => {{ + if let Ok(cooked) = &mut cooked { + let last_pos = self.cur_pos(); + cooked.push_str(unsafe { + // Safety: Both of start and last_pos are valid position because we got them + // from `self.input` + self.input_slice(cooked_slice_start, last_pos) + }); + } + }}; + } + + while let Some(c) = self.cur() { + if c == '`' || (c == '$' && self.peek() == Some('{')) { + if start == self.cur_pos() && self.state().last_was_tpl_element() { + if c == '$' { + self.bump(); + self.bump(); + return Ok(Self::Token::DOLLAR_LBRACE); + } else { + self.bump(); + return Ok(Self::Token::BACKQUOTE); + } + } + + // If we don't have any escape + let cooked = if cooked_slice_start == raw_slice_start { + let last_pos = self.cur_pos(); + let s = unsafe { + // Safety: Both of start and last_pos are valid position because we got them + // from `self.input` + self.input_slice(cooked_slice_start, last_pos) + }; + + Ok(self.atom(s)) + } else { + consume_cooked!(); + + cooked.map(|s| self.atom(s)) + }; + + // TODO: Handle error + let end = self.input().cur_pos(); + let raw = unsafe { + // Safety: Both of start and last_pos are valid position because we got them + // from `self.input` + self.input_slice(raw_slice_start, end) + }; + let raw = self.atom(raw); + return Ok(Self::Token::template(cooked, raw, self)); + } + + if c == '\\' { + consume_cooked!(); 
+ + match self.read_escaped_char(true) { + Ok(Some(chars)) => { + if let Ok(ref mut cooked) = cooked { + for c in chars { + cooked.extend(c); + } + } + } + Ok(None) => {} + Err(error) => { + cooked = Err(error); + } + } + + cooked_slice_start = self.cur_pos(); + } else if c.is_line_terminator() { + self.state_mut().set_had_line_break(true); + + consume_cooked!(); + + let c = if c == '\r' && self.peek() == Some('\n') { + self.bump(); // '\r' + '\n' + } else { + match c { + '\n' => '\n', + '\r' => '\n', + '\u{2028}' => '\u{2028}', + '\u{2029}' => '\u{2029}', + _ => unreachable!(), + } + }; + + self.bump(); + + if let Ok(ref mut cooked) = cooked { + cooked.push(c); + } + cooked_slice_start = self.cur_pos(); + } else { + self.bump(); + } + } + + self.error(start_of_tpl, SyntaxError::UnterminatedTpl)? + } + + /// Read an escaped character for string literal. + /// + /// In template literal, we should preserve raw string. + fn read_escaped_char(&mut self, in_template: bool) -> LexResult>> { + debug_assert_eq!(self.cur(), Some('\\')); + + let start = self.cur_pos(); + + self.bump(); // '\' + + let c = match self.cur() { + Some(c) => c, + None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?, + }; + + macro_rules! push_c_and_ret { + ($c:expr) => {{ + $c + }}; + } + + let c = match c { + '\\' => push_c_and_ret!('\\'), + 'n' => push_c_and_ret!('\n'), + 'r' => push_c_and_ret!('\r'), + 't' => push_c_and_ret!('\t'), + 'b' => push_c_and_ret!('\u{0008}'), + 'v' => push_c_and_ret!('\u{000b}'), + 'f' => push_c_and_ret!('\u{000c}'), + '\r' => { + self.bump(); // remove '\r' + + self.eat(b'\n'); + + return Ok(None); + } + '\n' | '\u{2028}' | '\u{2029}' => { + self.bump(); + + return Ok(None); + } + + // read hexadecimal escape sequences + 'x' => { + self.bump(); // 'x' + + match self.read_int_u32::<16>(2)? 
{ + Some(val) => return Ok(Some(vec![Char::from(val)])), + None => self.error( + start, + SyntaxError::BadCharacterEscapeSequence { + expected: "2 hex characters", + }, + )?, + } + } + + // read unicode escape sequences + 'u' => match self.read_unicode_escape() { + Ok(chars) => return Ok(Some(chars)), + Err(err) => self.error(start, err.into_kind())?, + }, + + // octal escape sequences + '0'..='7' => { + self.bump(); + + let first_c = if c == '0' { + match self.cur() { + Some(next) if next.is_digit(8) => c, + // \0 is not an octal literal nor decimal literal. + _ => return Ok(Some(vec!['\u{0000}'.into()])), + } + } else { + c + }; + + // TODO: Show template instead of strict mode + if in_template { + self.error(start, SyntaxError::LegacyOctal)? + } + + self.emit_strict_mode_error(start, SyntaxError::LegacyOctal); + + let mut value: u8 = first_c.to_digit(8).unwrap() as u8; + + macro_rules! one { + ($check:expr) => {{ + let cur = self.cur(); + + match cur.and_then(|c| c.to_digit(8)) { + Some(v) => { + value = if $check { + let new_val = value + .checked_mul(8) + .and_then(|value| value.checked_add(v as u8)); + match new_val { + Some(val) => val, + None => return Ok(Some(vec![Char::from(value as char)])), + } + } else { + value * 8 + v as u8 + }; + + self.bump(); + } + _ => return Ok(Some(vec![Char::from(value as u32)])), + } + }}; + } + + one!(false); + one!(true); + + return Ok(Some(vec![Char::from(value as char)])); + } + _ => c, + }; + + unsafe { + // Safety: cur() is Some(c) if this method is called. 
+ self.input_mut().bump(); + } + + Ok(Some(vec![c.into()])) + } + + /// Expects current char to be '/' + fn read_regexp(&mut self, start: BytePos) -> LexResult { + unsafe { + // Safety: start is valid position, and cur() is Some('/') + self.input_mut().reset_to(start); + } + + debug_assert_eq!(self.cur(), Some('/')); + + let start = self.cur_pos(); + + self.bump(); + + let (mut escaped, mut in_class) = (false, false); + + let content = self.with_buf(|l, buf| { + while let Some(c) = l.cur() { + // This is ported from babel. + // Seems like regexp literal cannot contain linebreak. + if c.is_line_terminator() { + let span = l.span(start); + + return Err(crate::error::Error::new( + span, + SyntaxError::UnterminatedRegExp, + )); + } + + if escaped { + escaped = false; + } else { + match c { + '[' => in_class = true, + ']' if in_class => in_class = false, + // Terminates content part of regex literal + '/' if !in_class => break, + _ => {} + } + + escaped = c == '\\'; + } + + l.bump(); + buf.push(c); + } + + Ok(l.atom(&**buf)) + })?; + + // input is terminated without following `/` + if !self.is(b'/') { + let span = self.span(start); + + return Err(crate::error::Error::new( + span, + SyntaxError::UnterminatedRegExp, + )); + } + + self.bump(); // '/' + + // Spec says "It is a Syntax Error if IdentifierPart contains a Unicode escape + // sequence." TODO: check for escape + + // Need to use `read_word` because '\uXXXX' sequences are allowed + // here (don't ask). + // let flags_start = self.cur_pos(); + let flags = { + match self.cur() { + Some(c) if c.is_ident_start() => { + self.read_word_as_str_with(|l, s, _, _| l.atom(s)).map(Some) + } + _ => Ok(None), + } + }? + .map(|(value, _)| value) + .unwrap_or_default(); + + Ok(Self::Token::regexp(content, flags, self)) + } + + /// This method is optimized for texts without escape sequences. 
+ /// + /// `convert(text, has_escape, can_be_keyword)` + fn read_word_as_str_with(&mut self, convert: F) -> LexResult<(Ret, bool)> + where + F: FnOnce(&mut Self, &str, bool, bool) -> Ret, + { + debug_assert!(self.cur().is_some()); + let mut first = true; + let mut can_be_keyword = true; + let mut slice_start = self.cur_pos(); + let mut has_escape = false; + + self.with_buf(|l, buf| { + loop { + if let Some(c) = l.input().cur_as_ascii() { + // Performance optimization + if can_be_keyword && (c.is_ascii_uppercase() || c.is_ascii_digit()) { + can_be_keyword = false; + } + + if Ident::is_valid_ascii_continue(c) { + l.bump(); + continue; + } else if first && Ident::is_valid_ascii_start(c) { + l.bump(); + first = false; + continue; + } + + // unicode escape + if c == b'\\' { + first = false; + has_escape = true; + let start = l.cur_pos(); + l.bump(); + + if !l.is(b'u') { + l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)? + } + + { + let end = l.input().cur_pos(); + let s = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input_slice(slice_start, start) + }; + buf.push_str(s); + unsafe { + // Safety: We got end from `self.input` + l.input_mut().reset_to(end); + } + } + + let chars = l.read_unicode_escape()?; + + if let Some(c) = chars.first() { + let valid = if first { + c.is_ident_start() + } else { + c.is_ident_part() + }; + + if !valid { + l.emit_error(start, SyntaxError::InvalidIdentChar); + } + } + + for c in chars { + buf.extend(c); + } + + slice_start = l.cur_pos(); + continue; + } + + // ASCII but not a valid identifier + break; + } else if let Some(c) = l.input().cur() { + if Ident::is_valid_non_ascii_continue(c) { + l.bump(); + continue; + } else if first && Ident::is_valid_non_ascii_start(c) { + l.bump(); + first = false; + continue; + } + } + + break; + } + + let end = l.cur_pos(); + let s = unsafe { + // Safety: slice_start and end are valid position because we got them from + // 
`self.input` + l.input_slice(slice_start, end) + }; + let value = if !has_escape { + // Fast path: raw slice is enough if there's no escape. + convert(l, s, has_escape, can_be_keyword) + } else { + buf.push_str(s); + convert(l, buf, has_escape, can_be_keyword) + }; + + Ok((value, has_escape)) + }) + } + + /// `#` + fn read_token_number_sign(&mut self) -> LexResult> { + debug_assert!(self.cur().is_some()); + + unsafe { + // Safety: cur() is Some('#') + self.input_mut().bump(); // '#' + } + + // `#` can also be a part of shebangs, however they should have been + // handled by `read_shebang()` + debug_assert!( + !self.input().is_at_start() || self.cur() != Some('!'), + "#! should have already been handled by read_shebang()" + ); + Ok(Some(Self::Token::HASH)) + } + + /// Read a token given `.`. + /// + /// This is extracted as a method to reduce size of `read_token`. + #[inline(never)] + fn read_token_dot(&mut self) -> LexResult { + // Check for eof + let next = match self.input().peek() { + Some(next) => next, + None => { + unsafe { + // Safety: cur() is Some(',') + self.input_mut().bump(); + } + return Ok(Self::Token::DOT); + } + }; + if next.is_ascii_digit() { + return self.read_number(true).map(|v| match v { + Left((value, raw)) => Self::Token::num(value, raw, self), + Right((value, raw)) => Self::Token::bigint(value, raw, self), + }); + } + + unsafe { + // Safety: cur() is Some + // 1st `.` + self.input_mut().bump(); + } + + if next == '.' && self.input().peek() == Some('.') { + unsafe { + // Safety: peek() was Some + + self.input_mut().bump(); // 2nd `.` + self.input_mut().bump(); // 3rd `.` + } + + return Ok(Self::Token::DOTDOTDOT); + } + + Ok(Self::Token::DOT) + } + + /// Read a token given `?`. + /// + /// This is extracted as a method to reduce size of `read_token`. 
+ #[inline(never)] + fn read_token_question_mark(&mut self) -> LexResult { + match self.input().peek() { + Some('?') => { + unsafe { + // Safety: peek() was some + self.input_mut().bump(); + self.input_mut().bump(); + } + if self.input().cur() == Some('=') { + unsafe { + // Safety: cur() was some + self.input_mut().bump(); + } + return Ok(Self::Token::NULLISH_ASSIGN); + } + Ok(Self::Token::NULLISH_COALESCING) + } + _ => { + unsafe { + // Safety: peek() is callable only if cur() is Some + self.input_mut().bump(); + } + Ok(Self::Token::QUESTION) + } + } + } + + /// Read a token given `:`. + /// + /// This is extracted as a method to reduce size of `read_token`. + #[inline(never)] + fn read_token_colon(&mut self) -> LexResult { + unsafe { + // Safety: cur() is Some(':') + self.input_mut().bump(); + } + Ok(Self::Token::COLON) + } + + /// Read a token given `0`. + /// + /// This is extracted as a method to reduce size of `read_token`. + #[inline(never)] + fn read_token_zero(&mut self) -> LexResult { + let next = self.input().peek(); + + let bigint = match next { + Some('x') | Some('X') => self.read_radix_number::<16>(), + Some('o') | Some('O') => self.read_radix_number::<8>(), + Some('b') | Some('B') => self.read_radix_number::<2>(), + _ => { + return self.read_number(false).map(|v| match v { + Left((value, raw)) => Self::Token::num(value, raw, self), + Right((value, raw)) => Self::Token::bigint(value, raw, self), + }); + } + }; + + bigint.map(|v| match v { + Left((value, raw)) => Self::Token::num(value, raw, self), + Right((value, raw)) => Self::Token::bigint(value, raw, self), + }) + } + + /// Read a token given `|` or `&`. + /// + /// This is extracted as a method to reduce size of `read_token`. 
+ #[inline(never)] + fn read_token_logical(&mut self) -> LexResult { + let had_line_break_before_last = self.had_line_break_before_last(); + let start = self.cur_pos(); + + unsafe { + // Safety: cur() is Some(c as char) + self.input_mut().bump(); + } + let token = if C == b'&' { + BinOpToken::BitAnd + } else { + BinOpToken::BitOr + }; + + // '|=', '&=' + if self.input_mut().eat_byte(b'=') { + return Ok(match token { + BinOpToken::BitAnd => Self::Token::BIT_AND_EQ, + BinOpToken::BitOr => Self::Token::BIT_OR_EQ, + _ => unreachable!(), + }); + } + + // '||', '&&' + if self.input().cur() == Some(C as char) { + unsafe { + // Safety: cur() is Some(c) + self.input_mut().bump(); + } + + if self.input().cur() == Some('=') { + unsafe { + // Safety: cur() is Some('=') + self.input_mut().bump(); + } + + return Ok(match token { + BinOpToken::BitAnd => Self::Token::LOGICAL_AND_EQ, + BinOpToken::BitOr => Self::Token::LOGICAL_OR_EQ, + _ => unreachable!(), + }); + } + + // ||||||| + // ^ + if had_line_break_before_last && token == BinOpToken::BitOr && self.is_str("||||| ") { + let span = fixed_len_span(start, 7); + self.emit_error_span(span, SyntaxError::TS1185); + self.skip_line_comment(5); + self.skip_space::(); + return self.error_span(span, SyntaxError::TS1185); + } + + return Ok(match token { + BinOpToken::BitAnd => Self::Token::LOGICAL_AND, + BinOpToken::BitOr => Self::Token::LOGICAL_OR, + _ => unreachable!(), + }); + } + + Ok(if token == BinOpToken::BitAnd { + Self::Token::BIT_AND + } else { + Self::Token::BIT_OR + }) + } + + /// Read a token given `*` or `%`. + /// + /// This is extracted as a method to reduce size of `read_token`. 
+ #[inline(never)] + fn read_token_mul_mod(&mut self) -> LexResult { + let is_mul = C == b'*'; + unsafe { + // Safety: cur() is Some(c) + self.input_mut().bump(); + } + let mut token = if is_mul { + BinOpToken::Mul + } else { + BinOpToken::Mod + }; + + // check for ** + if is_mul && self.input_mut().eat_byte(b'*') { + token = BinOpToken::Exp + } + + Ok(if self.input_mut().eat_byte(b'=') { + match token { + BinOpToken::Mul => Self::Token::MUL_EQ, + BinOpToken::Mod => Self::Token::MOD_EQ, + BinOpToken::Exp => Self::Token::EXP_EQ, + _ => unreachable!(), + } + } else { + match token { + BinOpToken::Mul => Self::Token::MUL, + BinOpToken::Mod => Self::Token::MOD, + BinOpToken::Exp => Self::Token::EXP, + _ => unreachable!(), + } + }) + } + + #[inline(never)] + fn read_slash(&mut self) -> LexResult> { + debug_assert_eq!(self.cur(), Some('/')); + // Divide operator + self.bump(); + Ok(Some(if self.eat(b'=') { + Self::Token::DIV_EQ + } else { + Self::Token::DIV + })) + } + + /// This can be used if there's no keyword starting with the first + /// character. 
+ fn read_ident_unknown(&mut self) -> LexResult { + debug_assert!(self.cur().is_some()); + + let (word, _) = self.read_word_as_str_with(|l, s, _, _| { + let atom = l.atom(s); + Self::Token::unknown_ident(atom, l) + })?; + + Ok(word) + } + + /// See https://tc39.github.io/ecma262/#sec-literals-string-literals + fn read_str_lit(&mut self) -> LexResult { + debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"')); + let start = self.cur_pos(); + let quote = self.cur().unwrap() as u8; + + self.bump(); // '"' + + let mut has_escape = false; + let mut slice_start = self.input().cur_pos(); + + self.with_buf(|l, buf| { + loop { + if let Some(c) = l.input().cur_as_ascii() { + if c == quote { + let value_end = l.cur_pos(); + + let value = if !has_escape { + let s = unsafe { + // Safety: slice_start and value_end are valid position because we + // got them from `self.input` + l.input_slice(slice_start, value_end) + }; + + l.atom(s) + } else { + let s = unsafe { + // Safety: slice_start and value_end are valid position because we + // got them from `self.input` + l.input_slice(slice_start, value_end) + }; + buf.push_str(s); + + l.atom(&**buf) + }; + + unsafe { + // Safety: cur is quote + l.input_mut().bump(); + } + + let end = l.cur_pos(); + let raw = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input_slice(start, end) + }; + let raw = l.atom(raw); + return Ok(Self::Token::str(value, raw, l)); + } + + if c == b'\\' { + has_escape = true; + + { + let end = l.cur_pos(); + let s = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input_slice(slice_start, end) + }; + buf.push_str(s); + } + + if let Some(chars) = l.read_escaped_char(false)? 
{ + for c in chars { + buf.extend(c); + } + } + + slice_start = l.cur_pos(); + continue; + } + + if (c as char).is_line_break() { + break; + } + + unsafe { + // Safety: cur is a ascii character + l.input_mut().bump(); + } + continue; + } + + match l.input().cur() { + Some(c) => { + if c.is_line_break() { + break; + } + unsafe { + // Safety: cur is Some(c) + l.input_mut().bump(); + } + } + None => break, + } + } + + { + let end = l.cur_pos(); + let s = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input_slice(slice_start, end) + }; + buf.push_str(s); + } + + l.emit_error(start, SyntaxError::UnterminatedStrLit); + + let end = l.cur_pos(); + + let raw = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input_slice(start, end) + }; + Ok(Self::Token::str(l.atom(&**buf), l.atom(raw), l)) + }) + } + + /// This can be used if there's no keyword starting with the first + /// character. + fn read_word_with( + &mut self, + convert: &dyn Fn(&str) -> Option, + ) -> LexResult> { + debug_assert!(self.cur().is_some()); + + let start = self.cur_pos(); + let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| { + if can_be_known { + if let Some(word) = convert(s) { + return word; + } + } + let atom = l.atom(s); + Self::Token::unknown_ident(atom, l) + })?; + + // Note: ctx is store in lexer because of this error. + // 'await' and 'yield' may have semantic of reserved word, which means lexer + // should know context or parser should handle this error. Our approach to this + // problem is former one. + + if has_escape && word.is_reserved(self.ctx()) { + let word = word.into_atom(self).unwrap(); + self.error(start, SyntaxError::EscapeInReservedWord { word })? 
+ } else { + Ok(Some(word)) + } + } +} + +pub fn pos_span(p: BytePos) -> Span { + Span::new(p, p) +} + +pub fn fixed_len_span(p: BytePos, len: u32) -> Span { + Span::new(p, p + BytePos(len)) +} diff --git a/crates/swc_ecma_lexer/src/common/lexer/number.rs b/crates/swc_ecma_lexer/src/common/lexer/number.rs new file mode 100644 index 000000000000..b297e3bd1426 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/number.rs @@ -0,0 +1,18 @@ +use num_bigint::BigInt as BigIntValue; + +pub struct LazyBigInt { + pub(super) value: String, +} + +impl LazyBigInt { + #[inline] + pub(super) fn new(value: String) -> Self { + Self { value } + } + + #[inline] + pub(super) fn into_value(self) -> BigIntValue { + BigIntValue::parse_bytes(self.value.as_bytes(), RADIX as _) + .expect("failed to parse string as a bigint") + } +} diff --git a/crates/swc_ecma_lexer/src/common/lexer/state.rs b/crates/swc_ecma_lexer/src/common/lexer/state.rs new file mode 100644 index 000000000000..cda4fd80df9e --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/state.rs @@ -0,0 +1,330 @@ +use swc_common::BytePos; + +use crate::{TokenContext, TokenContexts}; + +pub trait TokenType: TokenKind { + fn is_other_and_can_have_trailing_comment(self) -> bool; + fn is_other_and_before_expr_is_false(self) -> bool; +} + +pub trait TokenKind: Copy { + fn is_dot(self) -> bool; + fn is_bin_op(self) -> bool; + fn is_semi(self) -> bool; + fn is_template(self) -> bool; + fn is_keyword(self) -> bool; + fn is_colon(self) -> bool; + fn is_lbrace(self) -> bool; + fn is_rbrace(self) -> bool; + fn is_lparen(self) -> bool; + fn is_rparen(self) -> bool; + fn is_keyword_fn(self) -> bool; + fn is_keyword_return(self) -> bool; + fn is_keyword_yield(self) -> bool; + fn is_keyword_else(self) -> bool; + fn is_keyword_class(self) -> bool; + fn is_keyword_let(self) -> bool; + fn is_keyword_var(self) -> bool; + fn is_keyword_const(self) -> bool; + fn is_keyword_if(self) -> bool; + fn is_keyword_while(self) -> bool; + fn 
is_keyword_for(self) -> bool; + fn is_keyword_with(self) -> bool; + fn is_lt(self) -> bool; + fn is_gt(self) -> bool; + fn is_arrow(self) -> bool; + fn is_ident(self) -> bool; + fn is_known_ident_of(self) -> bool; + fn is_slash(self) -> bool; + fn is_dollar_lbrace(self) -> bool; + fn is_plus_plus(self) -> bool; + fn is_minus_minus(self) -> bool; + fn is_back_quote(self) -> bool; + fn is_jsx_tag_start(self) -> bool; + fn is_jsx_tag_end(self) -> bool; + fn before_expr(self) -> bool; +} + +pub trait State: Clone { + type TokenKind: std::fmt::Debug + Copy + TokenKind + Into; + type TokenType: std::fmt::Debug + Copy + TokenType; + + fn is_expr_allowed(&self) -> bool; + fn set_is_expr_allowed(&mut self, is_expr_allowed: bool); + fn set_next_regexp(&mut self, start: Option); + fn had_line_break(&self) -> bool; + fn set_had_line_break(&mut self, had_line_break: bool); + fn had_line_break_before_last(&self) -> bool; + fn token_contexts(&self) -> &crate::TokenContexts; + fn mut_token_contexts(&mut self) -> &mut crate::TokenContexts; + fn set_token_type(&mut self, token_type: Self::TokenType); + fn token_type(&self) -> Option; + fn set_tpl_start(&mut self, start: BytePos); + fn syntax(&self) -> crate::Syntax; + fn prev_hi(&self) -> BytePos; + fn start(&self) -> BytePos; + fn add_current_line(&mut self, offset: usize); + fn set_line_start(&mut self, line_start: BytePos); + + fn can_skip_space(&self) -> bool { + !self + .token_contexts() + .current() + .map(|t| t.preserve_space()) + .unwrap_or_default() + } + + fn can_have_trailing_line_comment(&self) -> bool { + let Some(t) = self.token_type() else { + return true; + }; + !t.is_bin_op() + } + + fn can_have_trailing_comment(&self) -> bool { + self.token_type().is_some_and(|t| { + !t.is_keyword() + && (t.is_semi() || t.is_lbrace() || t.is_other_and_can_have_trailing_comment()) + }) + } + + fn last_was_tpl_element(&self) -> bool { + self.token_type().is_some_and(|t| t.is_template()) + } + + fn update(&mut self, start: BytePos, 
next: Self::TokenKind) { + if cfg!(feature = "debug") { + tracing::trace!( + "updating state: next={:?}, had_line_break={} ", + next, + self.had_line_break() + ); + } + let prev = self.token_type(); + self.set_token_type(next.into()); + let is_expr_allowed_on_next = self.is_expr_allowed_on_next(prev, start, next); + self.set_is_expr_allowed(is_expr_allowed_on_next); + } + + /// Returns true if following `LBrace` token is `block statement` according + /// to `ctx`, `prev`, `is_expr_allowed`. + fn is_brace_block( + token_contexts: &TokenContexts, + prev: Option, + had_line_break: bool, + is_expr_allowed: bool, + ) -> bool { + let Some(prev) = prev else { + return true; + }; + + if prev.is_colon() { + match token_contexts.current() { + Some(TokenContext::BraceStmt) => return true, + // `{ a: {} }` + // ^ ^ + Some(TokenContext::BraceExpr) => return false, + _ => {} + }; + } + + // function a() { + // return { a: "" }; + // } + // function a() { + // return + // { + // function b(){} + // }; + // } + if prev.is_keyword_return() || prev.is_keyword_yield() { + had_line_break + } else if prev.is_rparen() + || prev.is_semi() + || prev.is_keyword_else() + || prev.is_lt() + || prev.is_gt() + || prev.is_arrow() + { + true + } else if prev.is_lbrace() { + // If previous token was `{` + // https://github.com/swc-project/swc/issues/3241#issuecomment-1029584460 + // + let c = token_contexts.current(); + if c == Some(TokenContext::BraceExpr) { + let len = token_contexts.len(); + if let Some(TokenContext::JSXOpeningTag) = token_contexts.0.get(len - 2) { + return true; + } + } + c == Some(TokenContext::BraceStmt) + } else { + if had_line_break && prev.is_other_and_before_expr_is_false() { + return true; + } + !is_expr_allowed + } + } + + /// `is_expr_allowed`: previous value. + /// `start`: start of newly produced token. 
+ fn is_expr_allowed_on_next( + &mut self, + prev: Option, + start: BytePos, + next: Self::TokenKind, + ) -> bool { + let is_expr_allowed = self.is_expr_allowed(); + let had_line_break = self.had_line_break(); + let had_line_break_before_last = self.had_line_break_before_last(); + let is_next_keyword = next.is_keyword(); + let syntax = self.syntax(); + let context = self.mut_token_contexts(); + + if is_next_keyword && prev.is_some_and(|prev| prev.is_dot()) { + false + } else if next.is_rparen() || next.is_rbrace() { + // TODO: Verify + if context.len() == 1 { + return true; + } else { + let out = context.pop().unwrap(); + // let a = function(){} + if out == TokenContext::BraceStmt + && matches!( + context.current(), + Some(TokenContext::FnExpr | TokenContext::ClassExpr) + ) + { + context.pop(); + return false; + } + + // ${} in template + if out == TokenContext::TplQuasi { + match context.current() { + Some(TokenContext::Tpl) => return false, + _ => return true, + } + } + // expression cannot follow expression + !out.is_expr() + } + } else if next.is_keyword_fn() { + // This is required to lex + // `x = function(){}/42/i` + if is_expr_allowed + && !Self::is_brace_block(context, prev, had_line_break, is_expr_allowed) + { + context.push(TokenContext::FnExpr); + } + false + } else if next.is_keyword_class() { + if is_expr_allowed + && !Self::is_brace_block(context, prev, had_line_break, is_expr_allowed) + { + context.push(TokenContext::ClassExpr); + } + false + } else if next.is_colon() + && matches!( + context.current(), + Some(TokenContext::FnExpr | TokenContext::ClassExpr) + ) + { + // `function`/`class` keyword is object prop + // + // ```JavaScript + // { function: expr, class: expr } + // ``` + context.pop(); // Remove FnExpr or ClassExpr + true + } else if next.is_known_ident_of() + && context.current() == Some(TokenContext::ParenStmt { is_for_loop: true }) + { + // for (a of b) {} + + // e.g. 
for (a of _) => true + !prev + .expect("context.current() if ParenStmt, so prev token cannot be None") + .before_expr() + } else if next.is_ident() { + let Some(prev) = prev else { + return false; + }; + had_line_break_before_last + && (prev.is_keyword_var() || prev.is_keyword_let() || prev.is_keyword_const()) + } else if next.is_lbrace() { + let cur = context.current(); + if syntax.jsx() && cur == Some(TokenContext::JSXOpeningTag) { + context.push(TokenContext::BraceExpr) + } else if syntax.jsx() && cur == Some(TokenContext::JSXExpr) { + context.push(TokenContext::TplQuasi); + } else { + let next_ctxt = + if Self::is_brace_block(context, prev, had_line_break, is_expr_allowed) { + TokenContext::BraceStmt + } else { + TokenContext::BraceExpr + }; + context.push(next_ctxt); + } + true + } else if next.is_slash() + && syntax.jsx() + && prev.is_some_and(|prev| prev.is_jsx_tag_start()) + { + context.pop(); + context.pop(); // do not consider JSX expr -> JSX open tag ->... anymore + context.push(TokenContext::JSXClosingTag); // reconsider as closing tag context + false + } else if next.is_dollar_lbrace() { + context.push(TokenContext::TplQuasi); + true + } else if next.is_lparen() { + let c = match prev { + Some(prev) => { + if prev.is_keyword_if() || prev.is_keyword_while() || prev.is_keyword_with() { + TokenContext::ParenStmt { is_for_loop: false } + } else if prev.is_keyword_for() { + TokenContext::ParenStmt { is_for_loop: true } + } else { + TokenContext::ParenExpr + } + } + None => TokenContext::ParenExpr, + }; + context.push(c); + true + } else if next.is_plus_plus() || next.is_minus_minus() { + is_expr_allowed + } else if next.is_back_quote() { + // If we are in template, ` terminates template. 
+ if let Some(TokenContext::Tpl) = context.current() { + context.pop(); + } else { + context.push(TokenContext::Tpl); + self.set_tpl_start(start); + } + false + } else if next.is_jsx_tag_start() { + context.push(TokenContext::JSXExpr); // treat as beginning of JSX expression + context.push(TokenContext::JSXOpeningTag); // start opening tag context + false + } else if next.is_jsx_tag_end() { + let out = context.pop(); + if (out == Some(TokenContext::JSXOpeningTag) + && prev.is_some_and(|prev| prev.is_slash())) + || out == Some(TokenContext::JSXClosingTag) + { + context.pop(); + context.current() == Some(TokenContext::JSXExpr) + } else { + true + } + } else { + next.before_expr() + } + } +} diff --git a/crates/swc_ecma_lexer/src/common/lexer/token.rs b/crates/swc_ecma_lexer/src/common/lexer/token.rs new file mode 100644 index 000000000000..cc1f5743e1b9 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/lexer/token.rs @@ -0,0 +1,569 @@ +use num_bigint::BigInt; +use swc_atoms::Atom; +use swc_ecma_ast::{AssignOp, BinaryOp}; + +use super::LexResult; +use crate::common::{context::Context, input::Tokens}; + +pub trait TokenFactory<'a, TokenAndSpan, I: Tokens>: Sized + PartialEq { + type Lexer: super::Lexer<'a, TokenAndSpan>; + type Buffer: crate::common::parser::buffer::Buffer< + 'a, + I = I, + Token = Self, + Lexer = Self::Lexer, + TokenAndSpan = TokenAndSpan, + >; + + const FROM: Self; + const FOR: Self; + const INSTANCEOF: Self; + const SATISFIES: Self; + const THROW: Self; + const AS: Self; + const NAMESPACE: Self; + const RETURN: Self; + const AT: Self; + const EXPORT: Self; + const DECLARE: Self; + const ASSERTS: Self; + const ASSERT: Self; + const JSX_TAG_END: Self; + const JSX_TAG_START: Self; + const DOLLAR_LBRACE: Self; + const BACKQUOTE: Self; + const HASH: Self; + const IN: Self; + const IS: Self; + const CONST: Self; + const DOT: Self; + const TARGET: Self; + const GET: Self; + const SET: Self; + const DOTDOTDOT: Self; + const NULLISH_ASSIGN: Self; + const 
NULLISH_COALESCING: Self; + const QUESTION: Self; + const COLON: Self; + const COMMA: Self; + const BIT_AND: Self; + const BIT_AND_EQ: Self; + const BIT_OR: Self; + const BIT_OR_EQ: Self; + const LOGICAL_AND: Self; + const LOGICAL_AND_EQ: Self; + const LOGICAL_OR: Self; + const LOGICAL_OR_EQ: Self; + const MUL: Self; + const MUL_EQ: Self; + const MOD: Self; + const MOD_EQ: Self; + const EXP: Self; + const EXP_EQ: Self; + const DIV: Self; + const DIV_EQ: Self; + const EQUAL: Self; + const LSHIFT: Self; + const LSHIFT_EQ: Self; + const LESS: Self; + const GLOBAL: Self; + const LESS_EQ: Self; + const RSHIFT: Self; + const RSHIFT_EQ: Self; + const GREATER: Self; + const GREATER_EQ: Self; + const ZERO_FILL_RSHIFT: Self; + const ZERO_FILL_RSHIFT_EQ: Self; + const NULL: Self; + const ANY: Self; + const BOOLEAN: Self; + const BIGINT: Self; + const NEVER: Self; + const NUMBER: Self; + const OBJECT: Self; + const STRING: Self; + const SYMBOL: Self; + const UNKNOWN: Self; + const UNDEFINED: Self; + const INTRINSIC: Self; + const TRUE: Self; + const TRY: Self; + const FALSE: Self; + const ENUM: Self; + const YIELD: Self; + const LET: Self; + const VAR: Self; + const STATIC: Self; + const IMPLEMENTS: Self; + const INTERFACE: Self; + const TYPE: Self; + const PACKAGE: Self; + const PRIVATE: Self; + const PROTECTED: Self; + const PUBLIC: Self; + const READONLY: Self; + const ARROW: Self; + const REQUIRE: Self; + const AWAIT: Self; + const BREAK: Self; + const CONTINUE: Self; + const THIS: Self; + const SUPER: Self; + const WHILE: Self; + const DO: Self; + const LPAREN: Self; + const RPAREN: Self; + const LBRACKET: Self; + const RBRACKET: Self; + const LBRACE: Self; + const FINALLY: Self; + const CATCH: Self; + const SWITCH: Self; + const RBRACE: Self; + const FUNCTION: Self; + const IF: Self; + const ELSE: Self; + const CLASS: Self; + const NEW: Self; + const ABSTRACT: Self; + const ACCESSOR: Self; + const IMPORT: Self; + const PLUS: Self; + const MINUS: Self; + const BANG: Self; 
+ const TILDE: Self; + const PLUS_PLUS: Self; + const MINUS_MINUS: Self; + const DELETE: Self; + const TYPEOF: Self; + const VOID: Self; + const EXTENDS: Self; + const SEMI: Self; + const OF: Self; + const KEYOF: Self; + const UNIQUE: Self; + const INFER: Self; + const USING: Self; + const WITH: Self; + const ASYNC: Self; + const CASE: Self; + const DEFAULT: Self; + const DEBUGGER: Self; + + fn jsx_name(name: &'a str, lexer: &mut Self::Lexer) -> Self; + fn is_jsx_name(&self) -> bool; + fn take_jsx_name(self, buffer: &mut Self::Buffer) -> Atom; + + fn str(value: Atom, raw: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_str(&self) -> bool; + fn take_str(self, buffer: &mut Self::Buffer) -> (Atom, Atom); + + fn template(cooked: LexResult, raw: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_template(&self) -> bool; + fn take_template(self, buffer: &mut Self::Buffer) -> (LexResult, Atom); + + fn jsx_text(value: Atom, raw: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_jsx_text(&self) -> bool; + fn take_jsx_text(self, buffer: &mut Self::Buffer) -> (Atom, Atom); + + fn regexp(content: Atom, flags: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_regexp(&self) -> bool; + fn take_regexp(self, buffer: &mut Self::Buffer) -> (Atom, Atom); + + fn num(value: f64, raw: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_num(&self) -> bool; + fn take_num(self, buffer: &mut Self::Buffer) -> (f64, Atom); + + fn bigint(value: Box, raw: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_bigint(&self) -> bool; + fn take_bigint(self, buffer: &mut Self::Buffer) -> (Box, Atom); + + fn unknown_ident(value: Atom, lexer: &mut Self::Lexer) -> Self; + fn is_unknown_ident(&self) -> bool; + fn take_unknown_ident(self, buffer: &mut Self::Buffer) -> Atom; + fn take_unknown_ident_ref<'b>(&'b self, buffer: &'b mut Self::Buffer) -> &'b Atom; + + fn is_known_ident(&self) -> bool; + fn take_known_ident(&self) -> Atom; + + fn starts_expr(&self) -> bool; + fn to_string(&self, buffer: &Self::Buffer) -> String; 
+ + fn is_error(&self) -> bool; + fn take_error(self, buffer: &mut Self::Buffer) -> crate::error::Error; + + fn is_word(&self) -> bool; + fn take_word(self, buffer: &Self::Buffer) -> Option; + fn is_keyword(&self) -> bool; + + fn is_reserved(&self, ctx: super::Context) -> bool; + fn into_atom(self, lexer: &mut Self::Lexer) -> Option; + fn follows_keyword_let(&self) -> bool; + + fn is_bin_op(&self) -> bool; + fn as_bin_op(&self) -> Option; + + fn is_assign_op(&self) -> bool; + fn as_assign_op(&self) -> Option; + + #[inline(always)] + fn is_less(&self) -> bool { + Self::LESS.eq(self) + } + #[inline(always)] + fn is_less_eq(&self) -> bool { + Self::LESS_EQ.eq(self) + } + #[inline(always)] + fn is_greater(&self) -> bool { + Self::GREATER.eq(self) + } + #[inline(always)] + fn is_colon(&self) -> bool { + Self::COLON.eq(self) + } + #[inline(always)] + fn is_comma(&self) -> bool { + Self::COMMA.eq(self) + } + #[inline(always)] + fn is_equal(&self) -> bool { + Self::EQUAL.eq(self) + } + #[inline(always)] + fn is_question(&self) -> bool { + Self::QUESTION.eq(self) + } + #[inline(always)] + fn is_null(&self) -> bool { + Self::NULL.eq(self) + } + #[inline(always)] + fn is_lshift(&self) -> bool { + Self::LSHIFT.eq(self) + } + #[inline(always)] + fn is_rshift(&self) -> bool { + Self::RSHIFT.eq(self) + } + #[inline(always)] + fn is_rshift_eq(&self) -> bool { + Self::RSHIFT_EQ.eq(self) + } + #[inline(always)] + fn is_greater_eq(&self) -> bool { + Self::GREATER_EQ.eq(self) + } + #[inline(always)] + fn is_true(&self) -> bool { + Self::TRUE.eq(self) + } + #[inline(always)] + fn is_false(&self) -> bool { + Self::FALSE.eq(self) + } + #[inline(always)] + fn is_enum(&self) -> bool { + Self::ENUM.eq(self) + } + #[inline(always)] + fn is_yield(&self) -> bool { + Self::YIELD.eq(self) + } + #[inline(always)] + fn is_let(&self) -> bool { + Self::LET.eq(self) + } + #[inline(always)] + fn is_var(&self) -> bool { + Self::VAR.eq(self) + } + #[inline(always)] + fn is_static(&self) -> bool { + 
Self::STATIC.eq(self) + } + #[inline(always)] + fn is_extends(&self) -> bool { + Self::EXTENDS.eq(self) + } + #[inline(always)] + fn is_implements(&self) -> bool { + Self::IMPLEMENTS.eq(self) + } + #[inline(always)] + fn is_interface(&self) -> bool { + Self::INTERFACE.eq(self) + } + #[inline(always)] + fn is_type(&self) -> bool { + Self::TYPE.eq(self) + } + #[inline(always)] + fn is_package(&self) -> bool { + Self::PACKAGE.eq(self) + } + #[inline(always)] + fn is_private(&self) -> bool { + Self::PRIVATE.eq(self) + } + #[inline(always)] + fn is_protected(&self) -> bool { + Self::PROTECTED.eq(self) + } + #[inline(always)] + fn is_public(&self) -> bool { + Self::PUBLIC.eq(self) + } + #[inline(always)] + fn is_readonly(&self) -> bool { + Self::READONLY.eq(self) + } + #[inline(always)] + fn is_await(&self) -> bool { + Self::AWAIT.eq(self) + } + #[inline(always)] + fn is_break(&self) -> bool { + Self::BREAK.eq(self) + } + #[inline(always)] + fn is_continue(&self) -> bool { + Self::CONTINUE.eq(self) + } + #[inline(always)] + fn is_arrow(&self) -> bool { + Self::ARROW.eq(self) + } + #[inline(always)] + fn is_this(&self) -> bool { + Self::THIS.eq(self) + } + #[inline(always)] + fn is_super(&self) -> bool { + Self::SUPER.eq(self) + } + #[inline(always)] + fn is_using(&self) -> bool { + Self::USING.eq(self) + } + #[inline(always)] + fn is_backquote(&self) -> bool { + Self::BACKQUOTE.eq(self) + } + #[inline(always)] + fn is_lparen(&self) -> bool { + Self::LPAREN.eq(self) + } + #[inline(always)] + fn is_rparen(&self) -> bool { + Self::RPAREN.eq(self) + } + #[inline(always)] + fn is_lbracket(&self) -> bool { + Self::LBRACKET.eq(self) + } + #[inline(always)] + fn is_rbracket(&self) -> bool { + Self::RBRACKET.eq(self) + } + #[inline(always)] + fn is_lbrace(&self) -> bool { + Self::LBRACE.eq(self) + } + #[inline(always)] + fn is_rbrace(&self) -> bool { + Self::RBRACE.eq(self) + } + #[inline(always)] + fn is_function(&self) -> bool { + Self::FUNCTION.eq(self) + } + #[inline(always)] 
+ fn is_class(&self) -> bool { + Self::CLASS.eq(self) + } + #[inline(always)] + fn is_if(&self) -> bool { + Self::IF.eq(self) + } + #[inline(always)] + fn is_return(&self) -> bool { + Self::RETURN.eq(self) + } + #[inline(always)] + fn is_switch(&self) -> bool { + Self::SWITCH.eq(self) + } + #[inline(always)] + fn is_throw(&self) -> bool { + Self::THROW.eq(self) + } + #[inline(always)] + fn is_catch(&self) -> bool { + Self::CATCH.eq(self) + } + #[inline(always)] + fn is_finally(&self) -> bool { + Self::FINALLY.eq(self) + } + #[inline(always)] + fn is_try(&self) -> bool { + Self::TRY.eq(self) + } + #[inline(always)] + fn is_with(&self) -> bool { + Self::WITH.eq(self) + } + #[inline(always)] + fn is_while(&self) -> bool { + Self::WHILE.eq(self) + } + #[inline(always)] + fn is_new(&self) -> bool { + Self::NEW.eq(self) + } + #[inline(always)] + fn is_ident_ref(&self, ctx: Context) -> bool { + self.is_word() && !self.is_reserved(ctx) + } + #[inline(always)] + fn is_import(&self) -> bool { + Self::IMPORT.eq(self) + } + #[inline(always)] + fn is_export(&self) -> bool { + Self::EXPORT.eq(self) + } + #[inline(always)] + fn is_dot(&self) -> bool { + Self::DOT.eq(self) + } + #[inline(always)] + fn is_do(&self) -> bool { + Self::DO.eq(self) + } + #[inline(always)] + fn is_for(&self) -> bool { + Self::FOR.eq(self) + } + #[inline(always)] + fn is_from(&self) -> bool { + Self::FROM.eq(self) + } + #[inline(always)] + fn is_dotdotdot(&self) -> bool { + Self::DOTDOTDOT.eq(self) + } + #[inline(always)] + fn is_plus(&self) -> bool { + Self::PLUS.eq(self) + } + #[inline(always)] + fn is_minus(&self) -> bool { + Self::MINUS.eq(self) + } + #[inline(always)] + fn is_bang(&self) -> bool { + Self::BANG.eq(self) + } + #[inline(always)] + fn is_tilde(&self) -> bool { + Self::TILDE.eq(self) + } + #[inline(always)] + fn is_plus_plus(&self) -> bool { + Self::PLUS_PLUS.eq(self) + } + #[inline(always)] + fn is_minus_minus(&self) -> bool { + Self::MINUS_MINUS.eq(self) + } + #[inline(always)] + fn 
is_delete(&self) -> bool { + Self::DELETE.eq(self) + } + #[inline(always)] + fn is_typeof(&self) -> bool { + Self::TYPEOF.eq(self) + } + #[inline(always)] + fn is_of(&self) -> bool { + Self::OF.eq(self) + } + #[inline(always)] + fn is_void(&self) -> bool { + Self::VOID.eq(self) + } + #[inline(always)] + fn is_hash(&self) -> bool { + Self::HASH.eq(self) + } + #[inline(always)] + fn is_in(&self) -> bool { + Self::IN.eq(self) + } + #[inline(always)] + fn is_const(&self) -> bool { + Self::CONST.eq(self) + } + #[inline(always)] + fn is_star(&self) -> bool { + Self::MUL.eq(self) + } + #[inline(always)] + fn is_semi(&self) -> bool { + Self::SEMI.eq(self) + } + #[inline(always)] + fn is_slash(&self) -> bool { + Self::DIV.eq(self) + } + #[inline(always)] + fn is_slash_eq(&self) -> bool { + Self::DIV_EQ.eq(self) + } + #[inline(always)] + fn is_jsx_tag_start(&self) -> bool { + Self::JSX_TAG_START.eq(self) + } + #[inline(always)] + fn is_jsx_tag_end(&self) -> bool { + Self::JSX_TAG_END.eq(self) + } + #[inline(always)] + fn is_asserts(&self) -> bool { + Self::ASSERTS.eq(self) + } + #[inline(always)] + fn is_is(&self) -> bool { + Self::IS.eq(self) + } + #[inline(always)] + fn is_as(&self) -> bool { + Self::AS.eq(self) + } + #[inline(always)] + fn is_satisfies(&self) -> bool { + Self::SATISFIES.eq(self) + } + #[inline(always)] + fn is_instanceof(&self) -> bool { + Self::INSTANCEOF.eq(self) + } + #[inline(always)] + fn is_async(&self) -> bool { + Self::ASYNC.eq(self) + } + #[inline(always)] + fn is_case(&self) -> bool { + Self::CASE.eq(self) + } + #[inline(always)] + fn is_default(&self) -> bool { + Self::DEFAULT.eq(self) + } + #[inline(always)] + fn is_debugger(&self) -> bool { + Self::DEBUGGER.eq(self) + } +} diff --git a/crates/swc_ecma_lexer/src/lexer/whitespace.rs b/crates/swc_ecma_lexer/src/common/lexer/whitespace.rs similarity index 98% rename from crates/swc_ecma_lexer/src/lexer/whitespace.rs rename to crates/swc_ecma_lexer/src/common/lexer/whitespace.rs index 
7211ecab117b..0f2065950d36 100644 --- a/crates/swc_ecma_lexer/src/lexer/whitespace.rs +++ b/crates/swc_ecma_lexer/src/common/lexer/whitespace.rs @@ -1,5 +1,5 @@ /// Returns true if it's done -pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> u32>; +type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> u32>; /// Lookup table for whitespace static BYTE_HANDLERS: [ByteHandler; 256] = [ diff --git a/crates/swc_ecma_lexer/src/common/mod.rs b/crates/swc_ecma_lexer/src/common/mod.rs new file mode 100644 index 000000000000..eb1da1e4aee6 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/mod.rs @@ -0,0 +1,5 @@ +pub mod context; +pub mod input; +pub mod lexer; +pub mod parser; +pub mod syntax; diff --git a/crates/swc_ecma_lexer/src/common/parser/assign_target_or_spread.rs b/crates/swc_ecma_lexer/src/common/parser/assign_target_or_spread.rs new file mode 100644 index 000000000000..19cefbd6b2ff --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/assign_target_or_spread.rs @@ -0,0 +1,10 @@ +use swc_common::ast_node; +use swc_ecma_ast::{ExprOrSpread, Pat}; + +#[ast_node] +pub enum AssignTargetOrSpread { + #[tag("ExprOrSpread")] + ExprOrSpread(ExprOrSpread), + #[tag("*")] + Pat(Pat), +} diff --git a/crates/swc_ecma_lexer/src/common/parser/buffer.rs b/crates/swc_ecma_lexer/src/common/parser/buffer.rs new file mode 100644 index 000000000000..07edfd1e7686 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/buffer.rs @@ -0,0 +1,269 @@ +use debug_unreachable::debug_unreachable; +use swc_common::{BytePos, Span}; +use swc_ecma_ast::EsVersion; + +use super::token_and_span::TokenAndSpan as TokenAndSpanTrait; +use crate::{ + common::{context::Context, input::Tokens, lexer::token::TokenFactory}, + Syntax, +}; + +pub trait NextTokenAndSpan { + type Token; + fn token(&self) -> &Self::Token; + fn span(&self) -> Span; + fn had_line_break(&self) -> bool; +} + +pub trait Buffer<'a> { + type Token: std::fmt::Debug + PartialEq + Clone + TokenFactory<'a, 
Self::TokenAndSpan, Self::I>; + type Lexer: super::super::lexer::Lexer<'a, Self::TokenAndSpan>; + type Next: NextTokenAndSpan; + type TokenAndSpan: TokenAndSpanTrait; + type I: Tokens; + + fn new(lexer: Self::I) -> Self; + fn iter(&self) -> &Self::I; + fn iter_mut(&mut self) -> &mut Self::I; + + fn set_cur(&mut self, token: Self::TokenAndSpan); + fn next(&self) -> Option<&Self::Next>; + fn set_next(&mut self, token: Option); + fn next_mut(&mut self) -> &mut Option; + + fn cur(&mut self) -> Option<&Self::Token>; + fn get_cur(&self) -> Option<&Self::TokenAndSpan>; + fn get_cur_mut(&mut self) -> &mut Option; + + fn prev_span(&self) -> Span; + fn set_prev_span(&mut self, span: Span); + + fn peek<'b>(&'b mut self) -> Option<&'b Self::Token> + where + Self::TokenAndSpan: 'b; + + fn store(&mut self, token: Self::Token) { + debug_assert!(self.next().is_none()); + debug_assert!(self.get_cur().is_none()); + let span = self.prev_span(); + let token = Self::TokenAndSpan::new(token, span, false); + self.set_cur(token); + } + + #[allow(dead_code)] + fn cur_debug<'b>(&'b self) -> Option<&'b Self::Token> + where + Self::TokenAndSpan: 'b, + { + self.get_cur().map(|it| it.token()) + } + + fn dump_cur(&mut self) -> String; + + /// Returns current token. + fn bump(&mut self) -> Self::Token { + let prev = match self.get_cur_mut().take() { + Some(t) => t, + None => unsafe { + debug_unreachable!( + "Current token is `None`. Parser should not call bump() without knowing \ + current token" + ) + }, + }; + self.set_prev_span(prev.span()); + prev.take_token() + } + + #[inline] + fn knows_cur(&self) -> bool { + self.get_cur().is_some() + } + + fn had_line_break_before_cur(&mut self) -> bool { + self.cur(); + self.get_cur() + .map(|it| it.had_line_break()) + .unwrap_or_else(|| true) + } + + /// This returns true on eof. + fn has_linebreak_between_cur_and_peeked(&mut self) -> bool { + let _ = self.peek(); + self.next().map(|item| item.had_line_break()).unwrap_or({ + // return true on eof. 
+ true + }) + } + + fn cut_lshift(&mut self) { + debug_assert!( + self.is(&Self::Token::LSHIFT), + "parser should only call cut_lshift when encountering LShift token" + ); + let span = self.cur_span().with_lo(self.cur_span().lo + BytePos(1)); + let token = Self::TokenAndSpan::new(Self::Token::LESS, span, false); + self.set_cur(token); + } + + fn merge_lt_gt(&mut self) { + debug_assert!( + self.is(&Self::Token::LESS) || self.is(&Self::Token::GREATER), + "parser should only call merge_lt_gt when encountering Less token" + ); + if self.peek().is_none() { + return; + } + let span = self.cur_span(); + let next = self.next().unwrap(); + if span.hi != next.span().lo { + return; + } + let cur = self.get_cur_mut().take().unwrap(); + let next = self.next_mut().take().unwrap(); + let cur_token = cur.token(); + let token = if cur_token.is_greater() { + let next_token = next.token(); + if next_token.is_greater() { + // >> + Self::Token::RSHIFT + } else if next_token.is_equal() { + // >= + Self::Token::GREATER_EQ + } else if next_token.is_rshift() { + // >>> + Self::Token::ZERO_FILL_RSHIFT + } else if next_token.is_greater_eq() { + // >>= + Self::Token::RSHIFT_EQ + } else if next_token.is_rshift_eq() { + // >>>= + Self::Token::ZERO_FILL_RSHIFT_EQ + } else { + self.set_cur(cur); + self.set_next(Some(next)); + return; + } + } else if cur_token.is_less() { + let next_token = next.token(); + if next_token.is_less() { + // << + Self::Token::LSHIFT + } else if next_token.is_equal() { + // <= + Self::Token::LESS_EQ + } else if next_token.is_less_eq() { + // <<= + Self::Token::LSHIFT_EQ + } else { + self.set_cur(cur); + self.set_next(Some(next)); + return; + } + } else { + self.set_cur(cur); + self.set_next(Some(next)); + return; + }; + let span = span.with_hi(next.span().hi); + let token = Self::TokenAndSpan::new(token, span, cur.had_line_break()); + self.set_cur(token); + } + + #[inline(always)] + fn is(&mut self, expected: &Self::Token) -> bool { + self.cur().is_some_and(|cur| cur == 
expected) + } + + #[inline(always)] + fn eat(&mut self, expected: &Self::Token) -> bool { + let v = self.is(expected); + if v { + self.bump(); + } + v + } + + /// Returns start of current token. + #[inline] + fn cur_pos(&mut self) -> BytePos { + let _ = self.cur(); + self.get_cur() + .map(|item| item.span().lo) + .unwrap_or_else(|| { + // eof + self.last_pos() + }) + } + + #[inline] + fn cur_span(&self) -> Span { + let data = self + .get_cur() + .map(|item| item.span()) + .unwrap_or(self.prev_span()); + Span::new(data.lo, data.hi) + } + + /// Returns last byte position of previous token. + #[inline] + fn last_pos(&self) -> BytePos { + self.prev_span().hi + } + + #[inline] + fn get_ctx(&self) -> Context { + self.iter().ctx() + } + + #[inline] + fn set_ctx(&mut self, ctx: Context) { + self.iter_mut().set_ctx(ctx); + } + + #[inline] + fn syntax(&self) -> Syntax { + self.iter().syntax() + } + + #[inline] + fn target(&self) -> EsVersion { + self.iter().target() + } + + #[inline] + fn set_expr_allowed(&mut self, allow: bool) { + self.iter_mut().set_expr_allowed(allow) + } + + #[inline] + fn set_next_regexp(&mut self, start: Option) { + self.iter_mut().set_next_regexp(start); + } + + #[inline] + fn token_context<'b>(&'b self) -> &'b crate::lexer::TokenContexts + where + Self::I: 'b, + { + self.iter().token_context() + } + + #[inline] + fn token_context_mut<'b>(&'b mut self) -> &'b mut crate::lexer::TokenContexts + where + Self::I: 'b, + { + self.iter_mut().token_context_mut() + } + + #[inline] + fn set_token_context(&mut self, c: crate::lexer::TokenContexts) { + self.iter_mut().set_token_context(c); + } + + #[inline] + fn end_pos(&self) -> BytePos { + self.iter().end_pos() + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/class_and_fn.rs b/crates/swc_ecma_lexer/src/common/parser/class_and_fn.rs new file mode 100644 index 000000000000..934d88c2f3ec --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/class_and_fn.rs @@ -0,0 +1,1643 @@ +use 
std::ops::DerefMut; + +use swc_common::{BytePos, Span, Spanned}; +use swc_ecma_ast::*; + +use super::{ + buffer::Buffer, + expr::{parse_args, parse_assignment_expr, parse_lhs_expr}, + has_use_strict, + ident::{parse_binding_ident, parse_opt_binding_ident, parse_private_name}, + is_constructor, + output_type::OutputType, + pat::parse_formal_params, + stmt::parse_block, + typescript::{parse_ts_modifier, parse_ts_type_args, try_parse_ts_type_ann}, + PResult, Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + expr::parse_subscripts, + ident::parse_ident, + is_invalid_class_name::IsInvalidClassName, + is_not_this, + is_simple_param_list::IsSimpleParameterList, + pat::{parse_constructor_params, parse_unique_formal_params}, + typescript::{ + parse_ts_heritage_clause, parse_ts_type_ann, parse_ts_type_or_type_predicate_ann, + parse_ts_type_params, try_parse_ts_index_signature, try_parse_ts_type_params, + }, + }, + }, + error::SyntaxError, + TokenContext, +}; + +struct MakeMethodArgs { + start: BytePos, + accessibility: Option, + is_abstract: bool, + static_token: Option, + decorators: Vec, + is_optional: bool, + is_override: bool, + key: Key, + kind: MethodKind, + is_async: bool, + is_generator: bool, +} + +/// If `required` is `true`, this never returns `None`. +pub fn parse_maybe_opt_binding_ident<'a>( + p: &mut impl Parser<'a>, + required: bool, + disallow_let: bool, +) -> PResult> { + if required { + parse_binding_ident(p, disallow_let).map(|v| v.id).map(Some) + } else { + parse_opt_binding_ident(p, disallow_let).map(|v| v.map(|v| v.id)) + } +} + +fn parse_maybe_decorator_args<'a, P: Parser<'a>>(p: &mut P, expr: Box) -> PResult> { + let type_args = if p.input().syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + Some(parse_ts_type_args(p)?) 
+ } else { + None + }; + + if type_args.is_none() && !p.input_mut().is(&P::Token::LPAREN) { + return Ok(expr); + } + + let args = parse_args(p, false)?; + Ok(CallExpr { + span: p.span(expr.span_lo()), + callee: Callee::Expr(expr), + args, + ..Default::default() + } + .into()) +} + +pub fn parse_decorators<'a, P: Parser<'a>>( + p: &mut P, + allow_export: bool, +) -> PResult> { + if !p.syntax().decorators() { + return Ok(Vec::new()); + } + trace_cur!(p, parse_decorators); + + let mut decorators = Vec::new(); + let start = p.cur_pos(); + + while p.input_mut().is(&P::Token::AT) { + decorators.push(parse_decorator(p)?); + } + if decorators.is_empty() { + return Ok(decorators); + } + + if p.input_mut().is(&P::Token::EXPORT) { + if !p.ctx().contains(Context::InClass) + && !p.ctx().contains(Context::InFunction) + && !allow_export + { + syntax_error!(p, p.input().cur_span(), SyntaxError::ExportNotAllowed); + } + + if !p.ctx().contains(Context::InClass) + && !p.ctx().contains(Context::InFunction) + && !p.syntax().decorators_before_export() + { + syntax_error!(p, p.span(start), SyntaxError::DecoratorOnExport); + } + } else if !p.input_mut().is(&P::Token::CLASS) { + // syntax_error!(p, p.span(start), + // SyntaxError::InvalidLeadingDecorator) + } + + Ok(decorators) +} + +fn parse_decorator<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + trace_cur!(p, parse_decorator); + + p.assert_and_bump(&P::Token::AT)?; + + let expr = if p.input_mut().eat(&P::Token::LPAREN) { + let expr = p.parse_expr()?; + expect!(p, &P::Token::RPAREN); + expr + } else { + let expr = parse_ident(p, false, false).map(Expr::from).map(Box::new)?; + + parse_subscripts(p, Callee::Expr(expr), false, true)? 
+ }; + + let expr = parse_maybe_decorator_args(p, expr)?; + + Ok(Decorator { + span: p.span(start), + expr, + }) +} + +pub fn parse_access_modifier<'a>(p: &mut impl Parser<'a>) -> PResult> { + Ok( + parse_ts_modifier(p, &["public", "protected", "private", "in", "out"], false)?.and_then( + |s| match s { + "public" => Some(Accessibility::Public), + "protected" => Some(Accessibility::Protected), + "private" => Some(Accessibility::Private), + other => { + p.emit_err(p.input().prev_span(), SyntaxError::TS1274(other.into())); + None + } + }, + ), + ) +} + +pub fn parse_super_class<'a, P: Parser<'a>>( + p: &mut P, +) -> PResult<(Box, Option>)> { + let super_class = parse_lhs_expr(p)?; + match *super_class { + Expr::TsInstantiation(TsInstantiation { + expr, type_args, .. + }) => Ok((expr, Some(type_args))), + _ => { + // We still need to parse TS type arguments, + // because in some cases "super class" returned by `parse_lhs_expr` + // may not include `TsExprWithTypeArgs` + // but it's a super class with type params, for example, in JSX. 
+ if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + Ok((super_class, parse_ts_type_args(p).map(Some)?)) + } else { + Ok((super_class, None)) + } + } + } +} + +pub fn is_class_method<'a, P: Parser<'a>>(p: &mut P) -> bool { + p.input_mut().is(&P::Token::LPAREN) + || (p.input().syntax().typescript() && p.input_mut().is(&P::Token::LESS)) + || (p.input().syntax().typescript() && p.input_mut().is(&P::Token::JSX_TAG_START)) +} + +pub fn is_class_property<'a, P: Parser<'a>>(p: &mut P, asi: bool) -> bool { + (p.input().syntax().typescript() + && p.input_mut() + .cur() + .is_some_and(|cur| cur.is_bang() || cur.is_colon())) + || p.input_mut() + .cur() + .is_some_and(|cur| cur.is_equal() || cur.is_rbrace()) + || if asi { + p.is_general_semi() + } else { + p.input_mut().is(&P::Token::SEMI) + } +} + +pub fn parse_class_prop_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + if p.input_mut().is(&P::Token::HASH) { + let name = parse_private_name(p)?; + if name.name == "constructor" { + p.emit_err(name.span, SyntaxError::PrivateConstructor); + } + Ok(Key::Private(name)) + } else { + p.parse_prop_name().map(Key::Public) + } +} + +/// `parse_args` closure should not eat '(' or ')'. +pub fn parse_fn_args_body<'a, P: Parser<'a>, F>( + p: &mut P, + decorators: Vec, + start: BytePos, + parse_args: F, + is_async: bool, + is_generator: bool, +) -> PResult> +where + F: FnOnce(&mut P) -> PResult>, +{ + trace_cur!(p, parse_fn_args_body); + // let prev_in_generator = p.ctx().in_generator; + let mut ctx = p.ctx(); + ctx.set(Context::InAsync, is_async); + ctx.set(Context::InGenerator, is_generator); + + p.with_ctx(ctx).parse_with(|p| { + let type_params = if p.syntax().typescript() { + p.in_type().parse_with(|p| { + trace_cur!(p, parse_fn_args_body__type_params); + + Ok(if p.input_mut().is(&P::Token::LESS) { + Some(parse_ts_type_params(p, false, true)?) 
+ } else if p.input_mut().is(&P::Token::JSX_TAG_START) { + debug_assert_eq!( + p.input().token_context().current(), + Some(TokenContext::JSXOpeningTag) + ); + p.input_mut().token_context_mut().pop(); + debug_assert_eq!( + p.input().token_context().current(), + Some(TokenContext::JSXExpr) + ); + p.input_mut().token_context_mut().pop(); + + Some(parse_ts_type_params(p, false, true)?) + } else { + None + }) + })? + } else { + None + }; + + expect!(p, &P::Token::LPAREN); + + let mut arg_ctx = (p.ctx() | Context::InParameters) & !Context::InFunction; + arg_ctx.set(Context::InAsync, is_async); + arg_ctx.set(Context::InGenerator, is_generator); + let params = p.with_ctx(arg_ctx).parse_with(|p| parse_args(p))?; + + expect!(p, &P::Token::RPAREN); + + // typescript extension + let return_type = if p.syntax().typescript() && p.input_mut().is(&P::Token::COLON) { + parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON).map(Some)? + } else { + None + }; + + let body: Option<_> = parse_fn_block_body( + p, + is_async, + is_generator, + false, + params.is_simple_parameter_list(), + )?; + + if p.syntax().typescript() && body.is_none() { + // Declare functions cannot have assignment pattern in parameters + for param in ¶ms { + // TODO: Search deeply for assignment pattern using a Visitor + + let span = match ¶m.pat { + Pat::Assign(ref p) => Some(p.span()), + _ => None, + }; + + if let Some(span) = span { + p.emit_err(span, SyntaxError::TS2371) + } + } + } + + Ok(Box::new(Function { + span: p.span(start), + decorators, + type_params, + params, + body, + is_async, + is_generator, + return_type, + ctxt: Default::default(), + })) + }) +} + +pub fn parse_async_fn_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let start = p.cur_pos(); + expect!(p, &P::Token::ASYNC); + parse_fn(p, None, Some(start), Vec::new()) +} + +/// Parse function expression +pub fn parse_fn_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + parse_fn(p, None, None, Vec::new()) +} + +pub fn parse_async_fn_decl<'a, 
P: Parser<'a>>( + p: &mut P, + decorators: Vec, +) -> PResult { + let start = p.cur_pos(); + expect!(p, &P::Token::ASYNC); + parse_fn(p, None, Some(start), decorators) +} + +pub fn parse_fn_decl<'a, P: Parser<'a>>(p: &mut P, decorators: Vec) -> PResult { + parse_fn(p, None, None, decorators) +} + +pub fn parse_default_async_fn<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, +) -> PResult { + let start_of_async = p.cur_pos(); + expect!(p, &P::Token::ASYNC); + parse_fn(p, Some(start), Some(start_of_async), decorators) +} + +pub fn parse_default_fn<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, +) -> PResult { + parse_fn(p, Some(start), None, decorators) +} + +fn parse_fn_inner<'a, P: Parser<'a>>( + p: &mut P, + _start_of_output_type: Option, + start_of_async: Option, + decorators: Vec, + is_fn_expr: bool, + is_ident_required: bool, +) -> PResult<(Option, Box)> { + let start = start_of_async.unwrap_or_else(|| p.cur_pos()); + p.assert_and_bump(&P::Token::FUNCTION)?; + let is_async = start_of_async.is_some(); + + let is_generator = p.input_mut().eat(&P::Token::MUL); + + let ident = if is_fn_expr { + let mut ctx = p.ctx() & !Context::AllowDirectSuper & !Context::InClassField; + ctx.set(Context::InAsync, is_async); + ctx.set(Context::InGenerator, is_generator); + + parse_maybe_opt_binding_ident(p.with_ctx(ctx).deref_mut(), is_ident_required, false)? + } else { + // function declaration does not change context for `BindingIdentifier`. + parse_maybe_opt_binding_ident( + p.with_ctx(p.ctx() & !Context::AllowDirectSuper & !Context::InClassField) + .deref_mut(), + is_ident_required, + false, + )? 
+ }; + + p.with_ctx( + p.ctx() + & !Context::AllowDirectSuper + & !Context::InClassField + & !Context::WillExpectColonForCond, + ) + .parse_with(|p| { + let f = parse_fn_args_body( + p, + decorators, + start, + parse_formal_params, + is_async, + is_generator, + )?; + + if is_fn_expr && f.body.is_none() { + unexpected!(p, "{"); + } + + Ok((ident, f)) + }) +} + +fn parse_fn<'a, P: Parser<'a>, T>( + p: &mut P, + start_of_output_type: Option, + start_of_async: Option, + decorators: Vec, +) -> PResult +where + T: OutputType, +{ + let start = start_of_async.unwrap_or_else(|| p.cur_pos()); + let (ident, f) = parse_fn_inner( + p, + start_of_output_type, + start_of_async, + decorators, + T::is_fn_expr(), + T::IS_IDENT_REQUIRED, + )?; + + match T::finish_fn(p.span(start_of_output_type.unwrap_or(start)), ident, f) { + Ok(v) => Ok(v), + Err(kind) => syntax_error!(p, kind), + } +} + +pub fn parse_class_decl<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + class_start: BytePos, + decorators: Vec, + is_abstract: bool, +) -> PResult { + parse_class(p, start, class_start, decorators, is_abstract) +} + +pub fn parse_class_expr<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, +) -> PResult> { + parse_class(p, start, start, decorators, false) +} + +pub fn parse_default_class<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + class_start: BytePos, + decorators: Vec, + is_abstract: bool, +) -> PResult { + parse_class(p, start, class_start, decorators, is_abstract) +} + +fn make_method<'a, P: Parser<'a>, F>( + p: &mut P, + parse_args: F, + MakeMethodArgs { + start, + accessibility, + is_abstract, + static_token, + decorators, + is_optional, + is_override, + key, + kind, + is_async, + is_generator, + }: MakeMethodArgs, +) -> PResult +where + F: FnOnce(&mut P) -> PResult>, +{ + trace_cur!(p, make_method); + + let is_static = static_token.is_some(); + let function = p + .with_ctx((p.ctx() | Context::AllowDirectSuper) & !Context::InClassField) + .parse_with(|p| { + 
parse_fn_args_body(p, decorators, start, parse_args, is_async, is_generator) + })?; + + match kind { + MethodKind::Getter | MethodKind::Setter + if p.input().syntax().typescript() && p.input().target() == EsVersion::Es3 => + { + p.emit_err(key.span(), SyntaxError::TS1056); + } + _ => {} + } + + match key { + Key::Private(key) => { + let span = p.span(start); + if accessibility.is_some() { + p.emit_err(span.with_hi(key.span_hi()), SyntaxError::TS18010); + } + + Ok(PrivateMethod { + span, + + accessibility, + is_abstract, + is_optional, + is_override, + + is_static, + key, + function, + kind, + } + .into()) + } + Key::Public(key) => { + let span = p.span(start); + if is_abstract && function.body.is_some() { + p.emit_err(span, SyntaxError::TS1245) + } + Ok(ClassMethod { + span, + + accessibility, + is_abstract, + is_optional, + is_override, + + is_static, + key, + function, + kind, + } + .into()) + } + } +} + +pub fn parse_fn_block_or_expr_body<'a, P: Parser<'a>>( + p: &mut P, + is_async: bool, + is_generator: bool, + is_arrow_function: bool, + is_simple_parameter_list: bool, +) -> PResult> { + parse_fn_body( + p, + is_async, + is_generator, + is_arrow_function, + is_simple_parameter_list, + |p, is_simple_parameter_list| { + if p.input_mut().is(&P::Token::LBRACE) { + parse_block(p, false) + .map(|block_stmt| { + if !is_simple_parameter_list { + if let Some(span) = has_use_strict(&block_stmt) { + p.emit_err(span, SyntaxError::IllegalLanguageModeDirective); + } + } + BlockStmtOrExpr::BlockStmt(block_stmt) + }) + .map(Box::new) + } else { + parse_assignment_expr(p) + .map(BlockStmtOrExpr::Expr) + .map(Box::new) + } + }, + ) +} + +fn parse_fn_body<'a, P: Parser<'a>, T>( + p: &mut P, + is_async: bool, + is_generator: bool, + is_arrow_function: bool, + is_simple_parameter_list: bool, + f: impl FnOnce(&mut P, bool) -> PResult, +) -> PResult { + if p.ctx().contains(Context::InDeclare) + && p.syntax().typescript() + && p.input_mut().is(&P::Token::LBRACE) + { + // p.emit_err( + 
// p.ctx().span_of_fn_name.expect("we are not in function"), + // SyntaxError::TS1183, + // ); + p.emit_err(p.input().cur_span(), SyntaxError::TS1183); + } + + let mut ctx = (p.ctx() | Context::InFunction) + & !Context::InStaticBlock + & !Context::IsBreakAllowed + & !Context::IsContinueAllowed + & !Context::TopLevel; + ctx.set(Context::InAsync, is_async); + ctx.set(Context::InGenerator, is_generator); + ctx.set( + Context::InsideNonArrowFunctionScope, + if is_arrow_function { + p.ctx().contains(Context::InsideNonArrowFunctionScope) + } else { + true + }, + ); + + f( + p.with_ctx(ctx) + .with_state(crate::common::parser::state::State::default()) + .deref_mut(), + is_simple_parameter_list, + ) +} + +pub(super) fn parse_fn_block_body<'a, P: Parser<'a>>( + p: &mut P, + is_async: bool, + is_generator: bool, + is_arrow_function: bool, + is_simple_parameter_list: bool, +) -> PResult> { + parse_fn_body( + p, + is_async, + is_generator, + is_arrow_function, + is_simple_parameter_list, + |p, is_simple_parameter_list| { + // allow omitting body and allow placing `{` on next line + if p.input().syntax().typescript() + && !p.input_mut().is(&P::Token::LBRACE) + && p.eat_general_semi() + { + return Ok(None); + } + let block = parse_block(p.include_in_expr(true).deref_mut(), true); + block.map(|block_stmt| { + if !is_simple_parameter_list { + if let Some(span) = has_use_strict(&block_stmt) { + p.emit_err(span, SyntaxError::IllegalLanguageModeDirective); + } + } + Some(block_stmt) + }) + }, + ) +} + +fn make_property<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, + accessibility: Option, + key: Key, + is_static: bool, + accessor_token: Option, + is_optional: bool, + readonly: bool, + declare: bool, + is_abstract: bool, + is_override: bool, +) -> PResult { + if is_constructor(&key) { + syntax_error!(p, key.span(), SyntaxError::PropertyNamedConstructor); + } + if key.is_private() { + if declare { + p.emit_err( + key.span(), + 
SyntaxError::PrivateNameModifier("declare".into()), + ) + } + if is_abstract { + p.emit_err( + key.span(), + SyntaxError::PrivateNameModifier("abstract".into()), + ) + } + } + let definite = + p.input().syntax().typescript() && !is_optional && p.input_mut().eat(&P::Token::BANG); + + let type_ann = try_parse_ts_type_ann(p)?; + + let ctx = p.ctx() | Context::IncludeInExpr | Context::InClassField; + p.with_ctx(ctx).parse_with(|p| { + let value = if p.input_mut().is(&P::Token::EQUAL) { + p.assert_and_bump(&P::Token::EQUAL)?; + Some(parse_assignment_expr(p)?) + } else { + None + }; + + if !p.eat_general_semi() { + p.emit_err(p.input().cur_span(), SyntaxError::TS1005); + } + + if accessor_token.is_some() { + return Ok(ClassMember::AutoAccessor(AutoAccessor { + span: p.span(start), + key, + value, + type_ann, + is_static, + decorators, + accessibility, + is_abstract, + is_override, + definite, + })); + } + + Ok(match key { + Key::Private(key) => { + let span = p.span(start); + if accessibility.is_some() { + p.emit_err(span.with_hi(key.span_hi()), SyntaxError::TS18010); + } + + PrivateProp { + span: p.span(start), + key, + value, + is_static, + decorators, + accessibility, + is_optional, + is_override, + readonly, + type_ann, + definite, + ctxt: Default::default(), + } + .into() + } + Key::Public(key) => { + let span = p.span(start); + if is_abstract && value.is_some() { + p.emit_err(span, SyntaxError::TS1267) + } + ClassProp { + span, + key, + value, + is_static, + decorators, + accessibility, + is_abstract, + is_optional, + is_override, + readonly, + declare, + definite, + type_ann, + } + .into() + } + }) + }) +} + +fn parse_static_block<'a, P: Parser<'a>>(p: &mut P, start: BytePos) -> PResult { + let body = parse_block( + p.with_ctx( + p.ctx() | Context::InStaticBlock | Context::InClassField | Context::AllowUsingDecl, + ) + .deref_mut(), + false, + )?; + + let span = p.span(start); + Ok(StaticBlock { span, body }.into()) +} + +fn parse_class_member_with_is_static<'a, P: 
Parser<'a>>( + p: &mut P, + start: BytePos, + declare_token: Option, + accessibility: Option, + static_token: Option, + accessor_token: Option, + decorators: Vec, +) -> PResult { + let mut is_static = static_token.is_some(); + + let mut is_abstract = false; + let mut is_override = false; + let mut readonly = None; + let mut modifier_span = None; + let declare = declare_token.is_some(); + while let Some(modifier) = + parse_ts_modifier(p, &["abstract", "readonly", "override", "static"], true)? + { + modifier_span = Some(p.input().prev_span()); + match modifier { + "abstract" => { + if is_abstract { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1030("abstract".into()), + ); + } else if is_override { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1029("abstract".into(), "override".into()), + ); + } + is_abstract = true; + } + "override" => { + if is_override { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1030("override".into()), + ); + } else if readonly.is_some() { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1029("override".into(), "readonly".into()), + ); + } else if declare { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1243("override".into(), "declare".into()), + ); + } else if !p.ctx().contains(Context::HasSuperClass) { + p.emit_err(p.input().prev_span(), SyntaxError::TS4112); + } + is_override = true; + } + "readonly" => { + let readonly_span = p.input().prev_span(); + if readonly.is_some() { + p.emit_err(readonly_span, SyntaxError::TS1030("readonly".into())); + } else { + readonly = Some(readonly_span); + } + } + "static" => { + if is_override { + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1029("static".into(), "override".into()), + ); + } + + is_static = true; + } + _ => {} + } + } + + let accessor_token = accessor_token.or_else(|| { + if p.syntax().auto_accessors() && readonly.is_none() { + let start = p.cur_pos(); + if !peek!(p).is_some_and(|cur| cur.is_lparen()) + && 
p.input_mut().eat(&P::Token::ACCESSOR) + { + Some(p.span(start)) + } else { + None + } + } else { + None + } + }); + + if is_static && p.input_mut().is(&P::Token::LBRACE) { + if let Some(span) = declare_token { + p.emit_err(span, SyntaxError::TS1184); + } + if accessibility.is_some() { + p.emit_err(p.input().cur_span(), SyntaxError::TS1184); + } + return parse_static_block(p, start); + } + if p.input_mut().is(&P::Token::STATIC) && peek!(p).is_some_and(|cur| cur.is_lbrace()) { + // For "readonly", "abstract" and "override" + if let Some(span) = modifier_span { + p.emit_err(span, SyntaxError::TS1184); + } + if let Some(span) = static_token { + p.emit_err(span, SyntaxError::TS1184); + } + p.bump(); // consume "static" + return parse_static_block(p, start); + } + + if p.input().syntax().typescript() && !is_abstract && !is_override && accessibility.is_none() { + let idx = try_parse_ts_index_signature(p, start, readonly.is_some(), is_static)?; + if let Some(idx) = idx { + return Ok(idx.into()); + } + } + + if p.input_mut().eat(&P::Token::MUL) { + // generator method + let key = parse_class_prop_name(p)?; + if readonly.is_some() { + p.emit_err(p.span(start), SyntaxError::ReadOnlyMethod); + } + if is_constructor(&key) { + p.emit_err(p.span(start), SyntaxError::GeneratorConstructor); + } + + return make_method( + p, + parse_unique_formal_params, + MakeMethodArgs { + start, + decorators, + is_async: false, + is_generator: true, + accessibility, + is_abstract, + is_override, + is_optional: false, + static_token, + key, + kind: MethodKind::Method, + }, + ); + } + + trace_cur!(p, parse_class_member_with_is_static__normal_class_member); + let key = if readonly.is_some() + && p.input_mut() + .cur() + .is_some_and(|cur| cur.is_bang() || cur.is_colon()) + { + Key::Public(PropName::Ident(IdentName::new( + "readonly".into(), + readonly.unwrap(), + ))) + } else { + parse_class_prop_name(p)? 
+ }; + let is_optional = p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + + if is_class_method(p) { + // handle a(){} / get(){} / set(){} / async(){} + + trace_cur!(p, parse_class_member_with_is_static__normal_class_method); + + if let Some(token) = declare_token { + p.emit_err(token, SyntaxError::TS1031) + } + + if readonly.is_some() { + syntax_error!(p, p.span(start), SyntaxError::ReadOnlyMethod); + } + let is_constructor = is_constructor(&key); + + if is_constructor { + if p.syntax().typescript() && is_override { + p.emit_err(p.span(start), SyntaxError::TS1089("override".into())); + } + + if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + let start = p.cur_pos(); + if peek!(p).is_some_and(|cur| cur.is_less()) { + p.assert_and_bump(&P::Token::LESS)?; + let start2 = p.cur_pos(); + p.assert_and_bump(&P::Token::GREATER)?; + + p.emit_err(p.span(start), SyntaxError::TS1098); + p.emit_err(p.span(start2), SyntaxError::TS1092); + } else { + let type_params = try_parse_ts_type_params(p, false, true)?; + + if let Some(type_params) = type_params { + for param in type_params.params { + p.emit_err(param.span(), SyntaxError::TS1092); + } + } + } + } + + expect!(p, &P::Token::LPAREN); + let params = parse_constructor_params(p)?; + expect!(p, &P::Token::RPAREN); + + if p.syntax().typescript() && p.input_mut().is(&P::Token::COLON) { + let start = p.cur_pos(); + let type_ann = parse_ts_type_ann(p, true, start)?; + + p.emit_err(type_ann.type_ann.span(), SyntaxError::TS1093); + } + + let body: Option<_> = + parse_fn_block_body(p, false, false, false, params.is_simple_parameter_list())?; + + if body.is_none() { + for param in params.iter() { + if param.is_ts_param_prop() { + p.emit_err(param.span(), SyntaxError::TS2369) + } + } + } + + if p.syntax().typescript() && body.is_none() { + // Declare constructors cannot have assignment pattern in parameters + for param in ¶ms { + // TODO: Search deeply for assignment pattern using a Visitor + + 
let span = match *param { + ParamOrTsParamProp::Param(ref param) => match param.pat { + Pat::Assign(ref p) => Some(p.span()), + _ => None, + }, + ParamOrTsParamProp::TsParamProp(TsParamProp { + param: TsParamPropParam::Assign(ref p), + .. + }) => Some(p.span()), + _ => None, + }; + + if let Some(span) = span { + p.emit_err(span, SyntaxError::TS2371) + } + } + } + + if let Some(static_token) = static_token { + p.emit_err(static_token, SyntaxError::TS1089("static".into())) + } + + if let Some(span) = modifier_span { + if is_abstract { + p.emit_err(span, SyntaxError::TS1242); + } + } + + return Ok(ClassMember::Constructor(Constructor { + span: p.span(start), + accessibility, + key: match key { + Key::Public(key) => key, + _ => unreachable!("is_constructor() returns false for PrivateName"), + }, + is_optional, + params, + body, + ..Default::default() + })); + } else { + return make_method( + p, + parse_formal_params, + MakeMethodArgs { + start, + is_optional, + accessibility, + decorators, + is_abstract, + is_override, + static_token, + kind: MethodKind::Method, + key, + is_async: false, + is_generator: false, + }, + ); + } + } + + let is_next_line_generator = + p.input_mut().had_line_break_before_cur() && p.input_mut().is(&P::Token::MUL); + let getter_or_setter_ident = match key { + // `get\n*` is an uninitialized property named 'get' followed by a generator. 
+ Key::Public(PropName::Ident(ref i)) + if (i.sym == "get" || i.sym == "set") + && !is_class_property(p, /* asi */ false) + && !is_next_line_generator => + { + Some(i) + } + _ => None, + }; + + if getter_or_setter_ident.is_none() && is_class_property(p, /* asi */ true) { + return make_property( + p, + start, + decorators, + accessibility, + key, + is_static, + accessor_token, + is_optional, + readonly.is_some(), + declare, + is_abstract, + is_override, + ); + } + + if match key { + Key::Public(PropName::Ident(ref i)) => i.sym == "async", + _ => false, + } && !p.input_mut().had_line_break_before_cur() + { + // handle async foo(){} + + if parse_ts_modifier(p, &["override"], false)?.is_some() { + is_override = true; + p.emit_err( + p.input().prev_span(), + SyntaxError::TS1029("override".into(), "async".into()), + ); + } + + let is_generator = p.input_mut().eat(&P::Token::MUL); + let key = parse_class_prop_name(p)?; + if is_constructor(&key) { + syntax_error!(p, key.span(), SyntaxError::AsyncConstructor) + } + if readonly.is_some() { + syntax_error!(p, p.span(start), SyntaxError::ReadOnlyMethod); + } + + // handle async foo(){} + let is_optional = is_optional + || p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_method( + p, + parse_unique_formal_params, + MakeMethodArgs { + start, + static_token, + key, + is_abstract, + accessibility, + is_optional, + is_override, + decorators, + kind: MethodKind::Method, + is_async: true, + is_generator, + }, + ); + } + + if let Some(i) = getter_or_setter_ident { + let key_span = key.span(); + + // handle get foo(){} / set foo(v){} + let key = parse_class_prop_name(p)?; + + if readonly.is_some() { + p.emit_err(key_span, SyntaxError::GetterSetterCannotBeReadonly); + } + + if is_constructor(&key) { + p.emit_err(key_span, SyntaxError::ConstructorAccessor); + } + + return match &*i.sym { + "get" => make_method( + p, + |p| { + let params = parse_formal_params(p)?; + + if params.iter().filter(|p| 
is_not_this(p)).count() != 0 { + p.emit_err(key_span, SyntaxError::GetterParam); + } + + Ok(params) + }, + MakeMethodArgs { + decorators, + start, + is_abstract, + is_async: false, + is_generator: false, + is_optional, + is_override, + accessibility, + static_token, + key, + kind: MethodKind::Getter, + }, + ), + "set" => make_method( + p, + |p| { + let params = parse_formal_params(p)?; + + if params.iter().filter(|p| is_not_this(p)).count() != 1 { + p.emit_err(key_span, SyntaxError::SetterParam); + } + + if !params.is_empty() { + if let Pat::Rest(..) = params[0].pat { + p.emit_err(params[0].pat.span(), SyntaxError::RestPatInSetter); + } + } + + Ok(params) + }, + MakeMethodArgs { + decorators, + start, + is_optional, + is_abstract, + is_override, + is_async: false, + is_generator: false, + accessibility, + static_token, + key, + kind: MethodKind::Setter, + }, + ), + _ => unreachable!(), + }; + } + + unexpected!(p, "* for generator, private key, identifier or async") +} + +fn parse_class_member<'a, P: Parser<'a>>(p: &mut P) -> PResult { + trace_cur!(p, parse_class_member); + + let start = p.cur_pos(); + let decorators = parse_decorators(p, false)?; + let declare = p.syntax().typescript() && p.input_mut().eat(&P::Token::DECLARE); + let accessibility = if p.input().syntax().typescript() { + parse_access_modifier(p)? + } else { + None + }; + // Allow `private declare`. 
+ let declare = declare || p.syntax().typescript() && p.input_mut().eat(&P::Token::DECLARE); + + let declare_token = if declare { + // Handle declare(){} + if is_class_method(p) { + let key = Key::Public(PropName::Ident(IdentName::new( + "declare".into(), + p.span(start), + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_method( + p, + parse_unique_formal_params, + MakeMethodArgs { + start, + accessibility, + decorators, + is_abstract: false, + is_optional, + is_override: false, + is_async: false, + is_generator: false, + static_token: None, + key, + kind: MethodKind::Method, + }, + ); + } else if is_class_property(p, /* asi */ true) + || (p.syntax().typescript() && p.input_mut().is(&P::Token::QUESTION)) + { + // Property named `declare` + + let key = Key::Public(PropName::Ident(IdentName::new( + "declare".into(), + p.span(start), + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_property( + p, + start, + decorators, + accessibility, + key, + false, + None, + is_optional, + false, + false, + false, + false, + ); + } else { + Some(p.span(start)) + } + } else { + None + }; + + let static_token = { + let start = p.cur_pos(); + if p.input_mut().eat(&P::Token::STATIC) { + Some(p.span(start)) + } else { + None + } + }; + + let accessor_token = if p.syntax().auto_accessors() { + let start = p.cur_pos(); + if p.input_mut().eat(&P::Token::ACCESSOR) { + Some(p.span(start)) + } else { + None + } + } else { + None + }; + + if let Some(accessor_token) = accessor_token { + // Handle accessor(){} + if is_class_method(p) { + let key = Key::Public(PropName::Ident(IdentName::new( + "accessor".into(), + accessor_token, + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_method( + p, + parse_unique_formal_params, + MakeMethodArgs { + start, + accessibility, + decorators, + is_abstract: 
false, + is_optional, + is_override: false, + is_async: false, + is_generator: false, + static_token, + key, + kind: MethodKind::Method, + }, + ); + } else if is_class_property(p, /* asi */ true) + || (p.syntax().typescript() && p.input_mut().is(&P::Token::QUESTION)) + { + // Property named `accessor` + + let key = Key::Public(PropName::Ident(IdentName::new( + "accessor".into(), + accessor_token, + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + let is_static = static_token.is_some(); + return make_property( + p, + start, + decorators, + accessibility, + key, + is_static, + None, + is_optional, + false, + declare, + false, + false, + ); + } + } + + if let Some(static_token) = static_token { + // Handle static(){} + if is_class_method(p) { + let key = Key::Public(PropName::Ident(IdentName::new( + "static".into(), + static_token, + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_method( + p, + parse_unique_formal_params, + MakeMethodArgs { + start, + accessibility, + decorators, + is_abstract: false, + is_optional, + is_override: false, + is_async: false, + is_generator: false, + static_token: None, + key, + kind: MethodKind::Method, + }, + ); + } else if is_class_property(p, /* asi */ false) + || (p.syntax().typescript() && p.input_mut().is(&P::Token::QUESTION)) + { + // Property named `static` + + // Avoid to parse + // static + // {} + let is_parsing_static_blocks = p.input_mut().is(&P::Token::LBRACE); + if !is_parsing_static_blocks { + let key = Key::Public(PropName::Ident(IdentName::new( + "static".into(), + static_token, + ))); + let is_optional = + p.input().syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION); + return make_property( + p, + start, + decorators, + accessibility, + key, + false, + accessor_token, + is_optional, + false, + declare, + false, + false, + ); + } + } else { + // TODO: error if static contains escape + 
} + } + + parse_class_member_with_is_static( + p, + start, + declare_token, + accessibility, + static_token, + accessor_token, + decorators, + ) +} + +fn parse_class_body<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let mut elems = Vec::with_capacity(32); + let mut has_constructor_with_body = false; + while !eof!(p) && !p.input_mut().is(&P::Token::RBRACE) { + if p.input_mut().eat(&P::Token::SEMI) { + let span = p.input().prev_span(); + elems.push(ClassMember::Empty(EmptyStmt { + span: Span::new(span.lo, span.hi), + })); + continue; + } + let mut p = p.with_ctx(p.ctx() | Context::AllowDirectSuper); + let elem = parse_class_member(p.deref_mut())?; + + if !p.ctx().contains(Context::InDeclare) { + if let ClassMember::Constructor(Constructor { + body: Some(..), + span, + .. + }) = elem + { + if has_constructor_with_body { + p.emit_err(span, SyntaxError::DuplicateConstructor); + } + has_constructor_with_body = true; + } + } + elems.push(elem); + } + Ok(elems) +} + +pub fn parse_class<'a, T>( + p: &mut impl Parser<'a>, + start: BytePos, + class_start: BytePos, + decorators: Vec, + is_abstract: bool, +) -> PResult +where + T: OutputType, +{ + let (ident, mut class) = parse_class_inner( + p.with_ctx(p.ctx() | Context::InClass).deref_mut(), + start, + class_start, + decorators, + T::IS_IDENT_REQUIRED, + )?; + + if is_abstract { + class.is_abstract = true + } else { + for member in class.body.iter() { + match member { + ClassMember::ClassProp(ClassProp { + is_abstract: true, + span, + .. + }) + | ClassMember::Method(ClassMethod { + span, + is_abstract: true, + .. 
+ }) => p.emit_err(*span, SyntaxError::TS1244), + _ => (), + } + } + } + + match T::finish_class(p.span(start), ident, class) { + Ok(v) => Ok(v), + Err(kind) => syntax_error!(p, kind), + } +} + +/// Not generic +fn parse_class_inner<'a, P: Parser<'a>>( + p: &mut P, + _start: BytePos, + class_start: BytePos, + decorators: Vec, + is_ident_required: bool, +) -> PResult<(Option, Box)> { + p.strict_mode().parse_with(|p| { + expect!(p, &P::Token::CLASS); + + let ident = parse_maybe_opt_binding_ident(p, is_ident_required, true)?; + if p.input().syntax().typescript() { + if let Some(span) = ident.invalid_class_name() { + p.emit_err(span, SyntaxError::TS2414); + } + } + + let type_params = if p.input().syntax().typescript() { + try_parse_ts_type_params(p, true, true)? + } else { + None + }; + + let (mut super_class, mut super_type_params) = if p.input_mut().eat(&P::Token::EXTENDS) { + let (super_class, super_type_params) = parse_super_class(p)?; + + if p.syntax().typescript() && p.input_mut().eat(&P::Token::COMMA) { + let exprs = parse_ts_heritage_clause(p)?; + + for e in &exprs { + p.emit_err(e.span(), SyntaxError::TS1174); + } + } + + (Some(super_class), super_type_params) + } else { + (None, None) + }; + + // Handle TS1172 + if p.input_mut().eat(&P::Token::EXTENDS) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1172); + + parse_super_class(p)?; + }; + + let implements = + if p.input().syntax().typescript() && p.input_mut().eat(&P::Token::IMPLEMENTS) { + parse_ts_heritage_clause(p)? 
+ } else { + Vec::with_capacity(4) + }; + + { + // Handle TS1175 + if p.input().syntax().typescript() && p.input_mut().eat(&P::Token::IMPLEMENTS) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1175); + + parse_ts_heritage_clause(p)?; + } + } + + // Handle TS1173 + if p.input().syntax().typescript() && p.input_mut().eat(&P::Token::EXTENDS) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1173); + + let (sc, type_params) = parse_super_class(p)?; + + if super_class.is_none() { + super_class = Some(sc); + if type_params.is_some() { + super_type_params = type_params; + } + } + } + + expect!(p, &P::Token::LBRACE); + let mut ctx = p.ctx(); + ctx.set(Context::HasSuperClass, super_class.is_some()); + let body = parse_class_body(p.with_ctx(ctx).deref_mut())?; + + if p.input_mut().cur().is_none() { + let eof_text = p.input_mut().dump_cur(); + p.emit_err( + p.input().cur_span(), + SyntaxError::Expected(format!("{:?}", P::Token::RBRACE), eof_text), + ); + } else { + expect!(p, &P::Token::RBRACE); + } + let end = p.last_pos(); + + Ok(( + ident, + Box::new(Class { + span: Span::new(class_start, end), + decorators, + is_abstract: false, + type_params, + super_class, + super_type_params, + body, + implements, + ..Default::default() + }), + )) + }) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/ctx.rs b/crates/swc_ecma_lexer/src/common/parser/ctx.rs new file mode 100644 index 000000000000..76b7bffa5262 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/ctx.rs @@ -0,0 +1,27 @@ +use std::ops::{Deref, DerefMut}; + +use crate::common::context::Context; + +pub struct WithCtx<'a, 'w, Parser: super::Parser<'a>> { + pub(super) inner: &'w mut Parser, + pub(super) orig_ctx: Context, + pub(super) marker: std::marker::PhantomData<&'a ()>, +} +impl<'a, Parser: super::Parser<'a>> Deref for WithCtx<'a, '_, Parser> { + type Target = Parser; + + fn deref(&self) -> &Parser { + self.inner + } +} +impl<'a, Parser: super::Parser<'a>> DerefMut for WithCtx<'a, '_, Parser> { + 
fn deref_mut(&mut self) -> &mut Parser { + self.inner + } +} + +impl<'a, Parser: super::Parser<'a>> Drop for WithCtx<'a, '_, Parser> { + fn drop(&mut self) { + self.inner.set_ctx(self.orig_ctx); + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/expr.rs b/crates/swc_ecma_lexer/src/common/parser/expr.rs new file mode 100644 index 000000000000..251b269da557 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/expr.rs @@ -0,0 +1,2536 @@ +use std::ops::DerefMut; + +use either::Either; +use rustc_hash::FxHashMap; +use swc_common::{util::take::Take, BytePos, Span, Spanned}; +use swc_ecma_ast::*; + +use super::{ + assign_target_or_spread::AssignTargetOrSpread, buffer::Buffer, ident::parse_ident_name, + PResult, Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + class_and_fn::{ + parse_async_fn_expr, parse_class_expr, parse_decorators, + parse_fn_block_or_expr_body, parse_fn_expr, + }, + expr_ext::ExprExt, + ident::{parse_binding_ident, parse_ident, parse_maybe_private_name}, + is_simple_param_list::IsSimpleParameterList, + jsx::{parse_jsx_element, parse_jsx_text}, + object::parse_object_expr, + pat::{parse_paren_items_as_params, reparse_expr_as_pat}, + pat_type::PatType, + typescript::{ + eat_any_ts_modifier, next_then_parse_ts_type, parse_ts_type, parse_ts_type_args, + parse_ts_type_assertion, parse_ts_type_or_type_predicate_ann, parse_ts_type_params, + try_parse_ts, try_parse_ts_generic_async_arrow_fn, try_parse_ts_type_ann, + try_parse_ts_type_args, + }, + unwrap_ts_non_null, + }, + }, + error::{Error, SyntaxError}, + TokenContext, +}; + +pub(super) fn is_start_of_left_hand_side_expr<'a>(p: &mut impl Parser<'a>) -> bool { + let ctx = p.ctx(); + let Some(cur) = p.input_mut().cur() else { + return false; + }; + cur.is_this() + || cur.is_null() + || cur.is_super() + || cur.is_true() + || cur.is_false() + || cur.is_num() + || cur.is_bigint() + || cur.is_str() + || cur.is_backquote() + || cur.is_lparen() + 
|| cur.is_lbrace() + || cur.is_lbracket() + || cur.is_function() + || cur.is_class() + || cur.is_new() + || cur.is_regexp() + || cur.is_ident_ref(ctx) + || cur.is_import() && { + peek!(p).is_some_and(|peek| peek.is_lparen() || peek.is_less() || peek.is_dot()) + } +} + +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +pub fn parse_array_lit<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_array_lit); + + let start = p.input_mut().cur_pos(); + + p.assert_and_bump(&P::Token::LBRACKET)?; + + let mut elems = Vec::with_capacity(8); + + while !eof!(p) && !p.input_mut().is(&P::Token::RBRACKET) { + if p.input_mut().is(&P::Token::COMMA) { + expect!(p, &P::Token::COMMA); + elems.push(None); + continue; + } + + elems.push(p.include_in_expr(true).parse_expr_or_spread().map(Some)?); + + if !p.input_mut().is(&P::Token::RBRACKET) { + expect!(p, &P::Token::COMMA); + if p.input_mut().is(&P::Token::RBRACKET) { + let prev_span = p.input().prev_span(); + p.state_mut().trailing_commas.insert(start, prev_span); + } + } + } + + expect!(p, &P::Token::RBRACKET); + + let span = p.span(start); + Ok(ArrayLit { span, elems }.into()) +} + +pub fn at_possible_async<'a, P: Parser<'a>>(p: &P, expr: &Expr) -> PResult { + // TODO(kdy1): !this.state.containsEsc && + Ok(p.state().potential_arrow_start == Some(expr.span_lo()) && expr.is_ident_ref_to("async")) +} + +fn parse_yield_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let start = p.input_mut().cur_pos(); + p.assert_and_bump(&P::Token::YIELD)?; + debug_assert!(p.ctx().contains(Context::InGenerator)); + + // Spec says + // YieldExpression cannot be used within the FormalParameters of a generator + // function because any expressions that are part of FormalParameters are + // evaluated before the resulting generator object is in a resumable state. 
+ if p.ctx().contains(Context::InParameters) && !p.ctx().contains(Context::InFunction) { + syntax_error!(p, p.input().prev_span(), SyntaxError::YieldParamInGen) + } + + let parse_with_arg = |p: &mut P| { + let has_star = p.input_mut().eat(&P::Token::MUL); + let err_span = p.span(start); + let arg = parse_assignment_expr(p).map_err(|err| { + Error::new( + err.span(), + SyntaxError::WithLabel { + inner: Box::new(err), + span: err_span, + note: "Tried to parse an argument of yield", + }, + ) + })?; + Ok(YieldExpr { + span: p.span(start), + arg: Some(arg), + delegate: has_star, + } + .into()) + }; + + if p.is_general_semi() || { + let Some(cur) = p.input_mut().cur() else { + return parse_with_arg(p); + }; + !cur.is_less() + && !cur.is_star() + && !cur.is_slash() + && !cur.is_slash_eq() + && !cur.starts_expr() + } { + Ok(YieldExpr { + span: p.span(start), + arg: None, + delegate: false, + } + .into()) + } else { + parse_with_arg(p) + } +} + +fn parse_tpl_elements<'a, P: Parser<'a>>( + p: &mut P, + is_tagged_tpl: bool, +) -> PResult<(Vec>, Vec)> { + trace_cur!(p, parse_tpl_elements); + + let mut exprs = Vec::new(); + + let cur_elem = p.parse_tpl_element(is_tagged_tpl)?; + let mut is_tail = cur_elem.tail; + let mut quasis = vec![cur_elem]; + + while !is_tail { + expect!(p, &P::Token::DOLLAR_LBRACE); + exprs.push(p.include_in_expr(true).parse_expr()?); + expect!(p, &P::Token::RBRACE); + let elem = p.parse_tpl_element(is_tagged_tpl)?; + is_tail = elem.tail; + quasis.push(elem); + } + + Ok((exprs, quasis)) +} + +pub fn parse_tpl<'a, P: Parser<'a>>(p: &mut P, is_tagged_tpl: bool) -> PResult { + trace_cur!(p, parse_tpl); + let start = p.input_mut().cur_pos(); + + p.assert_and_bump(&P::Token::BACKQUOTE)?; + + let (exprs, quasis) = parse_tpl_elements(p, is_tagged_tpl)?; + + expect!(p, &P::Token::BACKQUOTE); + + let span = p.span(start); + Ok(Tpl { + span, + exprs, + quasis, + }) +} + +pub fn parse_tagged_tpl<'a, P: Parser<'a>>( + p: &mut P, + tag: Box, + type_params: Option>, +) 
-> PResult { + let tagged_tpl_start = tag.span_lo(); + trace_cur!(p, parse_tagged_tpl); + + let tpl = Box::new(parse_tpl(p, true)?); + + let span = p.span(tagged_tpl_start); + + if tag.is_opt_chain() { + p.emit_err(span, SyntaxError::TaggedTplInOptChain); + } + + Ok(TaggedTpl { + span, + tag, + type_params, + tpl, + ..Default::default() + }) +} + +pub fn parse_lit<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + let cur = cur!(p, true); + let v = if cur.is_null() { + p.bump(); + let span = p.span(start); + Lit::Null(swc_ecma_ast::Null { span }) + } else if cur.is_true() || cur.is_false() { + let value = cur.is_true(); + p.bump(); + let span = p.span(start); + Lit::Bool(swc_ecma_ast::Bool { span, value }) + } else if cur.is_str() { + let t = p.bump(); + let (value, raw) = t.take_str(p.input_mut()); + Lit::Str(swc_ecma_ast::Str { + span: p.span(start), + value, + raw: Some(raw), + }) + } else if cur.is_num() { + let t = p.bump(); + let (value, raw) = t.take_num(p.input_mut()); + Lit::Num(swc_ecma_ast::Number { + span: p.span(start), + value, + raw: Some(raw), + }) + } else if cur.is_bigint() { + let t = p.bump(); + let (value, raw) = t.take_bigint(p.input_mut()); + Lit::BigInt(swc_ecma_ast::BigInt { + span: p.span(start), + value, + raw: Some(raw), + }) + } else { + unreachable!("parse_lit should not be called for {:?}", cur) + }; + Ok(v) +} + +/// Parse `Arguments[Yield, Await]` +#[cfg_attr(feature = "tracing-spans", tracing::instrument(skip_all))] +pub fn parse_args<'a, P: Parser<'a>>( + p: &mut P, + is_dynamic_import: bool, +) -> PResult> { + trace_cur!(p, parse_args); + + let ctx = p.ctx() & !Context::WillExpectColonForCond; + + p.with_ctx(ctx).parse_with(|p| { + let start = p.cur_pos(); + expect!(p, &P::Token::LPAREN); + + let mut first = true; + let mut expr_or_spreads = Vec::with_capacity(2); + + while !eof!(p) && !p.input_mut().is(&P::Token::RPAREN) { + if first { + first = false; + } else { + expect!(p, &P::Token::COMMA); + // Handle 
trailing comma. + if p.input_mut().is(&P::Token::RPAREN) { + if is_dynamic_import && !p.input().syntax().import_attributes() { + syntax_error!(p, p.span(start), SyntaxError::TrailingCommaInsideImport) + } + + break; + } + } + + expr_or_spreads.push(p.include_in_expr(true).parse_expr_or_spread()?); + } + + expect!(p, &P::Token::RPAREN); + Ok(expr_or_spreads) + }) +} + +///`parseMaybeAssign` (overridden) +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +pub fn parse_assignment_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_assignment_expr); + + if p.input().syntax().typescript() && p.input_mut().is(&P::Token::JSX_TAG_START) { + // Note: When the JSX plugin is on, type assertions (` x`) aren't valid + // syntax. + + let cur_context = p.input().token_context().current(); + debug_assert_eq!(cur_context, Some(TokenContext::JSXOpeningTag)); + // Only time j_oTag is pushed is right after j_expr. + debug_assert_eq!( + p.input().token_context().0[p.input().token_context().len() - 2], + TokenContext::JSXExpr + ); + + let res = try_parse_ts(p, |p| parse_assignment_expr_base(p).map(Some)); + if let Some(res) = res { + return Ok(res); + } else { + debug_assert_eq!( + p.input_mut().token_context().current(), + Some(TokenContext::JSXOpeningTag) + ); + p.input_mut().token_context_mut().pop(); + debug_assert_eq!( + p.input_mut().token_context().current(), + Some(TokenContext::JSXExpr) + ); + p.input_mut().token_context_mut().pop(); + } + } + + parse_assignment_expr_base(p) +} + +/// Parse an assignment expression. This includes applications of +/// operators like `+=`. 
+/// +/// `parseMaybeAssign` +#[cfg_attr(feature = "tracing-spans", tracing::instrument(skip_all))] +fn parse_assignment_expr_base<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_assignment_expr_base); + let start = p.input().cur_span(); + + if p.input().syntax().typescript() + && (p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_less() || cur.is_jsx_tag_start())) + && (peek!(p).is_some_and(|peek| peek.is_word() || peek.is_jsx_name())) + { + let ctx = p.ctx() & !Context::WillExpectColonForCond; + let res = try_parse_ts(p.with_ctx(ctx).deref_mut(), |p| { + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_jsx_tag_start()) + { + if let Some(TokenContext::JSXOpeningTag) = p.input_mut().token_context().current() { + p.input_mut().token_context_mut().pop(); + + debug_assert_eq!( + p.input_mut().token_context().current(), + Some(TokenContext::JSXExpr) + ); + p.input_mut().token_context_mut().pop(); + } + } + + let type_parameters = parse_ts_type_params(p, false, true)?; + let mut arrow = parse_assignment_expr_base(p)?; + match *arrow { + Expr::Arrow(ArrowExpr { + ref mut span, + ref mut type_params, + .. + }) => { + *span = Span::new(type_parameters.span.lo, span.hi); + *type_params = Some(type_parameters); + } + _ => unexpected!(p, "("), + } + Ok(Some(arrow)) + }); + if let Some(res) = res { + if p.input().syntax().disallow_ambiguous_jsx_like() { + p.emit_err(start, SyntaxError::ReservedArrowTypeParam); + } + return Ok(res); + } + } + + if p.ctx().contains(Context::InGenerator) && p.input_mut().is(&P::Token::YIELD) { + return parse_yield_expr(p); + } + + let cur = cur!(p, true); + p.state_mut().potential_arrow_start = + if cur.is_known_ident() || cur.is_unknown_ident() || cur.is_yield() || cur.is_lparen() { + Some(p.cur_pos()) + } else { + None + }; + + let start = p.cur_pos(); + + // Try to parse conditional expression. 
+ let cond = parse_cond_expr(p)?; + + return_if_arrow!(p, cond); + + match *cond { + // if cond is conditional expression but not left-hand-side expression, + // just return it. + Expr::Cond(..) | Expr::Bin(..) | Expr::Unary(..) | Expr::Update(..) => return Ok(cond), + _ => {} + } + + finish_assignment_expr(p, start, cond) +} + +pub fn finish_assignment_expr<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + cond: Box, +) -> PResult> { + trace_cur!(p, finish_assignment_expr); + + if let Some(op) = p.input_mut().cur().and_then(|t| t.as_assign_op()) { + let left = if op == AssignOp::Assign { + match AssignTarget::try_from(reparse_expr_as_pat(p, PatType::AssignPat, cond)?) { + Ok(pat) => pat, + Err(expr) => { + syntax_error!(p, expr.span(), SyntaxError::InvalidAssignTarget) + } + } + } else { + // It is an early Reference Error if IsValidSimpleAssignmentTarget of + // LeftHandSideExpression is false. + if !cond.is_valid_simple_assignment_target(p.ctx().contains(Context::Strict)) { + if p.input().syntax().typescript() { + p.emit_err(cond.span(), SyntaxError::TS2406); + } else { + p.emit_err(cond.span(), SyntaxError::NotSimpleAssign) + } + } + if p.input().syntax().typescript() + && cond + .as_ident() + .map(|i| i.is_reserved_in_strict_bind()) + .unwrap_or(false) + { + p.emit_strict_mode_err(cond.span(), SyntaxError::TS1100); + } + + // TODO + match AssignTarget::try_from(cond) { + Ok(v) => v, + Err(v) => { + syntax_error!(p, v.span(), SyntaxError::InvalidAssignTarget); + } + } + }; + + p.bump(); + let right = parse_assignment_expr(p)?; + Ok(AssignExpr { + span: p.span(start), + op, + // TODO: + left, + right, + } + .into()) + } else { + Ok(cond) + } +} + +/// Spec: 'ConditionalExpression' +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +fn parse_cond_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_cond_expr); + + let start = p.cur_pos(); + + let test = parse_bin_expr(p)?; + return_if_arrow!(p, 
test); + + if p.input_mut().eat(&P::Token::QUESTION) { + let ctx = p.ctx() + | Context::InCondExpr + | Context::WillExpectColonForCond + | Context::IncludeInExpr; + let cons = parse_assignment_expr(p.with_ctx(ctx).deref_mut())?; + expect!(p, &P::Token::COLON); + let ctx = (p.ctx() | Context::InCondExpr) & !Context::WillExpectColonForCond; + let alt = parse_assignment_expr(p.with_ctx(ctx).deref_mut())?; + let span = Span::new(start, alt.span_hi()); + Ok(CondExpr { + span, + test, + cons, + alt, + } + .into()) + } else { + Ok(test) + } +} + +#[cfg_attr(feature = "tracing-spans", tracing::instrument(skip_all))] +pub fn parse_subscripts<'a>( + p: &mut impl Parser<'a>, + mut obj: Callee, + no_call: bool, + no_computed_member: bool, +) -> PResult> { + let start = obj.span().lo; + loop { + obj = match parse_subscript(p, start, obj, no_call, no_computed_member)? { + (expr, false) => return Ok(expr), + (expr, true) => Callee::Expr(expr), + } + } +} + +/// returned bool is true if this method should be called again. +#[cfg_attr(feature = "tracing-spans", tracing::instrument(skip_all))] +fn parse_subscript<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + mut obj: Callee, + no_call: bool, + no_computed_member: bool, +) -> PResult<(Box, bool)> { + trace_cur!(p, parse_subscript); + let _ = cur!(p, false); + + if p.input().syntax().typescript() { + if !p.input_mut().had_line_break_before_cur() && p.input_mut().is(&P::Token::BANG) { + p.input_mut().set_expr_allowed(false); + p.assert_and_bump(&P::Token::BANG)?; + + let expr = match obj { + Callee::Super(..) => { + syntax_error!( + p, + p.input().cur_span(), + SyntaxError::TsNonNullAssertionNotAllowed("super".into()) + ) + } + Callee::Import(..) 
=> { + syntax_error!( + p, + p.input().cur_span(), + SyntaxError::TsNonNullAssertionNotAllowed("import".into()) + ) + } + Callee::Expr(expr) => expr, + }; + return Ok(( + TsNonNullExpr { + span: p.span(start), + expr, + } + .into(), + true, + )); + } + + if matches!(obj, Callee::Expr(..)) && p.input_mut().is(&P::Token::LESS) { + let is_dynamic_import = obj.is_import(); + + let mut obj_opt = Some(obj); + // tsTryParseAndCatch is expensive, so avoid if not necessary. + // There are number of things we are going to "maybe" parse, like type arguments + // on tagged template expressions. If any of them fail, walk it back and + // continue. + + let mut_obj_opt = &mut obj_opt; + + let ctx = p.ctx() | Context::ShouldNotLexLtOrGtAsType; + let result = try_parse_ts(p.with_ctx(ctx).deref_mut(), |p| { + if !no_call + && at_possible_async( + p, + match &mut_obj_opt { + Some(Callee::Expr(ref expr)) => expr, + _ => unreachable!(), + }, + )? + { + // Almost certainly this is a generic async function `async () => ... + // But it might be a call with a type argument `async();` + let async_arrow_fn = try_parse_ts_generic_async_arrow_fn(p, start)?; + if let Some(async_arrow_fn) = async_arrow_fn { + return Ok(Some((async_arrow_fn.into(), true))); + } + } + + let type_args = parse_ts_type_args(p)?; + + if !no_call && p.input_mut().is(&P::Token::LPAREN) { + // possibleAsync always false here, because we would have handled it + // above. (won't be any undefined arguments) + let args = parse_args(p, is_dynamic_import)?; + + let obj = mut_obj_opt.take().unwrap(); + + if let Callee::Expr(callee) = &obj { + if let Expr::OptChain(..) 
= &**callee { + return Ok(Some(( + OptChainExpr { + span: p.span(start), + base: Box::new(OptChainBase::Call(OptCall { + span: p.span(start), + callee: obj.expect_expr(), + type_args: Some(type_args), + args, + ..Default::default() + })), + optional: false, + } + .into(), + true, + ))); + } + } + + Ok(Some(( + CallExpr { + span: p.span(start), + callee: obj, + type_args: Some(type_args), + args, + ..Default::default() + } + .into(), + true, + ))) + } else if p.input_mut().is(&P::Token::BACKQUOTE) { + parse_tagged_tpl( + p, + match mut_obj_opt { + Some(Callee::Expr(obj)) => obj.take(), + _ => unreachable!(), + }, + Some(type_args), + ) + .map(|expr| (expr.into(), true)) + .map(Some) + } else if p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_equal() || cur.is_as() || cur.is_satisfies()) + { + Ok(Some(( + TsInstantiation { + span: p.span(start), + expr: match mut_obj_opt { + Some(Callee::Expr(obj)) => obj.take(), + _ => unreachable!(), + }, + type_args, + } + .into(), + false, + ))) + } else if no_call { + unexpected!(p, "`") + } else { + unexpected!(p, "( or `") + } + }); + if let Some(result) = result { + return Ok(result); + } + + obj = obj_opt.unwrap(); + } + } + + let type_args = if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + try_parse_ts_type_args(p) + } else { + None + }; + + if obj.is_import() + && !p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_dot() || cur.is_lparen()) + { + unexpected!(p, "`.` or `(`") + } + + let question_dot_token = + if p.input_mut().is(&P::Token::QUESTION) && peek!(p).is_some_and(|peek| peek.is_dot()) { + let start = p.cur_pos(); + p.input_mut().eat(&P::Token::QUESTION); + Some(p.span(start)) + } else { + None + }; + + // $obj[name()] + if !no_computed_member + && ((question_dot_token.is_some() + && p.input_mut().is(&P::Token::DOT) + && peek!(p).is_some_and(|peek| peek.is_lbracket()) + && p.input_mut().eat(&P::Token::DOT) + && p.input_mut().eat(&P::Token::LBRACKET)) + || 
p.input_mut().eat(&P::Token::LBRACKET)) + { + let bracket_lo = p.input().prev_span().lo; + let prop = p.include_in_expr(true).parse_expr()?; + expect!(p, &P::Token::RBRACKET); + let span = Span::new(obj.span_lo(), p.input().last_pos()); + debug_assert_eq!(obj.span_lo(), span.lo()); + let prop = ComputedPropName { + span: Span::new(bracket_lo, p.input().last_pos()), + expr: prop, + }; + + let type_args = if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + try_parse_ts_type_args(p) + } else { + None + }; + + return Ok(( + Box::new(match obj { + Callee::Import(..) => unreachable!(), + Callee::Super(obj) => { + if !p.ctx().contains(Context::AllowDirectSuper) + && !p.input().syntax().allow_super_outside_method() + { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuper); + } else if question_dot_token.is_some() { + if no_call { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuperCall); + } + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuper); + } else { + SuperPropExpr { + span, + obj, + prop: SuperProp::Computed(prop), + } + .into() + } + } + Callee::Expr(obj) => { + let is_opt_chain = unwrap_ts_non_null(&obj).is_opt_chain(); + let expr = MemberExpr { + span, + obj, + prop: MemberProp::Computed(prop), + }; + let expr = if is_opt_chain || question_dot_token.is_some() { + OptChainExpr { + span, + optional: question_dot_token.is_some(), + base: Box::new(OptChainBase::Member(expr)), + } + .into() + } else { + expr.into() + }; + + if let Some(type_args) = type_args { + TsInstantiation { + expr: Box::new(expr), + type_args, + span: p.span(start), + } + .into() + } else { + expr + } + } + }), + true, + )); + } + + if (question_dot_token.is_some() + && p.input_mut().is(&P::Token::DOT) + && (peek!(p).is_some_and(|peek| peek.is_lparen()) + || (p.syntax().typescript() && peek!(p).is_some_and(|peek| peek.is_less()))) + && p.input_mut().eat(&P::Token::DOT)) + || (!no_call && p.input_mut().is(&P::Token::LPAREN)) + { + let 
type_args = if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + parse_ts_type_args(p).map(Some)? + } else { + None + }; + let args = parse_args(p, obj.is_import())?; + let span = p.span(start); + return if question_dot_token.is_some() + || match &obj { + Callee::Expr(obj) => unwrap_ts_non_null(obj).is_opt_chain(), + _ => false, + } { + match obj { + Callee::Super(_) | Callee::Import(_) => { + syntax_error!(p, p.input().cur_span(), SyntaxError::SuperCallOptional) + } + Callee::Expr(callee) => Ok(( + OptChainExpr { + span, + optional: question_dot_token.is_some(), + base: Box::new(OptChainBase::Call(OptCall { + span: p.span(start), + callee, + args, + type_args, + ..Default::default() + })), + } + .into(), + true, + )), + } + } else { + Ok(( + CallExpr { + span: p.span(start), + callee: obj, + args, + ..Default::default() + } + .into(), + true, + )) + }; + } + + // member expression + // $obj.name + if p.input_mut().eat(&P::Token::DOT) { + let prop = parse_maybe_private_name(p).map(|e| match e { + Either::Left(p) => MemberProp::PrivateName(p), + Either::Right(i) => MemberProp::Ident(i), + })?; + let span = p.span(obj.span_lo()); + debug_assert_eq!(obj.span_lo(), span.lo()); + debug_assert_eq!(prop.span_hi(), span.hi()); + + let type_args = if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + try_parse_ts_type_args(p) + } else { + None + }; + + return Ok(( + Box::new(match obj { + callee @ Callee::Import(_) => match prop { + MemberProp::Ident(IdentName { sym, .. 
}) => { + if !p.ctx().contains(Context::CanBeModule) { + let span = p.span(start); + p.emit_err(span, SyntaxError::ImportMetaInScript); + } + match &*sym { + "meta" => MetaPropExpr { + span, + kind: MetaPropKind::ImportMeta, + } + .into(), + _ => { + let args = parse_args(p, true)?; + + CallExpr { + span, + callee, + args, + type_args: None, + ..Default::default() + } + .into() + } + } + } + _ => { + unexpected!(p, "meta"); + } + }, + Callee::Super(obj) => { + if !p.ctx().contains(Context::AllowDirectSuper) + && !p.input().syntax().allow_super_outside_method() + { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuper); + } else if question_dot_token.is_some() { + if no_call { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuperCall); + } + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuper); + } else { + match prop { + MemberProp::Ident(ident) => SuperPropExpr { + span, + obj, + prop: SuperProp::Ident(ident), + } + .into(), + MemberProp::PrivateName(..) => { + syntax_error!( + p, + p.input().cur_span(), + SyntaxError::InvalidSuperCall + ) + } + MemberProp::Computed(..) 
=> unreachable!(), + } + } + } + Callee::Expr(obj) => { + let expr = MemberExpr { span, obj, prop }; + let expr = if unwrap_ts_non_null(&expr.obj).is_opt_chain() + || question_dot_token.is_some() + { + OptChainExpr { + span: p.span(start), + optional: question_dot_token.is_some(), + base: Box::new(OptChainBase::Member(expr)), + } + .into() + } else { + expr.into() + }; + if let Some(type_args) = type_args { + TsInstantiation { + expr: Box::new(expr), + type_args, + span: p.span(start), + } + .into() + } else { + expr + } + } + }), + true, + )); + } + + match obj { + Callee::Expr(expr) => { + let expr = if let Some(type_args) = type_args { + TsInstantiation { + expr, + type_args, + span: p.span(start), + } + .into() + } else { + expr + }; + + // MemberExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + if p.input_mut().is(&P::Token::BACKQUOTE) { + let ctx = p.ctx() & !Context::WillExpectColonForCond; + let tpl = parse_tagged_tpl(p.with_ctx(ctx).deref_mut(), expr, None)?; + return Ok((tpl.into(), true)); + } + + Ok((expr, false)) + } + Callee::Super(..) => { + if no_call { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuperCall); + } + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidSuper); + } + Callee::Import(..) 
=> { + syntax_error!(p, p.input().cur_span(), SyntaxError::InvalidImport); + } + } +} + +pub fn parse_dynamic_import_or_import_meta<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + no_call: bool, +) -> PResult> { + if p.input_mut().eat(&P::Token::DOT) { + p.mark_found_module_item(); + + let ident = parse_ident_name(p)?; + + match &*ident.sym { + "meta" => { + let span = p.span(start); + if !p.ctx().contains(Context::CanBeModule) { + p.emit_err(span, SyntaxError::ImportMetaInScript); + } + let expr = MetaPropExpr { + span, + kind: MetaPropKind::ImportMeta, + }; + parse_subscripts(p, Callee::Expr(expr.into()), no_call, false) + } + "source" => parse_dynamic_import_call(p, start, no_call, ImportPhase::Source), + // TODO: The proposal doesn't mention import.defer yet because it was + // pending on a decision for import.source. Wait to enable it until it's + // included in the proposal. + _ => unexpected!(p, "meta"), + } + } else { + parse_dynamic_import_call(p, start, no_call, ImportPhase::Evaluation) + } +} + +fn parse_dynamic_import_call<'a>( + p: &mut impl Parser<'a>, + start: BytePos, + no_call: bool, + phase: ImportPhase, +) -> PResult> { + let import = Callee::Import(Import { + span: p.span(start), + phase, + }); + + parse_subscripts(p, import, no_call, false) +} + +/// `is_new_expr`: true iff we are parsing production 'NewExpression'. 
#[cfg_attr(
    feature = "tracing-spans",
    tracing::instrument(level = "debug", skip_all)
)]
pub fn parse_member_expr_or_new_expr<'a>(
    p: &mut impl Parser<'a>,
    is_new_expr: bool,
) -> PResult<Box<Expr>> {
    // `<` / `>` must not be lexed as type-argument delimiters while we are in
    // plain expression position; the TS branches below clear this flag locally
    // when they really do want a type-argument list.
    let ctx = p.ctx() | Context::ShouldNotLexLtOrGtAsType;

    parse_member_expr_or_new_expr_inner(p.with_ctx(ctx).deref_mut(), is_new_expr)
}

/// Worker for [`parse_member_expr_or_new_expr`]; runs with
/// `ShouldNotLexLtOrGtAsType` already applied by the wrapper above.
fn parse_member_expr_or_new_expr_inner<'a, P: Parser<'a>>(
    p: &mut P,
    is_new_expr: bool,
) -> PResult<Box<Expr>> {
    trace_cur!(p, parse_member_expr_or_new_expr);

    let start = p.cur_pos();
    if p.input_mut().eat(&P::Token::NEW) {
        if p.input_mut().eat(&P::Token::DOT) {
            // `new.<something>` — the only valid meta-property here is
            // `new.target`; anything else is reported via `unexpected!` below.
            if p.input_mut().eat(&P::Token::TARGET) {
                let span = p.span(start);
                let expr = MetaPropExpr {
                    span,
                    kind: MetaPropKind::NewTarget,
                }
                .into();

                // `new.target` is rejected unless we are inside a non-arrow
                // function scope, a parameter list, or a class body.
                let ctx = p.ctx();
                if !ctx.contains(Context::InsideNonArrowFunctionScope)
                    && !ctx.contains(Context::InParameters)
                    && !ctx.contains(Context::InClass)
                {
                    p.emit_err(span, SyntaxError::InvalidNewTarget);
                }

                return parse_subscripts(p, Callee::Expr(expr), true, false);
            }

            unexpected!(p, "target")
        }

        // 'NewExpression' allows new call without paren.
        let callee = parse_member_expr_or_new_expr(p, is_new_expr)?;
        return_if_arrow!(p, callee);

        if is_new_expr {
            // `new a?.b()` is a SyntaxError: an optional chain may not be the
            // callee of a constructor call. Checked both directly and one
            // member-expression level down.
            match *callee {
                Expr::OptChain(OptChainExpr {
                    span,
                    optional: true,
                    ..
                }) => {
                    syntax_error!(p, span, SyntaxError::OptChainCannotFollowConstructorCall)
                }
                Expr::Member(MemberExpr { ref obj, .. }) => {
                    if let Expr::OptChain(OptChainExpr {
                        span,
                        optional: true,
                        ..
                    }) = **obj
                    {
                        syntax_error!(p, span, SyntaxError::OptChainCannotFollowConstructorCall)
                    }
                }
                _ => {}
            }
        }

        // TS: speculatively parse `new Foo<T>(...)` type arguments when the
        // next token is `<` (or `<<`, which the lexer may have produced from
        // two adjacent `<`s). `try_parse_ts` rolls the input back on failure.
        let type_args = if p.input().syntax().typescript()
            && p.input_mut()
                .cur()
                .is_some_and(|cur| cur.is_less() || cur.is_lshift())
        {
            try_parse_ts(p, |p| {
                // Re-enable `<` / `>` lexing as type delimiters for the
                // duration of the type-argument list.
                let ctx = p.ctx() & !Context::ShouldNotLexLtOrGtAsType;

                let args = parse_ts_type_args(p.with_ctx(ctx).deref_mut())?;
                if !p.input_mut().is(&P::Token::LPAREN) {
                    // This will fail
                    expect!(p, &P::Token::LPAREN);
                }
                Ok(Some(args))
            })
        } else {
            None
        };

        if !is_new_expr || p.input_mut().is(&P::Token::LPAREN) {
            // Parsed with 'MemberExpression' production.
            let args = parse_args(p, false).map(Some)?;

            let new_expr = Callee::Expr(
                NewExpr {
                    span: p.span(start),
                    callee,
                    args,
                    type_args,
                    ..Default::default()
                }
                .into(),
            );

            // We should parse subscripts for MemberExpression.
            // Because it's left recursive.
            return parse_subscripts(p, new_expr, true, false);
        }

        // Parsed with 'NewExpression' production: `new Foo` without an
        // argument list (`args: None`).

        return Ok(NewExpr {
            span: p.span(start),
            callee,
            args: None,
            type_args,
            ..Default::default()
        }
        .into());
    }

    if p.input_mut().eat(&P::Token::SUPER) {
        let base = Callee::Super(Super {
            span: p.span(start),
        });
        return parse_subscripts(p, base, true, false);
    } else if p.input_mut().eat(&P::Token::IMPORT) {
        // `import(...)` / `import.meta` / `import.source(...)`; `no_call` is
        // true because we are in member/new-expression position here.
        return parse_dynamic_import_or_import_meta(p, start, true);
    }
    let obj = parse_primary_expr(p)?;
    return_if_arrow!(p, obj);

    // TS: `Foo<T>` instantiation expression (no call following) — speculative,
    // rolled back by `try_parse_ts_type_args` when it does not parse.
    let type_args = if p.syntax().typescript() && p.input_mut().is(&P::Token::LESS) {
        try_parse_ts_type_args(p)
    } else {
        None
    };
    let obj = if let Some(type_args) = type_args {
        trace_cur!(p, parse_member_expr_or_new_expr__with_type_args);
        TsInstantiation {
            expr: obj,
            type_args,
            span: p.span(start),
        }
        .into()
    } else {
        obj
    };

    parse_subscripts(p, Callee::Expr(obj), true, false)
}

/// Parse `NewExpression`.
/// This includes `MemberExpression`.
+#[cfg_attr(feature = "tracing-spans", tracing::instrument(skip_all))] +pub fn parse_new_expr<'a>(p: &mut impl Parser<'a>) -> PResult> { + trace_cur!(p, parse_new_expr); + parse_member_expr_or_new_expr(p, true) +} + +/// Name from spec: 'LogicalORExpression' +pub fn parse_bin_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_bin_expr); + + let ctx = p.ctx(); + + let left = match parse_unary_expr(p) { + Ok(v) => v, + Err(err) => { + trace_cur!(p, parse_bin_expr__recovery_unary_err); + + let cur = cur!(p, true); + if (cur.is_in() && ctx.contains(Context::IncludeInExpr)) + || cur.is_instanceof() + || cur.is_bin_op() + { + p.emit_err(p.input().cur_span(), SyntaxError::TS1109); + Invalid { span: err.span() }.into() + } else { + return Err(err); + } + } + }; + + return_if_arrow!(p, left); + parse_bin_op_recursively(p, left, 0) +} + +/// Parse binary operators with the operator precedence parsing +/// algorithm. `left` is the left-hand side of the operator. +/// `minPrec` provides context that allows the function to stop and +/// defer further parser to one of its callers when it encounters an +/// operator that has a lower precedence than the set it is parsing. +/// +/// `parseExprOp` +pub fn parse_bin_op_recursively<'a>( + p: &mut impl Parser<'a>, + mut left: Box, + mut min_prec: u8, +) -> PResult> { + loop { + let (next_left, next_prec) = parse_bin_op_recursively_inner(p, left, min_prec)?; + + match &*next_left { + Expr::Bin(BinExpr { + span, + left, + op: op!("&&"), + .. + }) + | Expr::Bin(BinExpr { + span, + left, + op: op!("||"), + .. + }) => { + if let Expr::Bin(BinExpr { op: op!("??"), .. }) = &**left { + p.emit_err(*span, SyntaxError::NullishCoalescingWithLogicalOp); + } + } + _ => {} + } + + min_prec = match next_prec { + Some(v) => v, + None => return Ok(next_left), + }; + + left = next_left; + } +} + +/// Returns `(left, Some(next_prec))` or `(expr, None)`. 
fn parse_bin_op_recursively_inner<'a, P: Parser<'a>>(
    p: &mut P,
    left: Box<Expr>,
    min_prec: u8,
) -> PResult<(Box<Expr>, Option<u8>)> {
    // Precedence of `in` / relational operators; `as` and `satisfies` are
    // only consumed when the caller would accept an operator at that level
    // (i.e. `PREC_OF_IN > min_prec`).
    const PREC_OF_IN: u8 = 7;

    // TS `expr as Type` / `expr as const`. Requires no line break before the
    // `as` token (ASI-style restriction).
    if p.input().syntax().typescript()
        && PREC_OF_IN > min_prec
        && !p.input_mut().had_line_break_before_cur()
        && p.input_mut().is(&P::Token::AS)
    {
        let start = left.span_lo();
        let expr = left;
        let node = if peek!(p).is_some_and(|cur| cur.is_const()) {
            p.bump(); // as
            let _ = cur!(p, false);
            p.bump(); // const
            TsConstAssertion {
                span: p.span(start),
                expr,
            }
            .into()
        } else {
            let type_ann = next_then_parse_ts_type(p)?;
            TsAsExpr {
                span: p.span(start),
                expr,
                type_ann,
            }
            .into()
        };

        // `a as T as U` etc.: keep folding at the same precedence level.
        return parse_bin_op_recursively_inner(p, node, min_prec);
    }
    // TS `expr satisfies Type` — same shape and line-break restriction as
    // the `as` branch above (note: unlike `as`, no `min_prec` guard here).
    if p.input().syntax().typescript()
        && !p.input_mut().had_line_break_before_cur()
        && p.input_mut().is(&P::Token::SATISFIES)
    {
        let start = left.span_lo();
        let expr = left;
        let node = {
            let type_ann = next_then_parse_ts_type(p)?;
            TsSatisfiesExpr {
                span: p.span(start),
                expr,
                type_ann,
            }
            .into()
        };

        return parse_bin_op_recursively_inner(p, node, min_prec);
    }

    let ctx = p.ctx();
    // Return left on eof
    let word = match cur!(p, false) {
        Ok(cur) => cur,
        Err(..) => return Ok((left, None)),
    };
    // Map the current token to a binary operator, if any. `in` only counts
    // when the context allows it (e.g. not in a for-head).
    let op = if word.is_in() && ctx.contains(Context::IncludeInExpr) {
        op!("in")
    } else if word.is_instanceof() {
        op!("instanceof")
    } else if let Some(op) = word.as_bin_op() {
        op
    } else {
        return Ok((left, None));
    };

    // Operator binds no tighter than the caller's floor: stop here and let a
    // caller with a lower floor consume it.
    if op.precedence() <= min_prec {
        if cfg!(feature = "debug") {
            tracing::trace!(
                "returning {:?} without parsing {:?} because min_prec={}, prec={}",
                left,
                op,
                min_prec,
                op.precedence()
            );
        }

        return Ok((left, None));
    }
    p.bump();
    if cfg!(feature = "debug") {
        tracing::trace!(
            "parsing binary op {:?} min_prec={}, prec={}",
            op,
            min_prec,
            op.precedence()
        );
    }
    // `-a ** b` / `await a ** b` is a SyntaxError per the spec grammar
    // (ExponentiationExpression does not take a UnaryExpression on the left).
    match *left {
        // This is invalid syntax.
        Expr::Unary { .. } | Expr::Await(..) if op == op!("**") => {
            // Correct implementation would be returning Ok(left) and
            // returning "unexpected token '**'" on next.
            // But it's not useful error message.

            syntax_error!(
                p,
                SyntaxError::UnaryInExp {
                    // FIXME: Use display
                    left: format!("{left:?}"),
                    left_span: left.span(),
                }
            )
        }
        _ => {}
    }

    let right = {
        let left_of_right = parse_unary_expr(p)?;
        parse_bin_op_recursively(
            p,
            left_of_right,
            if op == op!("**") {
                // exponential operator is right associative
                op.precedence() - 1
            } else {
                op.precedence()
            },
        )?
    };
    /* this check is for all ?? operators
     * a ?? b && c for this example
     * b && c => This is considered as a logical expression in the ast tree
     * a => Identifier
     * so for ?? operator we need to check in this case the right expression to
     * have parenthesis second case a && b ?? c
     * here a && b => This is considered as a logical expression in the ast tree
     * c => identifier
     * so now here for ?? operator we need to check the left expression to have
     * parenthesis if the parenthesis is missing we raise an error and
     * throw it
     */
    if op == op!("??") {
        match *left {
            Expr::Bin(BinExpr { span, op, .. }) if op == op!("&&") || op == op!("||") => {
                p.emit_err(span, SyntaxError::NullishCoalescingWithLogicalOp);
            }
            _ => {}
        }

        match *right {
            Expr::Bin(BinExpr { span, op, .. }) if op == op!("&&") || op == op!("||") => {
                p.emit_err(span, SyntaxError::NullishCoalescingWithLogicalOp);
            }
            _ => {}
        }
    }

    let node = BinExpr {
        span: Span::new(left.span_lo(), right.span_hi()),
        op,
        left,
        right,
    }
    .into();

    // Hand the (unchanged) `min_prec` back so the caller's loop keeps folding
    // operators at the same level.
    Ok((node, Some(min_prec)))
}

/// Parse unary expression and update expression.
+/// +/// spec: 'UnaryExpression' +pub(super) fn parse_unary_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_unary_expr); + let start = p.cur_pos(); + + if !p.input().syntax().jsx() + && p.input().syntax().typescript() + && p.input_mut().eat(&P::Token::LESS) + { + if p.input_mut().eat(&P::Token::CONST) { + expect!(p, &P::Token::GREATER); + let expr = parse_unary_expr(p)?; + return Ok(TsConstAssertion { + span: p.span(start), + expr, + } + .into()); + } + + return parse_ts_type_assertion(p, start) + .map(Expr::from) + .map(Box::new); + } + + // Parse update expression + if p.input_mut().is(&P::Token::PLUS_PLUS) || p.input_mut().is(&P::Token::MINUS_MINUS) { + let op = if p.bump() == P::Token::PLUS_PLUS { + op!("++") + } else { + op!("--") + }; + + let arg = parse_unary_expr(p)?; + let span = Span::new(start, arg.span_hi()); + p.check_assign_target(&arg, false); + + return Ok(UpdateExpr { + span, + prefix: true, + op, + arg, + } + .into()); + } + + // Parse unary expression + + if p.input_mut().cur().is_some_and(|cur| { + cur.is_delete() + || cur.is_void() + || cur.is_typeof() + || cur.is_plus() + || cur.is_minus() + || cur.is_tilde() + || cur.is_bang() + }) { + let cur = p.bump(); + let op = if cur.is_delete() { + op!("delete") + } else if cur.is_void() { + op!("void") + } else if cur.is_typeof() { + op!("typeof") + } else if cur.is_plus() { + op!(unary, "+") + } else if cur.is_minus() { + op!(unary, "-") + } else if cur.is_tilde() { + op!("~") + } else if cur.is_bang() { + op!("!") + } else { + unreachable!() + }; + let arg_start = p.cur_pos() - BytePos(1); + let arg = match parse_unary_expr(p) { + Ok(expr) => expr, + Err(err) => { + p.emit_error(err); + Invalid { + span: Span::new(arg_start, arg_start), + } + .into() + } + }; + + if op == op!("delete") { + if let Expr::Ident(ref i) = *arg { + p.emit_strict_mode_err(i.span, SyntaxError::TS1102) + } + } + + if p.input().syntax().typescript() && op == op!("delete") { + match arg.unwrap_parens() 
{ + Expr::Member(..) => {} + Expr::OptChain(OptChainExpr { base, .. }) + if matches!(&**base, OptChainBase::Member(..)) => {} + + expr => { + p.emit_err(expr.span(), SyntaxError::TS2703); + } + } + } + + return Ok(UnaryExpr { + span: Span::new(start, arg.span_hi()), + op, + arg, + } + .into()); + } + + if p.input_mut().is(&P::Token::AWAIT) { + return parse_await_expr(p, None); + } + + // UpdateExpression + let expr = parse_lhs_expr(p)?; + return_if_arrow!(p, expr); + + // Line terminator isn't allowed here. + if p.input_mut().had_line_break_before_cur() { + return Ok(expr); + } + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_plus_plus() || cur.is_minus_minus()) + { + p.check_assign_target(&expr, false); + + let op = if p.bump() == P::Token::PLUS_PLUS { + op!("++") + } else { + op!("--") + }; + + return Ok(UpdateExpr { + span: p.span(expr.span_lo()), + prefix: false, + op, + arg: expr, + } + .into()); + } + Ok(expr) +} + +pub fn parse_await_expr<'a, P: Parser<'a>>( + p: &mut P, + start_of_await_token: Option, +) -> PResult> { + let start = start_of_await_token.unwrap_or_else(|| p.cur_pos()); + + if start_of_await_token.is_none() { + p.assert_and_bump(&P::Token::AWAIT)?; + } + + let await_token = p.span(start); + + if p.input_mut().is(&P::Token::MUL) { + syntax_error!(p, SyntaxError::AwaitStar); + } + + let ctx = p.ctx(); + + let span = p.span(start); + + if !ctx.contains(Context::InAsync) + && (p.is_general_semi() + || p.input_mut() + .cur() + .is_some_and(|cur| cur.is_rparen() || cur.is_rbracket() || cur.is_comma())) + { + if ctx.contains(Context::Module) { + p.emit_err(span, SyntaxError::InvalidIdentInAsync); + } + + return Ok(Ident::new_no_ctxt("await".into(), span).into()); + } + + // This has been checked if start_of_await_token == true, + if start_of_await_token.is_none() && ctx.contains(Context::TopLevel) { + p.mark_found_module_item(); + if !ctx.contains(Context::CanBeModule) { + p.emit_err(await_token, SyntaxError::TopLevelAwaitInScript); + } + } 
+ + if ctx.contains(Context::InFunction) && !ctx.contains(Context::InAsync) { + p.emit_err(await_token, SyntaxError::AwaitInFunction); + } + + if ctx.contains(Context::InParameters) && !ctx.contains(Context::InFunction) { + p.emit_err(span, SyntaxError::AwaitParamInAsync); + } + + let arg = parse_unary_expr(p)?; + Ok(AwaitExpr { + span: p.span(start), + arg, + } + .into()) +} + +pub(super) fn parse_for_head_prefix<'a>(p: &mut impl Parser<'a>) -> PResult> { + p.parse_expr() +} + +/// Parse call, dot, and `[]`-subscript expressions. +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +pub fn parse_lhs_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_lhs_expr); + + let start = p.cur_pos(); + + // parse jsx + if p.input().syntax().jsx() { + fn into_expr(e: Either) -> Box { + match e { + Either::Left(l) => l.into(), + Either::Right(r) => r.into(), + } + } + let cur = cur!(p, true); + if cur.is_jsx_text() { + return parse_jsx_text(p) + .map(Lit::JSXText) + .map(Expr::Lit) + .map(Box::new); + } else if cur.is_jsx_tag_start() { + return parse_jsx_element(p).map(into_expr); + } + + if p.input_mut().is(&P::Token::LESS) && !peek!(p).is_some_and(|peek| peek.is_bang()) { + // In case we encounter an lt token here it will always be the start of + // jsx as the lt sign is not allowed in places that expect an expression + + // FIXME: + // p.finishToken(tt.jsxTagStart); + + return parse_jsx_element(p).map(into_expr); + } + } + + // `super()` can't be handled from parse_new_expr() + if p.input_mut().eat(&P::Token::SUPER) { + let obj = Callee::Super(Super { + span: p.span(start), + }); + return parse_subscripts(p, obj, false, false); + } else if p.input_mut().eat(&P::Token::IMPORT) { + return parse_dynamic_import_or_import_meta(p, start, false); + } + + let callee = parse_new_expr(p)?; + return_if_arrow!(p, callee); + + let type_args = if p.input().syntax().typescript() + && p.input_mut() + .cur() + .is_some_and(|cur| 
cur.is_less() || cur.is_lshift()) + { + try_parse_ts(p, |p| { + let type_args = parse_ts_type_args(p)?; + if p.input_mut().is(&P::Token::LPAREN) { + Ok(Some(type_args)) + } else { + Ok(None) + } + }) + } else { + None + }; + + if let Expr::New(ne @ NewExpr { args: None, .. }) = *callee { + // If this is parsed using 'NewExpression' rule, just return it. + // Because it's not left-recursive. + if type_args.is_some() { + // This fails with `expected (` + expect!(p, &P::Token::LPAREN); + } + debug_assert!( + !cur!(p, false).is_ok_and(|cur| cur.is_lparen()), + "parse_new_expr() should eat paren if it exists" + ); + return Ok(NewExpr { type_args, ..ne }.into()); + } + // 'CallExpr' rule contains 'MemberExpr (...)', + // and 'MemberExpr' rule contains 'new MemberExpr (...)' + + if p.input_mut().is(&P::Token::LPAREN) { + // This is parsed using production MemberExpression, + // which is left-recursive. + let (callee, is_import) = match callee { + _ if callee.is_ident_ref_to("import") => ( + Callee::Import(Import { + span: callee.span(), + phase: Default::default(), + }), + true, + ), + _ => (Callee::Expr(callee), false), + }; + let args = parse_args(p, is_import)?; + + let call_expr = match callee { + Callee::Expr(e) if unwrap_ts_non_null(&e).is_opt_chain() => OptChainExpr { + span: p.span(start), + base: Box::new(OptChainBase::Call(OptCall { + span: p.span(start), + callee: e, + args, + type_args, + ..Default::default() + })), + optional: false, + } + .into(), + _ => CallExpr { + span: p.span(start), + + callee, + args, + type_args, + ..Default::default() + } + .into(), + }; + + return parse_subscripts(p, Callee::Expr(call_expr), false, false); + } + if type_args.is_some() { + // This fails + expect!(p, &P::Token::LPAREN); + } + + // This is parsed using production 'NewExpression', which contains + // 'MemberExpression' + Ok(callee) +} + +// Returns (args_or_pats, trailing_comma) +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) 
+)] +pub fn parse_args_or_pats<'a, P: Parser<'a>>( + p: &mut P, +) -> PResult<(Vec, Option)> { + parse_args_or_pats_inner( + p.with_ctx(p.ctx() & !Context::WillExpectColonForCond) + .deref_mut(), + ) +} + +fn parse_args_or_pats_inner<'a, P: Parser<'a>>( + p: &mut P, +) -> PResult<(Vec, Option)> { + trace_cur!(p, parse_args_or_pats); + + expect!(p, &P::Token::LPAREN); + + let mut items = Vec::new(); + let mut trailing_comma = None; + + // TODO(kdy1): optimize (once we parsed a pattern, we can parse everything else + // as a pattern instead of reparsing) + while !eof!(p) && !p.input_mut().is(&P::Token::RPAREN) { + // https://github.com/swc-project/swc/issues/410 + let is_async = p.input_mut().is(&P::Token::ASYNC) + && peek!(p).is_some_and(|t| t.is_lparen() || t.is_word() || t.is_function()); + + let start = p.cur_pos(); + p.state_mut().potential_arrow_start = Some(start); + let modifier_start = start; + + let has_modifier = eat_any_ts_modifier(p)?; + let pat_start = p.cur_pos(); + + let mut arg = { + if p.input().syntax().typescript() + && (p.is_ident_ref() + || (p.input_mut().is(&P::Token::DOTDOTDOT) && p.peek_is_ident_ref())) + { + let spread = if p.input_mut().eat(&P::Token::DOTDOTDOT) { + Some(p.input().prev_span()) + } else { + None + }; + + // At here, we use parse_bin_expr() instead of parse_assignment_expr() + // because `x?: number` should not be parsed as a conditional expression + let expr = if spread.is_some() { + parse_bin_expr(p)? + } else { + let mut expr = parse_bin_expr(p)?; + + if cur!(p, false).is_ok_and(|t| t.is_assign_op()) { + expr = finish_assignment_expr(p, start, expr)? + } + + expr + }; + + ExprOrSpread { spread, expr } + } else { + p.include_in_expr(true).parse_expr_or_spread()? 
+ } + }; + + let optional = if p.input().syntax().typescript() { + if p.input_mut().is(&P::Token::QUESTION) { + if peek!(p).is_some_and(|peek| { + peek.is_comma() || peek.is_equal() || peek.is_rparen() || peek.is_colon() + }) { + p.assert_and_bump(&P::Token::QUESTION)?; + let _ = cur!(p, false); + if arg.spread.is_some() { + p.emit_err(p.input().prev_span(), SyntaxError::TS1047); + } + match *arg.expr { + Expr::Ident(..) => {} + _ => { + syntax_error!(p, arg.span(), SyntaxError::TsBindingPatCannotBeOptional) + } + } + true + } else if matches!(arg, ExprOrSpread { spread: None, .. }) { + expect!(p, &P::Token::QUESTION); + let test = arg.expr; + let ctx = p.ctx() + | Context::InCondExpr + | Context::WillExpectColonForCond + | Context::IncludeInExpr; + let cons = parse_assignment_expr(p.with_ctx(ctx).deref_mut())?; + expect!(p, &P::Token::COLON); + let ctx = (p.ctx() | Context::InCondExpr) & !Context::WillExpectColonForCond; + let alt = parse_assignment_expr(p.with_ctx(ctx).deref_mut())?; + + arg = ExprOrSpread { + spread: None, + expr: CondExpr { + span: Span::new(start, alt.span_hi()), + + test, + cons, + alt, + } + .into(), + }; + + false + } else { + false + } + } else { + false + } + } else { + false + }; + + if optional || (p.input().syntax().typescript() && p.input_mut().is(&P::Token::COLON)) { + // TODO: `async(...args?: any[]) : any => {}` + // + // if p.input().syntax().typescript() && optional && arg.spread.is_some() { + // p.emit_err(p.input().prev_span(), SyntaxError::TS1047) + // } + + let mut pat = reparse_expr_as_pat(p, PatType::BindingPat, arg.expr)?; + if optional { + match pat { + Pat::Ident(ref mut i) => i.optional = true, + _ => unreachable!(), + } + } + if let Some(span) = arg.spread { + pat = RestPat { + span: p.span(pat_start), + dot3_token: span, + arg: Box::new(pat), + type_ann: None, + } + .into(); + } + match pat { + Pat::Ident(BindingIdent { + id: Ident { ref mut span, .. }, + ref mut type_ann, + .. 
+ }) + | Pat::Array(ArrayPat { + ref mut type_ann, + ref mut span, + .. + }) + | Pat::Object(ObjectPat { + ref mut type_ann, + ref mut span, + .. + }) + | Pat::Rest(RestPat { + ref mut type_ann, + ref mut span, + .. + }) => { + let new_type_ann = try_parse_ts_type_ann(p)?; + if new_type_ann.is_some() { + *span = Span::new(pat_start, p.input().prev_span().hi); + } + *type_ann = new_type_ann; + } + Pat::Expr(ref expr) => unreachable!("invalid pattern: Expr({:?})", expr), + Pat::Assign(..) | Pat::Invalid(..) => { + // We don't have to panic here. + // See: https://github.com/swc-project/swc/issues/1170 + // + // Also, as an exact error is added to the errors while + // creating `Invalid`, we don't have to emit a new + // error. + } + } + + if p.input_mut().eat(&P::Token::EQUAL) { + let right = parse_assignment_expr(p)?; + pat = AssignPat { + span: p.span(pat_start), + left: Box::new(pat), + right, + } + .into(); + } + + if has_modifier { + p.emit_err(p.span(modifier_start), SyntaxError::TS2369); + } + + items.push(AssignTargetOrSpread::Pat(pat)) + } else { + if has_modifier { + p.emit_err(p.span(modifier_start), SyntaxError::TS2369); + } + + items.push(AssignTargetOrSpread::ExprOrSpread(arg)); + } + + // https://github.com/swc-project/swc/issues/433 + if p.input_mut().eat(&P::Token::ARROW) && { + debug_assert_eq!(items.len(), 1); + match items[0] { + AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { ref expr, .. }) + | AssignTargetOrSpread::Pat(Pat::Expr(ref expr)) => { + matches!(**expr, Expr::Ident(..)) + } + AssignTargetOrSpread::Pat(Pat::Ident(..)) => true, + _ => false, + } + } { + let params: Vec = parse_paren_items_as_params(p, items.clone(), None)? 
+ .into_iter() + .collect(); + + let body: Box = parse_fn_block_or_expr_body( + p, + false, + false, + true, + params.is_simple_parameter_list(), + )?; + let span = p.span(start); + + items.push(AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { + expr: Box::new( + ArrowExpr { + span, + body, + is_async, + is_generator: false, + params, + ..Default::default() + } + .into(), + ), + spread: None, + })); + } + + if !p.input_mut().is(&P::Token::RPAREN) { + expect!(p, &P::Token::COMMA); + if p.input_mut().is(&P::Token::RPAREN) { + trailing_comma = Some(p.input().prev_span()); + } + } + } + + expect!(p, &P::Token::RPAREN); + Ok((items, trailing_comma)) +} + +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +pub fn parse_paren_expr_or_arrow_fn<'a, P: Parser<'a>>( + p: &mut P, + can_be_arrow: bool, + async_span: Option, +) -> PResult> { + trace_cur!(p, parse_paren_expr_or_arrow_fn); + + let expr_start = async_span.map(|x| x.lo()).unwrap_or_else(|| p.cur_pos()); + + // At this point, we can't know if it's parenthesized + // expression or head of arrow function. + // But as all patterns of javascript is subset of + // expressions, we can parse both as expression. + + let ctx = p.ctx() & !Context::WillExpectColonForCond; + + let (paren_items, trailing_comma) = + parse_args_or_pats(p.with_ctx(ctx).include_in_expr(true).deref_mut())?; + + let has_pattern = paren_items + .iter() + .any(|item| matches!(item, AssignTargetOrSpread::Pat(..))); + + let will_expect_colon_for_cond = p.ctx().contains(Context::WillExpectColonForCond); + // This is slow path. We handle arrow in conditional expression. 
+ if p.syntax().typescript() + && p.ctx().contains(Context::InCondExpr) + && p.input_mut().is(&P::Token::COLON) + { + // TODO: Remove clone + let items_ref = &paren_items; + if let Some(expr) = try_parse_ts(p, |p| { + let return_type = parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON)?; + + expect!(p, &P::Token::ARROW); + + let params: Vec = + parse_paren_items_as_params(p, items_ref.clone(), trailing_comma)? + .into_iter() + .collect(); + + let body: Box = parse_fn_block_or_expr_body( + p, + async_span.is_some(), + false, + true, + params.is_simple_parameter_list(), + )?; + + if will_expect_colon_for_cond && !p.input_mut().is(&P::Token::COLON) { + trace_cur!(p, parse_arrow_in_cond__fail); + unexpected!(p, "fail") + } + + Ok(Some( + ArrowExpr { + span: p.span(expr_start), + is_async: async_span.is_some(), + is_generator: false, + params, + body, + return_type: Some(return_type), + ..Default::default() + } + .into(), + )) + }) { + return Ok(expr); + } + } + + let return_type = if !p.ctx().contains(Context::WillExpectColonForCond) + && p.input().syntax().typescript() + && p.input_mut().is(&P::Token::COLON) + { + try_parse_ts(p, |p| { + let return_type = parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON)?; + + if !p.input_mut().is(&P::Token::ARROW) { + unexpected!(p, "fail") + } + + Ok(Some(return_type)) + }) + } else { + None + }; + + // we parse arrow function at here, to handle it efficiently. + if has_pattern || return_type.is_some() || p.input_mut().is(&P::Token::ARROW) { + if p.input_mut().had_line_break_before_cur() { + syntax_error!(p, p.span(expr_start), SyntaxError::LineBreakBeforeArrow); + } + + if !can_be_arrow { + syntax_error!(p, p.span(expr_start), SyntaxError::ArrowNotAllowed); + } + expect!(p, &P::Token::ARROW); + + let params: Vec = parse_paren_items_as_params(p, paren_items, trailing_comma)? 
+ .into_iter() + .collect(); + + let body: Box = parse_fn_block_or_expr_body( + p, + async_span.is_some(), + false, + true, + params.is_simple_parameter_list(), + )?; + let arrow_expr = ArrowExpr { + span: p.span(expr_start), + is_async: async_span.is_some(), + is_generator: false, + params, + body, + return_type, + ..Default::default() + }; + if let BlockStmtOrExpr::BlockStmt(..) = &*arrow_expr.body { + if p.input_mut().cur().is_some_and(|t| t.is_bin_op()) { + // ) is required + p.emit_err(p.input().cur_span(), SyntaxError::TS1005); + let errorred_expr = parse_bin_op_recursively(p, Box::new(arrow_expr.into()), 0)?; + + if !p.is_general_semi() { + // ; is required + p.emit_err(p.input().cur_span(), SyntaxError::TS1005); + } + + return Ok(errorred_expr); + } + } + return Ok(arrow_expr.into()); + } else { + // If there's no arrow function, we have to check there's no + // AssignProp in lhs to check against assignment in object literals + // like (a, {b = 1}); + for expr_or_spread in paren_items.iter() { + if let AssignTargetOrSpread::ExprOrSpread(e) = expr_or_spread { + if let Expr::Object(o) = &*e.expr { + for prop in o.props.iter() { + if let PropOrSpread::Prop(prop) = prop { + if let Prop::Assign(..) = **prop { + p.emit_err(prop.span(), SyntaxError::AssignProperty); + } + } + } + } + } + } + } + + let expr_or_spreads = paren_items + .into_iter() + .map(|item| -> PResult<_> { + match item { + AssignTargetOrSpread::ExprOrSpread(e) => Ok(e), + _ => syntax_error!(p, item.span(), SyntaxError::InvalidExpr), + } + }) + .collect::, _>>()?; + if let Some(async_span) = async_span { + // It's a call expression + return Ok(CallExpr { + span: p.span(async_span.lo()), + callee: Callee::Expr(Box::new( + Ident::new_no_ctxt("async".into(), async_span).into(), + )), + args: expr_or_spreads, + ..Default::default() + } + .into()); + } + + // It was not head of arrow function. 
+ + if expr_or_spreads.is_empty() { + syntax_error!( + p, + Span::new(expr_start, p.last_pos()), + SyntaxError::EmptyParenExpr + ); + } + + // TODO: Verify that invalid expression like {a = 1} does not exists. + + // ParenthesizedExpression cannot contain spread. + if expr_or_spreads.len() == 1 { + let expr = match expr_or_spreads.into_iter().next().unwrap() { + ExprOrSpread { + spread: Some(..), + ref expr, + } => syntax_error!(p, expr.span(), SyntaxError::SpreadInParenExpr), + ExprOrSpread { expr, .. } => expr, + }; + Ok(ParenExpr { + span: p.span(expr_start), + expr, + } + .into()) + } else { + debug_assert!(expr_or_spreads.len() >= 2); + + let mut exprs = Vec::with_capacity(expr_or_spreads.len()); + for expr in expr_or_spreads { + match expr { + ExprOrSpread { + spread: Some(..), + ref expr, + } => syntax_error!(p, expr.span(), SyntaxError::SpreadInParenExpr), + ExprOrSpread { expr, .. } => exprs.push(expr), + } + } + debug_assert!(exprs.len() >= 2); + + // span of sequence expression should not include '(', ')' + let seq_expr = SeqExpr { + span: Span::new( + exprs.first().unwrap().span_lo(), + exprs.last().unwrap().span_hi(), + ), + exprs, + } + .into(); + Ok(ParenExpr { + span: p.span(expr_start), + expr: seq_expr, + } + .into()) + } +} + +/// Parse a primary expression or arrow function +#[cfg_attr( + feature = "tracing-spans", + tracing::instrument(level = "debug", skip_all) +)] +pub(super) fn parse_primary_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_primary_expr); + + let _ = p.input_mut().cur(); + let start = p.cur_pos(); + + let can_be_arrow = p + .state_mut() + .potential_arrow_start + .map(|s| s == start) + .unwrap_or(false); + + if let Some(token) = p.input_mut().cur() { + if token.is_this() { + p.input_mut().bump(); + return Ok(ThisExpr { + span: p.span(start), + } + .into()); + } else if token.is_async() { + if peek!(p).is_some_and(|peek| peek.is_function()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + 
// handle `async function` expression + return parse_async_fn_expr(p); + } + + if can_be_arrow + && p.input().syntax().typescript() + && peek!(p).is_some_and(|peek| peek.is_less()) + { + // try parsing `async() => {}` + if let Some(res) = try_parse_ts(p, |p| { + let start = p.cur_pos(); + p.assert_and_bump(&P::Token::ASYNC)?; + try_parse_ts_generic_async_arrow_fn(p, start) + }) { + return Ok(res.into()); + } + } + + if can_be_arrow + && peek!(p).is_some_and(|peek| peek.is_lparen()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + expect!(p, &P::Token::ASYNC); + let async_span = p.input().prev_span(); + return parse_paren_expr_or_arrow_fn(p, can_be_arrow, Some(async_span)); + } + } else if token.is_lbracket() { + let ctx = p.ctx() & !Context::WillExpectColonForCond; + return p.with_ctx(ctx).parse_with(parse_array_lit); + } else if token.is_lbrace() { + return parse_object_expr(p).map(Box::new); + } else if token.is_function() { + return parse_fn_expr(p); + } else if token.is_null() + || token.is_true() + || token.is_false() + || token.is_num() + || token.is_bigint() + || token.is_str() + { + // Literals + return Ok(parse_lit(p)?.into()); + } else if token.is_slash() || token.is_slash_eq() { + // Regexp + p.bump(); + + p.input_mut().set_next_regexp(Some(start)); + + if p.input_mut().cur().is_some_and(|cur| cur.is_regexp()) { + p.input_mut().set_next_regexp(None); + + let t = p.bump(); + let (exp, flags) = t.take_regexp(p.input_mut()); + let span = p.span(start); + + let mut flags_count = + flags + .chars() + .fold(FxHashMap::::default(), |mut map, flag| { + let key = match flag { + // https://tc39.es/ecma262/#sec-isvalidregularexpressionliteral + 'd' | 'g' | 'i' | 'm' | 's' | 'u' | 'v' | 'y' => flag, + _ => '\u{0000}', // special marker for unknown flags + }; + map.entry(key).and_modify(|count| *count += 1).or_insert(1); + map + }); + + if flags_count.remove(&'\u{0000}').is_some() { + p.emit_err(span, SyntaxError::UnknownRegExpFlags); + } + + if let 
Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) { + p.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag)); + } + + return Ok(Lit::Regex(Regex { span, exp, flags }).into()); + } + } else if token.is_backquote() { + let ctx = p.ctx() & !Context::WillExpectColonForCond; + + // parse template literal + return Ok(parse_tpl(p.with_ctx(ctx).deref_mut(), false)?.into()); + } else if token.is_lparen() { + return parse_paren_expr_or_arrow_fn(p, can_be_arrow, None); + } + } + + let decorators = parse_decorators(p, false)?; + + if p.input_mut().is(&P::Token::CLASS) { + return parse_class_expr(p, start, decorators); + } + + if p.input_mut().is(&P::Token::LET) + || (p.input().syntax().typescript() + && (p.input_mut().cur().is_some_and(|cur| cur.is_await()) || p.is_ident_ref())) + || p.is_ident_ref() + { + let ctx = p.ctx(); + let id = parse_ident( + p, + !ctx.contains(Context::InGenerator), + !ctx.contains(Context::InAsync), + )?; + if id.is_reserved_in_strict_mode( + p.ctx().contains(Context::Module) && !p.ctx().contains(Context::InDeclare), + ) { + p.emit_strict_mode_err( + p.input().prev_span(), + SyntaxError::InvalidIdentInStrict(id.sym.clone()), + ); + } + + if can_be_arrow + && id.sym == "async" + && !p.input_mut().had_line_break_before_cur() + && p.is_ident_ref() + { + // see https://github.com/tc39/ecma262/issues/2034 + // ```js + // for(async of + // for(async of x); + // for(async of =>{};;); + // ``` + if ctx.contains(Context::ForLoopInit) + && p.input_mut().is(&P::Token::OF) + && !peek!(p).is_some_and(|peek| peek.is_arrow()) + { + // ```spec https://tc39.es/ecma262/#prod-ForInOfStatement + // for ( [lookahead ∉ { let, async of }] LeftHandSideExpression[?Yield, ?Await] of AssignmentExpression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] + // [+Await] for await ( [lookahead ≠ let] LeftHandSideExpression[?Yield, ?Await] of AssignmentExpression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] + // ``` + + if 
!ctx.contains(Context::ForAwaitLoopInit) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1106); + } + + return Ok(id.into()); + } + + let ident = parse_binding_ident(p, false)?; + if p.input().syntax().typescript() + && ident.sym == "as" + && !p.input_mut().is(&P::Token::ARROW) + { + // async as type + let type_ann = p.in_type().parse_with(parse_ts_type)?; + return Ok(TsAsExpr { + span: p.span(start), + expr: Box::new(id.into()), + type_ann, + } + .into()); + } + + // async a => body + let arg = ident.into(); + let params = vec![arg]; + expect!(p, &P::Token::ARROW); + let body = parse_fn_block_or_expr_body( + p, + true, + false, + true, + params.is_simple_parameter_list(), + )?; + + return Ok(ArrowExpr { + span: p.span(start), + body, + params, + is_async: true, + is_generator: false, + ..Default::default() + } + .into()); + } else if can_be_arrow + && !p.input_mut().had_line_break_before_cur() + && p.input_mut().eat(&P::Token::ARROW) + { + if p.ctx().contains(Context::Strict) && id.is_reserved_in_strict_bind() { + p.emit_strict_mode_err(id.span, SyntaxError::EvalAndArgumentsInStrict) + } + let params = vec![id.into()]; + let body = parse_fn_block_or_expr_body( + p, + false, + false, + true, + params.is_simple_parameter_list(), + )?; + + return Ok(ArrowExpr { + span: p.span(start), + body, + params, + is_async: false, + is_generator: false, + ..Default::default() + } + .into()); + } else { + return Ok(id.into()); + } + } + + if p.input_mut().eat(&P::Token::HASH) { + let id = parse_ident_name(p)?; + return Ok(PrivateName { + span: p.span(start), + name: id.sym, + } + .into()); + } + + syntax_error!(p, p.input().cur_span(), SyntaxError::TS1109) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/expr_ext.rs b/crates/swc_ecma_lexer/src/common/parser/expr_ext.rs new file mode 100644 index 000000000000..40b3c9c5e09b --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/expr_ext.rs @@ -0,0 +1,92 @@ +use swc_ecma_ast::{ + EsReserved, Expr, MemberExpr, 
ParenExpr, TsAsExpr, TsInstantiation, TsNonNullExpr, + TsSatisfiesExpr, TsTypeAssertion, +}; + +pub trait ExprExt { + fn as_expr(&self) -> &Expr; + + /// "IsValidSimpleAssignmentTarget" from spec. + fn is_valid_simple_assignment_target(&self, strict: bool) -> bool { + match self.as_expr() { + Expr::Ident(ident) => { + if strict && ident.is_reserved_in_strict_bind() { + return false; + } + true + } + + Expr::This(..) + | Expr::Lit(..) + | Expr::Array(..) + | Expr::Object(..) + | Expr::Fn(..) + | Expr::Class(..) + | Expr::Tpl(..) + | Expr::TaggedTpl(..) => false, + Expr::Paren(ParenExpr { expr, .. }) => expr.is_valid_simple_assignment_target(strict), + + Expr::Member(MemberExpr { obj, .. }) => match obj.as_ref() { + Expr::Member(..) => obj.is_valid_simple_assignment_target(strict), + Expr::OptChain(..) => false, + _ => true, + }, + + Expr::SuperProp(..) => true, + + Expr::New(..) | Expr::Call(..) => false, + // TODO: Spec only mentions `new.target` + Expr::MetaProp(..) => false, + + Expr::Update(..) => false, + + Expr::Unary(..) | Expr::Await(..) => false, + + Expr::Bin(..) => false, + + Expr::Cond(..) => false, + + Expr::Yield(..) | Expr::Arrow(..) | Expr::Assign(..) => false, + + Expr::Seq(..) => false, + + Expr::OptChain(..) => false, + + // MemberExpression is valid assignment target + Expr::PrivateName(..) => false, + + // jsx + Expr::JSXMember(..) + | Expr::JSXNamespacedName(..) + | Expr::JSXEmpty(..) + | Expr::JSXElement(..) + | Expr::JSXFragment(..) => false, + + // typescript + Expr::TsNonNull(TsNonNullExpr { ref expr, .. }) + | Expr::TsTypeAssertion(TsTypeAssertion { ref expr, .. }) + | Expr::TsAs(TsAsExpr { ref expr, .. }) + | Expr::TsInstantiation(TsInstantiation { ref expr, .. }) + | Expr::TsSatisfies(TsSatisfiesExpr { ref expr, .. }) => { + expr.is_valid_simple_assignment_target(strict) + } + + Expr::TsConstAssertion(..) => false, + + Expr::Invalid(..) 
=> false, + } + } +} + +impl ExprExt for Box { + #[inline(always)] + fn as_expr(&self) -> &Expr { + self + } +} +impl ExprExt for Expr { + #[inline(always)] + fn as_expr(&self) -> &Expr { + self + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/ident.rs b/crates/swc_ecma_lexer/src/common/parser/ident.rs new file mode 100644 index 000000000000..8c6247de46ad --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/ident.rs @@ -0,0 +1,226 @@ +use either::Either; +use swc_atoms::atom; +use swc_common::{BytePos, Span}; +use swc_ecma_ast::*; + +use super::{buffer::Buffer, expr::parse_lit, PResult, Parser}; +use crate::{ + common::{context::Context, lexer::token::TokenFactory}, + error::SyntaxError, +}; + +// https://tc39.es/ecma262/#prod-ModuleExportName +pub fn parse_module_export_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let Ok(cur) = cur!(p, false) else { + unexpected!(p, "identifier or string"); + }; + let module_export_name = if cur.is_str() { + match parse_lit(p)? { + Lit::Str(str_lit) => ModuleExportName::Str(str_lit), + _ => unreachable!(), + } + } else if cur.is_word() { + ModuleExportName::Ident(parse_ident_name(p)?.into()) + } else { + unexpected!(p, "identifier or string"); + }; + Ok(module_export_name) +} + +/// Use this when spec says "IdentifierName". +/// This allows idents like `catch`. 
+pub fn parse_ident_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + let cur = cur!(p, true); + let w = if cur.is_word() { + let t = p.bump(); + t.take_word(p.input_mut()).unwrap() + } else if cur.is_jsx_name() && p.ctx().contains(Context::InType) { + let t = p.bump(); + t.take_jsx_name(p.input_mut()) + } else { + syntax_error!(p, SyntaxError::ExpectedIdent) + }; + Ok(IdentName::new(w, p.span(start))) +} + +pub fn parse_maybe_private_name<'a, P: Parser<'a>>( + p: &mut P, +) -> PResult> { + let is_private = p.input_mut().is(&P::Token::HASH); + if is_private { + parse_private_name(p).map(Either::Left) + } else { + parse_ident_name(p).map(Either::Right) + } +} + +pub fn parse_private_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + p.assert_and_bump(&P::Token::HASH)?; + let hash_end = p.input().prev_span().hi; + if p.input_mut().cur_pos() - hash_end != BytePos(0) { + syntax_error!(p, p.span(start), SyntaxError::SpaceBetweenHashAndIdent); + } + let id = parse_ident_name(p)?; + Ok(PrivateName { + span: p.span(start), + name: id.sym, + }) +} + +/// IdentifierReference +#[inline] +pub fn parse_ident_ref<'a>(p: &mut impl Parser<'a>) -> PResult { + let ctx = p.ctx(); + parse_ident( + p, + !ctx.contains(Context::InGenerator), + !ctx.contains(Context::InAsync), + ) +} + +/// LabelIdentifier +#[inline] +pub fn parse_label_ident<'a>(p: &mut impl Parser<'a>) -> PResult { + let ctx = p.ctx(); + parse_ident( + p, + !ctx.contains(Context::InGenerator), + !ctx.contains(Context::InAsync), + ) +} + +/// babel: `parseBindingIdentifier` +/// +/// spec: `BindingIdentifier` +pub fn parse_binding_ident<'a>( + p: &mut impl Parser<'a>, + disallow_let: bool, +) -> PResult { + trace_cur!(p, parse_binding_ident); + + if disallow_let && p.input_mut().cur().is_some_and(|cur| cur.is_let()) { + unexpected!(p, "let is reserved in const, let, class declaration") + } + + // "yield" and "await" is **lexically** accepted. 
+ let ident = parse_ident(p, true, true)?; + if ident.is_reserved_in_strict_bind() { + p.emit_strict_mode_err(ident.span, SyntaxError::EvalAndArgumentsInStrict); + } + if (p.ctx().contains(Context::InAsync) || p.ctx().contains(Context::InStaticBlock)) + && ident.sym == "await" + { + p.emit_err(ident.span, SyntaxError::ExpectedIdent); + } + if p.ctx().contains(Context::InGenerator) && ident.sym == "yield" { + p.emit_err(ident.span, SyntaxError::ExpectedIdent); + } + + Ok(ident.into()) +} + +pub fn parse_opt_binding_ident<'a>( + p: &mut impl Parser<'a>, + disallow_let: bool, +) -> PResult> { + trace_cur!(p, parse_opt_binding_ident); + let ctx = p.ctx(); + let Some(cur) = p.input_mut().cur() else { + return Ok(None); + }; + let is_binding_ident = cur.is_word() && !cur.is_reserved(ctx); + if is_binding_ident || (cur.is_this() && p.input().syntax().typescript()) { + parse_binding_ident(p, disallow_let).map(Some) + } else { + Ok(None) + } +} + +/// Identifier +/// +/// In strict mode, "yield" is SyntaxError if matched. +pub fn parse_ident<'a>( + p: &mut impl Parser<'a>, + incl_yield: bool, + incl_await: bool, +) -> PResult { + trace_cur!(p, parse_ident); + + let start = p.cur_pos(); + + let word = p.parse_with(|p| { + let t = cur!(p, true); + if !t.is_word() { + syntax_error!(p, SyntaxError::ExpectedIdent) + } + let t = p.bump(); + + // Spec: + // It is a Syntax Error if this phrase is contained in strict mode code and the + // StringValue of IdentifierName is: "implements", "interface", "let", + // "package", "private", "protected", "public", "static", or "yield". 
+ if t.is_enum() { + p.emit_err( + p.input().prev_span(), + SyntaxError::InvalidIdentInStrict(t.clone().take_word(p.input()).unwrap()), + ); + } else if t.is_yield() + || t.is_let() + || t.is_static() + || t.is_implements() + || t.is_interface() + || t.is_package() + || t.is_private() + || t.is_protected() + || t.is_public() + { + p.emit_strict_mode_err( + p.input().prev_span(), + SyntaxError::InvalidIdentInStrict(t.clone().take_word(p.input()).unwrap()), + ); + }; + + // Spec: + // It is a Syntax Error if StringValue of IdentifierName is the same String + // value as the StringValue of any ReservedWord except for yield or await. + if t.is_await() { + let ctx = p.ctx(); + if ctx.contains(Context::InDeclare) { + Ok(atom!("await")) + } else if ctx.contains(Context::InStaticBlock) { + syntax_error!(p, p.input().prev_span(), SyntaxError::ExpectedIdent) + } else if ctx.contains(Context::Module) | ctx.contains(Context::InAsync) { + syntax_error!(p, p.input().prev_span(), SyntaxError::InvalidIdentInAsync) + } else if incl_await { + Ok(atom!("await")) + } else { + syntax_error!(p, p.input().prev_span(), SyntaxError::ExpectedIdent) + } + } else if t.is_this() && p.input().syntax().typescript() { + Ok(atom!("this")) + } else if t.is_let() { + Ok(atom!("let")) + } else if t.is_known_ident() { + let ident = t.take_known_ident(); + Ok(ident) + } else if t.is_unknown_ident() { + let ident = t.take_unknown_ident(p.input_mut()); + if p.ctx().contains(Context::InClassField) && ident == atom!("arguments") { + p.emit_err(p.input().prev_span(), SyntaxError::ArgumentsInClassField) + } + Ok(ident) + } else if t.is_yield() && incl_yield { + Ok(atom!("yield")) + } else if t.is_null() || t.is_true() || t.is_false() { + syntax_error!(p, p.input().prev_span(), SyntaxError::ExpectedIdent) + } else if t.is_keyword() { + syntax_error!(p, p.input().prev_span(), SyntaxError::ExpectedIdent) + } else { + unreachable!() + } + })?; + + Ok(Ident::new_no_ctxt(word, p.span(start))) +} diff --git 
a/crates/swc_ecma_lexer/src/common/parser/is_directive.rs b/crates/swc_ecma_lexer/src/common/parser/is_directive.rs new file mode 100644 index 000000000000..3408b3c2266d --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/is_directive.rs @@ -0,0 +1,32 @@ +use swc_ecma_ast::{ModuleItem, Stmt}; + +pub trait IsDirective { + fn as_ref(&self) -> Option<&Stmt>; + fn is_use_strict(&self) -> bool { + self.as_ref().is_some_and(Stmt::is_use_strict) + } +} + +impl IsDirective for Box +where + T: IsDirective, +{ + fn as_ref(&self) -> Option<&Stmt> { + T::as_ref(&**self) + } +} + +impl IsDirective for Stmt { + fn as_ref(&self) -> Option<&Stmt> { + Some(self) + } +} + +impl IsDirective for ModuleItem { + fn as_ref(&self) -> Option<&Stmt> { + match *self { + ModuleItem::Stmt(ref s) => Some(s), + _ => None, + } + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/is_invalid_class_name.rs b/crates/swc_ecma_lexer/src/common/parser/is_invalid_class_name.rs new file mode 100644 index 000000000000..fd0ef0d0eca3 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/is_invalid_class_name.rs @@ -0,0 +1,21 @@ +use swc_common::Span; +use swc_ecma_ast::Ident; + +pub trait IsInvalidClassName { + fn invalid_class_name(&self) -> Option; +} + +impl IsInvalidClassName for Ident { + fn invalid_class_name(&self) -> Option { + match &*self.sym { + "string" | "null" | "number" | "object" | "any" | "unknown" | "boolean" | "bigint" + | "symbol" | "void" | "never" | "intrinsic" => Some(self.span), + _ => None, + } + } +} +impl IsInvalidClassName for Option { + fn invalid_class_name(&self) -> Option { + self.as_ref().and_then(|i| i.invalid_class_name()) + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/is_simple_param_list.rs b/crates/swc_ecma_lexer/src/common/parser/is_simple_param_list.rs new file mode 100644 index 000000000000..f3c380b6e04b --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/is_simple_param_list.rs @@ -0,0 +1,32 @@ +use swc_ecma_ast::{Param, 
ParamOrTsParamProp, Pat}; + +pub trait IsSimpleParameterList { + fn is_simple_parameter_list(&self) -> bool; +} + +impl IsSimpleParameterList for Vec { + fn is_simple_parameter_list(&self) -> bool { + self.iter().all(|param| matches!(param.pat, Pat::Ident(_))) + } +} + +impl IsSimpleParameterList for Vec { + fn is_simple_parameter_list(&self) -> bool { + self.iter().all(|pat| matches!(pat, Pat::Ident(_))) + } +} + +impl IsSimpleParameterList for Vec { + fn is_simple_parameter_list(&self) -> bool { + self.iter().all(|param| { + matches!( + param, + ParamOrTsParamProp::TsParamProp(..) + | ParamOrTsParamProp::Param(Param { + pat: Pat::Ident(_), + .. + }) + ) + }) + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/jsx.rs b/crates/swc_ecma_lexer/src/common/parser/jsx.rs new file mode 100644 index 000000000000..a044b39e9ce0 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/jsx.rs @@ -0,0 +1,427 @@ +use std::ops::DerefMut; + +use either::Either; +use swc_common::{BytePos, Span, Spanned}; +use swc_ecma_ast::*; + +use super::{PResult, Parser}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + buffer::Buffer, + expr::{parse_assignment_expr, parse_lit}, + get_qualified_jsx_name, + ident::parse_ident_ref, + typescript::{parse_ts_type_args, try_parse_ts}, + }, + }, + error::SyntaxError, +}; + +/// Parses JSX closing tag starting after ">( + p: &mut P, + start: BytePos, +) -> PResult> { + debug_assert!(p.input().syntax().jsx()); + + if p.input_mut().eat(&P::Token::JSX_TAG_END) { + return Ok(Either::Left(JSXClosingFragment { + span: p.span(start), + })); + } + + let name = parse_jsx_element_name(p)?; + expect!(p, &P::Token::JSX_TAG_END); + Ok(Either::Right(JSXClosingElement { + span: p.span(start), + name, + })) +} + +/// Parses JSX expression enclosed into curly brackets. 
+fn parse_jsx_expr_container<'a, P: Parser<'a>>(p: &mut P, _: BytePos) -> PResult { + debug_assert!(p.input().syntax().jsx()); + + let start = p.input_mut().cur_pos(); + p.bump(); + let expr = if p.input_mut().is(&P::Token::RBRACE) { + parse_jsx_empty_expr(p).map(JSXExpr::JSXEmptyExpr)? + } else { + if p.input_mut().is(&P::Token::DOTDOTDOT) { + p.bump(); + } + p.parse_expr().map(JSXExpr::Expr)? + }; + expect!(p, &P::Token::RBRACE); + Ok(JSXExprContainer { + span: p.span(start), + expr, + }) +} + +/// Parse next token as JSX identifier +fn parse_jsx_ident<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + trace_cur!(p, parse_jsx_ident); + if cur!(p, true).is_jsx_name() { + let t = p.bump(); + let name = t.take_jsx_name(p.input_mut()); + let span = p.input().prev_span(); + Ok(Ident::new_no_ctxt(name, span)) + } else if p.ctx().contains(Context::InForcedJsxContext) { + parse_ident_ref(p) + } else { + unexpected!(p, "jsx identifier") + } +} + +/// Parse namespaced identifier. +fn parse_jsx_namespaced_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + trace_cur!(p, parse_jsx_namespaced_name); + let start = p.input_mut().cur_pos(); + let ns = parse_jsx_ident(p)?.into(); + if !p.input_mut().eat(&P::Token::COLON) { + return Ok(JSXAttrName::Ident(ns)); + } + let name = parse_jsx_ident(p).map(IdentName::from)?; + Ok(JSXAttrName::JSXNamespacedName(JSXNamespacedName { + span: Span::new(start, name.span.hi), + ns, + name, + })) +} + +/// Parses element name in any form - namespaced, member or single +/// identifier. +fn parse_jsx_element_name<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + trace_cur!(p, parse_jsx_element_name); + let start = p.input_mut().cur_pos(); + let mut node = match parse_jsx_namespaced_name(p)? 
{ + JSXAttrName::Ident(i) => JSXElementName::Ident(i.into()), + JSXAttrName::JSXNamespacedName(i) => JSXElementName::JSXNamespacedName(i), + }; + while p.input_mut().eat(&P::Token::DOT) { + let prop = parse_jsx_ident(p).map(IdentName::from)?; + let new_node = JSXElementName::JSXMemberExpr(JSXMemberExpr { + span: p.span(start), + obj: match node { + JSXElementName::Ident(i) => JSXObject::Ident(i), + JSXElementName::JSXMemberExpr(i) => JSXObject::JSXMemberExpr(Box::new(i)), + _ => unimplemented!("JSXNamespacedName -> JSXObject"), + }, + prop, + }); + node = new_node; + } + Ok(node) +} + +/// JSXEmptyExpression is unique type since it doesn't actually parse +/// anything, and so it should start at the end of last read token (left +/// brace) and finish at the beginning of the next one (right brace). +fn parse_jsx_empty_expr<'a>(p: &mut impl Parser<'a>) -> PResult { + debug_assert!(p.input().syntax().jsx()); + let start = p.input_mut().cur_pos(); + Ok(JSXEmptyExpr { + span: Span::new(start, start), + }) +} + +pub fn parse_jsx_text<'a>(p: &mut impl Parser<'a>) -> PResult { + debug_assert!(p.input().syntax().jsx()); + debug_assert!(cur!(p, false).is_ok_and(|t| t.is_jsx_text())); + let token = p.bump(); + let span = p.input().prev_span(); + let (value, raw) = token.take_jsx_text(p.input_mut()); + Ok(JSXText { span, value, raw }) +} + +/// Parses any type of JSX attribute value. +/// +/// TODO(kdy1): Change return type to JSXAttrValue +fn parse_jsx_attr_value<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + trace_cur!(p, parse_jsx_attr_value); + + let start = p.cur_pos(); + + let cur = cur!(p, true); + if cur.is_lbrace() { + let node = parse_jsx_expr_container(p, start)?; + match node.expr { + JSXExpr::JSXEmptyExpr(..) => { + syntax_error!(p, p.span(start), SyntaxError::EmptyJSXAttr) + } + JSXExpr::Expr(..) 
=> Ok(node.into()), + } + } else if cur.is_str() { + let lit = parse_lit(p)?; + Ok(JSXAttrValue::Lit(lit)) + } else if cur.is_jsx_tag_start() { + let expr = parse_jsx_element(p)?; + match expr { + Either::Left(n) => Ok(JSXAttrValue::JSXFragment(n)), + Either::Right(n) => Ok(JSXAttrValue::JSXElement(Box::new(n))), + } + } else { + let span = p.input().cur_span(); + syntax_error!(p, span, SyntaxError::InvalidJSXValue) + } +} + +/// Parse JSX spread child +fn parse_jsx_spread_child<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + let start = p.cur_pos(); + + expect!(p, &P::Token::LBRACE); + expect!(p, &P::Token::DOTDOTDOT); + let expr = p.parse_expr()?; + expect!(p, &P::Token::RBRACE); + + Ok(JSXSpreadChild { + span: p.span(start), + expr, + }) +} + +/// Parses following JSX attribute name-value pair. +fn parse_jsx_attr<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().jsx()); + let start = p.cur_pos(); + + debug_tracing!(p, "parse_jsx_attr"); + + if p.input_mut().eat(&P::Token::LBRACE) { + let dot3_start = p.cur_pos(); + expect!(p, &P::Token::DOTDOTDOT); + let dot3_token = p.span(dot3_start); + let expr = parse_assignment_expr(p)?; + expect!(p, &P::Token::RBRACE); + return Ok(SpreadElement { dot3_token, expr }.into()); + } + + let name = parse_jsx_namespaced_name(p)?; + let value = if p.input_mut().eat(&P::Token::EQUAL) { + parse_jsx_attr_value( + p.with_ctx(p.ctx() & !Context::InCondExpr & !Context::WillExpectColonForCond) + .deref_mut(), + ) + .map(Some)? + } else { + None + }; + + Ok(JSXAttr { + span: p.span(start), + name, + value, + } + .into()) +} + +/// Parses JSX opening tag starting after "<". 
+fn parse_jsx_opening_element_at<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, +) -> PResult> { + debug_assert!(p.input().syntax().jsx()); + + if p.input_mut().eat(&P::Token::JSX_TAG_END) { + return Ok(Either::Left(JSXOpeningFragment { + span: p.span(start), + })); + } + + let name = parse_jsx_element_name( + p.with_ctx(p.ctx() & !Context::ShouldNotLexLtOrGtAsType) + .deref_mut(), + )?; + parse_jsx_opening_element_after_name(p, start, name).map(Either::Right) +} + +/// `jsxParseOpeningElementAfterName` +fn parse_jsx_opening_element_after_name<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + name: JSXElementName, +) -> PResult { + debug_assert!(p.input().syntax().jsx()); + + let type_args = if p.input().syntax().typescript() && p.input_mut().is(&P::Token::LESS) { + try_parse_ts(p, |p| parse_ts_type_args(p).map(Some)) + } else { + None + }; + + let mut attrs = Vec::new(); + while cur!(p, false).is_ok() { + trace_cur!(p, parse_jsx_opening__attrs_loop); + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_slash() || cur.is_jsx_tag_end()) + { + break; + } + + let attr = parse_jsx_attr(p)?; + attrs.push(attr); + } + let self_closing = p.input_mut().eat(&P::Token::DIV); + if !p.input_mut().eat(&P::Token::JSX_TAG_END) + & !(p.ctx().contains(Context::InForcedJsxContext) && p.input_mut().eat(&P::Token::GREATER)) + { + unexpected!(p, "> (jsx closing tag)"); + } + Ok(JSXOpeningElement { + span: p.span(start), + name, + attrs, + self_closing, + type_args, + }) +} + +/// Parses entire JSX element, including its opening tag +/// (starting after "<"), attributes, contents and closing tag. 
+/// +/// babel: `jsxParseElementAt` +fn parse_jsx_element_at<'a, P: Parser<'a>>( + p: &mut P, + start_pos: BytePos, +) -> PResult> { + debug_assert!(p.input().syntax().jsx()); + + let _ = cur!(p, true); + let start = p.cur_pos(); + let cur = p.bump(); + let forced_jsx_context = if cur.is_less() { + true + } else { + debug_assert!(cur.is_jsx_tag_start()); + false + }; + + let mut ctx = p.ctx() & !Context::ShouldNotLexLtOrGtAsType; + ctx.set(Context::InForcedJsxContext, forced_jsx_context); + p.with_ctx(ctx).parse_with(|p| { + debug_tracing!(p, "parse_jsx_element"); + + let opening_element = parse_jsx_opening_element_at(p, start_pos)?; + + trace_cur!(p, parse_jsx_element__after_opening_element); + + let mut children = Vec::new(); + let mut closing_element = None; + + let self_closing = match opening_element { + Either::Right(ref el) => el.self_closing, + _ => false, + }; + + if !self_closing { + 'contents: loop { + let cur = cur!(p, true); + if cur.is_jsx_tag_start() { + let start = p.cur_pos(); + if peek!(p).is_some_and(|peek| peek.is_slash()) { + p.bump(); // JSXTagStart + let _ = cur!(p, true); + p.assert_and_bump(&P::Token::DIV)?; + closing_element = parse_jsx_closing_element_at(p, start).map(Some)?; + break 'contents; + } + children.push(parse_jsx_element_at(p, start).map(|e| match e { + Either::Left(e) => JSXElementChild::from(e), + Either::Right(e) => JSXElementChild::from(Box::new(e)), + })?); + } else if cur.is_jsx_text() { + children.push(parse_jsx_text(p).map(JSXElementChild::from)?) 
+ } else if cur.is_lbrace() { + let start = p.cur_pos(); + if peek!(p).is_some_and(|peek| peek.is_dotdotdot()) { + children.push(parse_jsx_spread_child(p).map(JSXElementChild::from)?); + } else { + children + .push(parse_jsx_expr_container(p, start).map(JSXElementChild::from)?); + } + } else { + unexpected!(p, "< (jsx tag start), jsx text or {") + } + } + } + let span = p.span(start); + + Ok(match (opening_element, closing_element) { + (Either::Left(..), Some(Either::Right(closing))) => { + syntax_error!(p, closing.span(), SyntaxError::JSXExpectedClosingTagForLtGt); + } + (Either::Right(opening), Some(Either::Left(closing))) => { + syntax_error!( + p, + closing.span(), + SyntaxError::JSXExpectedClosingTag { + tag: get_qualified_jsx_name(&opening.name) + } + ); + } + (Either::Left(opening), Some(Either::Left(closing))) => Either::Left(JSXFragment { + span, + opening, + children, + closing, + }), + (Either::Right(opening), None) => Either::Right(JSXElement { + span, + opening, + children, + closing: None, + }), + (Either::Right(opening), Some(Either::Right(closing))) => { + if get_qualified_jsx_name(&closing.name) != get_qualified_jsx_name(&opening.name) { + syntax_error!( + p, + closing.span(), + SyntaxError::JSXExpectedClosingTag { + tag: get_qualified_jsx_name(&opening.name) + } + ); + } + Either::Right(JSXElement { + span, + opening, + children, + closing: Some(closing), + }) + } + _ => unreachable!(), + }) + }) +} + +/// Parses entire JSX element from current position. 
+/// +/// babel: `jsxParseElement` +pub fn parse_jsx_element<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_jsx_element); + + debug_assert!(p.input().syntax().jsx()); + debug_assert!({ + let cur = cur!(p, true); + cur.is_jsx_tag_start() || cur.is_less() + }); + + let start_pos = p.cur_pos(); + + parse_jsx_element_at( + p.with_ctx(p.ctx() & !Context::InCondExpr & !Context::WillExpectColonForCond) + .deref_mut(), + start_pos, + ) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/macros.rs b/crates/swc_ecma_lexer/src/common/parser/macros.rs new file mode 100644 index 000000000000..83f641d3ed89 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/macros.rs @@ -0,0 +1,144 @@ +/// cur!($parser, required:bool) +macro_rules! cur { + ($p:expr, false) => {{ + match $p.input_mut().cur() { + Some(c) => Ok(c), + None => { + let pos = $p.input().end_pos(); + let last = Span::new(pos, pos); + Err(crate::error::Error::new( + last, + crate::error::SyntaxError::Eof, + )) + } + } + }}; + ($p:expr, true) => {{ + match $p.input_mut().cur() { + Some(c) => { + if c.is_error() { + let c = $p.input_mut().bump(); + let err = c.take_error($p.input_mut()); + return Err(err); + } else { + c + } + } + None => { + let pos = $p.input().end_pos(); + let span = Span::new(pos, pos); + let err = crate::error::Error::new(span, crate::error::SyntaxError::Eof); + return Err(err); + } + } + }}; +} + +macro_rules! unexpected { + ($p:expr, $expected:literal) => {{ + let got = $p.input_mut().dump_cur(); + syntax_error!( + $p, + $p.input().cur_span(), + SyntaxError::Unexpected { + got, + expected: $expected + } + ) + }}; +} + +macro_rules! expect { + ($p:expr, $t:expr) => {{ + if !$p.input_mut().eat($t) { + let span = $p.input().cur_span(); + let cur = $p.input_mut().dump_cur(); + syntax_error!($p, span, SyntaxError::Expected(format!("{:?}", $t), cur)) + } + }}; +} + +macro_rules! 
syntax_error { + ($p:expr, $err:expr) => { + syntax_error!($p, $p.input().cur_span(), $err) + }; + ($p:expr, $span:expr, $err:expr) => {{ + let err = $crate::error::Error::new($span, $err); + { + if $p.input_mut().cur().is_some_and(|t| t.is_error()) { + let c = $p.input_mut().bump(); + let err = c.take_error($p.input_mut()); + $p.emit_error(err); + } + } + if cfg!(feature = "debug") { + tracing::error!( + "Syntax error called from {}:{}:{}\nCurrent token = {:?}", + file!(), + line!(), + column!(), + $p.input_mut().cur() + ); + } + return Err(err.into()); + }}; +} + +macro_rules! peek { + ($p:expr) => {{ + debug_assert!( + $p.input().knows_cur(), + "parser should not call peek() without knowing current token. +Current token is {:?}", + cur!($p, false), + ); + $p.input_mut().peek() + }}; +} + +macro_rules! trace_cur { + ($p:expr, $name:ident) => {{ + if cfg!(feature = "debug") { + tracing::debug!("{}: {:?}", stringify!($name), $p.input_mut().cur()); + } + }}; +} + +macro_rules! debug_tracing { + ($p:expr, $name:tt) => {{ + #[cfg(feature = "debug")] + { + tracing::span!( + tracing::Level::ERROR, + $name, + cur = tracing::field::debug(&$p.input.cur()) + ) + .entered() + } + }}; +} + +/// Returns true on eof. +macro_rules! eof { + ($p:expr) => { + cur!($p, false).is_err() + }; +} + +macro_rules! return_if_arrow { + ($p:expr, $expr:expr) => {{ + // FIXME: + // + // + + // let is_cur = match $p.state.potential_arrow_start { + // Some(start) => $expr.span.lo() == start, + // None => false + // }; + // if is_cur { + if let Expr::Arrow { .. 
} = *$expr { + return Ok($expr); + } + // } + }}; +} diff --git a/crates/swc_ecma_lexer/src/common/parser/mod.rs b/crates/swc_ecma_lexer/src/common/parser/mod.rs new file mode 100644 index 000000000000..0ac1a5a64d13 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/mod.rs @@ -0,0 +1,474 @@ +use std::ops::DerefMut; + +use expr::parse_assignment_expr; +use expr_ext::ExprExt; +use swc_common::{BytePos, Span, Spanned}; +use swc_ecma_ast::*; + +use self::{ + buffer::{Buffer, NextTokenAndSpan}, + ctx::WithCtx, + state::{State, WithState}, + token_and_span::TokenAndSpan, +}; +use super::{context::Context, input::Tokens, lexer::token::TokenFactory}; +use crate::{ + error::{Error, SyntaxError}, + Syntax, +}; + +pub type PResult = Result; + +pub mod buffer; +pub mod ctx; +pub mod expr_ext; +pub mod is_directive; +pub mod is_invalid_class_name; +pub mod is_simple_param_list; +#[macro_use] +mod macros; +pub mod assign_target_or_spread; +pub mod class_and_fn; +pub mod expr; +pub mod ident; +pub mod jsx; +pub mod module_item; +pub mod object; +pub mod output_type; +pub mod parse_object; +pub mod pat; +pub mod pat_type; +pub mod state; +pub mod stmt; +pub mod token_and_span; +pub mod typescript; +mod util; +#[cfg(feature = "verify")] +pub mod verifier; + +pub use util::{ + get_qualified_jsx_name, has_use_strict, is_constructor, is_not_this, make_decl_declare, + unwrap_ts_non_null, +}; + +pub trait Parser<'a>: Sized + Clone { + type Token: std::fmt::Debug + + Clone + + TokenFactory<'a, Self::TokenAndSpan, Self::I, Buffer = Self::Buffer>; + type Lexer: super::lexer::Lexer<'a, Self::TokenAndSpan>; + type Next: NextTokenAndSpan; + type TokenAndSpan: TokenAndSpan; + type I: Tokens; + type Buffer: self::buffer::Buffer< + 'a, + Lexer = Self::Lexer, + Token = Self::Token, + TokenAndSpan = Self::TokenAndSpan, + I = Self::I, + >; + + fn input(&self) -> &Self::Buffer; + fn input_mut(&mut self) -> &mut Self::Buffer; + fn state(&self) -> &State; + fn state_mut(&mut self) -> &mut 
State; + + #[inline(always)] + fn with_state<'w>(&'w mut self, state: State) -> WithState<'a, 'w, Self> { + let orig_state = std::mem::replace(self.state_mut(), state); + WithState { + orig_state, + inner: self, + marker: std::marker::PhantomData, + } + } + + #[inline(always)] + fn ctx(&self) -> Context { + self.input().get_ctx() + } + + /// Original context is restored when returned guard is dropped. + #[inline(always)] + fn with_ctx<'w>(&'w mut self, ctx: Context) -> WithCtx<'a, 'w, Self> { + let orig_ctx = self.ctx(); + self.set_ctx(ctx); + WithCtx { + orig_ctx, + inner: self, + marker: std::marker::PhantomData, + } + } + + #[inline(always)] + fn set_ctx(&mut self, ctx: Context) { + self.input_mut().set_ctx(ctx); + } + + #[inline(always)] + fn strict_mode<'w>(&'w mut self) -> WithCtx<'a, 'w, Self> { + let ctx = self.ctx() | Context::Strict; + self.with_ctx(ctx) + } + + /// Original context is restored when returned guard is dropped. + #[inline(always)] + fn in_type<'w>(&'w mut self) -> WithCtx<'a, 'w, Self> { + let ctx = self.ctx() | Context::InType; + self.with_ctx(ctx) + } + + /// Original context is restored when returned guard is dropped. 
+ #[inline(always)] + fn include_in_expr<'w>(&'w mut self, include_in_expr: bool) -> WithCtx<'a, 'w, Self> { + let mut ctx = self.ctx(); + ctx.set(Context::IncludeInExpr, include_in_expr); + self.with_ctx(ctx) + } + + #[inline(always)] + fn syntax(&self) -> Syntax { + self.input().syntax() + } + + /// Parse with given closure + #[inline(always)] + fn parse_with(&mut self, f: F) -> PResult + where + F: FnOnce(&mut Self) -> PResult, + { + f(self) + } + + #[cold] + fn emit_err(&mut self, span: Span, error: SyntaxError) { + if self.ctx().contains(Context::IgnoreError) || !self.syntax().early_errors() { + return; + } + self.emit_error(crate::error::Error::new(span, error)) + } + + #[cold] + fn emit_error(&mut self, error: crate::error::Error) { + if self.ctx().contains(Context::IgnoreError) || !self.syntax().early_errors() { + return; + } + let cur = self.input_mut().cur(); + if cur.is_some_and(|cur| cur.is_error()) { + let err = self.input_mut().bump(); + let err = err.take_error(self.input_mut()); + self.input().iter().add_error(err); + } + self.input().iter().add_error(error); + } + + #[cold] + fn emit_strict_mode_err(&self, span: Span, error: SyntaxError) { + if self.ctx().contains(Context::IgnoreError) { + return; + } + let error = crate::error::Error::new(span, error); + self.input().iter().add_module_mode_error(error); + } + + fn verify_expr(&mut self, expr: Box) -> PResult> { + #[cfg(feature = "verify")] + { + use swc_ecma_visit::Visit; + let mut v = self::verifier::Verifier { errors: Vec::new() }; + v.visit_expr(&expr); + for (span, error) in v.errors { + self.emit_err(span, error); + } + } + Ok(expr) + } + + #[inline(always)] + fn cur_pos(&mut self) -> BytePos { + self.input_mut().cur_pos() + } + + #[inline(always)] + fn last_pos(&self) -> BytePos { + self.input().prev_span().hi + } + + #[inline] + fn is_general_semi(&mut self) -> bool { + let Some(cur) = self.input_mut().cur() else { + return true; + }; + cur.is_semi() || cur.is_rbrace() || 
self.input_mut().had_line_break_before_cur() + } + + fn eat_general_semi(&mut self) -> bool { + if cfg!(feature = "debug") { + tracing::trace!("eat(';'): cur={:?}", cur!(self, false)); + } + let Some(cur) = self.input_mut().cur() else { + return true; + }; + if cur.is_semi() { + self.bump(); + true + } else { + cur.is_rbrace() || self.input_mut().had_line_break_before_cur() + } + } + + #[inline] + fn expect_general_semi(&mut self) -> PResult<()> { + if !self.eat_general_semi() { + let span = self.input().cur_span(); + let cur = self.input_mut().dump_cur(); + syntax_error!(self, span, SyntaxError::Expected(";".to_string(), cur)) + } + Ok(()) + } + + #[inline(always)] + fn bump(&mut self) -> Self::Token { + debug_assert!( + self.input().knows_cur(), + "parser should not call bump() without knowing current token" + ); + self.input_mut().bump() + } + + #[inline] + fn span(&self, start: BytePos) -> Span { + let end = self.last_pos(); + if cfg!(debug_assertions) && start > end { + unreachable!( + "assertion failed: (span.start <= span.end). 
+ start = {}, end = {}", + start.0, end.0 + ) + } + Span::new(start, end) + } + + #[inline(always)] + fn assert_and_bump(&mut self, token: &Self::Token) -> PResult<()> { + if cfg!(debug_assertions) && !self.input_mut().is(token) { + unreachable!( + "assertion failed: expected {:?}, got {:?}", + token, + self.input_mut().cur() + ); + } + let _ = cur!(self, true); + self.bump(); + Ok(()) + } + + fn check_assign_target(&mut self, expr: &Expr, deny_call: bool) { + if !expr.is_valid_simple_assignment_target(self.ctx().contains(Context::Strict)) { + self.emit_err(expr.span(), SyntaxError::TS2406); + } + + // We follow behavior of tsc + if self.input().syntax().typescript() && self.syntax().early_errors() { + let is_eval_or_arguments = match expr { + Expr::Ident(i) => i.is_reserved_in_strict_bind(), + _ => false, + }; + + if is_eval_or_arguments { + self.emit_strict_mode_err(expr.span(), SyntaxError::TS1100); + } + + fn should_deny(e: &Expr, deny_call: bool) -> bool { + match e { + Expr::Lit(..) => false, + Expr::Call(..) => deny_call, + Expr::Bin(..) => false, + Expr::Paren(ref p) => should_deny(&p.expr, deny_call), + + _ => true, + } + } + + // It is an early Reference Error if LeftHandSideExpression is neither + // an ObjectLiteral nor an ArrayLiteral and + // IsValidSimpleAssignmentTarget of LeftHandSideExpression is false. 
+ if !is_eval_or_arguments + && !expr.is_valid_simple_assignment_target(self.ctx().contains(Context::Strict)) + && should_deny(expr, deny_call) + { + self.emit_err(expr.span(), SyntaxError::TS2406); + } + } + } + + fn parse_tpl_element(&mut self, is_tagged_tpl: bool) -> PResult { + let start = self.cur_pos(); + let cur = cur!(self, true); + let (raw, cooked) = if cur.is_template() { + let cur = self.bump(); + let (cooked, raw) = cur.take_template(self.input_mut()); + match cooked { + Ok(cooked) => (raw, Some(cooked)), + Err(err) => { + if is_tagged_tpl { + (raw, None) + } else { + return Err(err); + } + } + } + } else { + unexpected!(self, "template token") + }; + let tail = self.input_mut().is(&Self::Token::BACKQUOTE); + Ok(TplElement { + span: self.span(start), + raw, + tail, + cooked, + }) + } + + /// spec: 'PropertyName' + fn parse_prop_name(&mut self) -> PResult { + trace_cur!(self, parse_prop_name); + let ctx = self.ctx() | Context::InPropertyName; + self.with_ctx(ctx).parse_with(|p| { + let start = p.input_mut().cur_pos(); + let cur = cur!(p, true); + let v = if cur.is_str() { + let t = p.bump(); + let (value, raw) = t.take_str(p.input_mut()); + PropName::Str(Str { + span: p.span(start), + value, + raw: Some(raw), + }) + } else if cur.is_num() { + let t = p.bump(); + let (value, raw) = t.take_num(p.input_mut()); + PropName::Num(Number { + span: p.span(start), + value, + raw: Some(raw), + }) + } else if cur.is_bigint() { + let t = p.bump(); + let (value, raw) = t.take_bigint(p.input_mut()); + PropName::BigInt(BigInt { + span: p.span(start), + value, + raw: Some(raw), + }) + } else if cur.is_word() { + let t = p.bump(); + let w = t.take_word(p.input_mut()).unwrap(); + PropName::Ident(IdentName::new(w, p.span(start))) + } else if cur.is_lbracket() { + p.bump(); + let inner_start = p.input_mut().cur_pos(); + let mut expr = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + if p.syntax().typescript() && p.input_mut().is(&Self::Token::COMMA) { + let 
mut exprs = vec![expr]; + while p.input_mut().eat(&Self::Token::COMMA) { + // + exprs.push(parse_assignment_expr(p.include_in_expr(true).deref_mut())?); + } + p.emit_err(p.span(inner_start), SyntaxError::TS1171); + expr = Box::new( + SeqExpr { + span: p.span(inner_start), + exprs, + } + .into(), + ); + } + expect!(p, &Self::Token::RBRACKET); + PropName::Computed(ComputedPropName { + span: p.span(start), + expr, + }) + } else { + unexpected!( + p, + "identifier, string literal, numeric literal or [ for the computed key" + ) + }; + Ok(v) + }) + } + + /// AssignmentExpression[+In, ?Yield, ?Await] + /// ...AssignmentExpression[+In, ?Yield, ?Await] + fn parse_expr_or_spread(&mut self) -> PResult { + trace_cur!(self, parse_expr_or_spread); + let start = self.input_mut().cur_pos(); + if self.input_mut().eat(&Self::Token::DOTDOTDOT) { + let spread_span = self.span(start); + let spread = Some(spread_span); + parse_assignment_expr(self.include_in_expr(true).deref_mut()) + .map_err(|err| { + Error::new( + err.span(), + SyntaxError::WithLabel { + inner: Box::new(err), + span: spread_span, + note: "An expression should follow '...'", + }, + ) + }) + .map(|expr| ExprOrSpread { spread, expr }) + } else { + parse_assignment_expr(self).map(|expr| ExprOrSpread { spread: None, expr }) + } + } + + fn parse_expr(&mut self) -> PResult> { + trace_cur!(self, parse_expr); + debug_tracing!(self, "parse_expr"); + let expr = parse_assignment_expr(self)?; + let start = expr.span_lo(); + + if self.input_mut().is(&Self::Token::COMMA) { + let mut exprs = vec![expr]; + + while self.input_mut().eat(&Self::Token::COMMA) { + exprs.push(parse_assignment_expr(self)?); + } + + return Ok(SeqExpr { + span: self.span(start), + exprs, + } + .into()); + } + + Ok(expr) + } + + fn mark_found_module_item(&mut self); + + #[inline] + fn is_ident_ref(&mut self) -> bool { + let ctx = self.ctx(); + self.input_mut() + .cur() + .is_some_and(|cur| cur.is_word() && !cur.is_reserved(ctx)) + } + + #[inline] + fn 
peek_is_ident_ref(&mut self) -> bool { + let ctx = self.ctx(); + peek!(self).is_some_and(|peek| peek.is_word() && !peek.is_reserved(ctx)) + } + + #[inline(always)] + fn eat_ident_ref(&mut self) -> bool { + if self.is_ident_ref() { + self.bump(); + true + } else { + false + } + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/module_item.rs b/crates/swc_ecma_lexer/src/common/parser/module_item.rs new file mode 100644 index 000000000000..758a3e34287b --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/module_item.rs @@ -0,0 +1,942 @@ +use std::ops::DerefMut; + +use swc_common::Span; +use swc_ecma_ast::*; + +use super::{ + buffer::Buffer, + class_and_fn::{parse_default_async_fn, parse_default_fn, parse_fn_decl}, + expr::parse_assignment_expr, + ident::{parse_ident, parse_ident_name, parse_module_export_name}, + stmt::{parse_block_body, parse_stmt_like, parse_var_stmt}, + typescript::{parse_ts_import_equals_decl, try_parse_ts_declare, try_parse_ts_export_decl}, + PResult, Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + class_and_fn::{ + parse_async_fn_decl, parse_class_decl, parse_decorators, parse_default_class, + }, + ident::parse_binding_ident, + object::parse_object_expr, + typescript::{parse_ts_enum_decl, parse_ts_interface_decl}, + }, + }, + error::SyntaxError, +}; + +fn handle_import_export<'a, P: Parser<'a>>( + p: &mut P, + decorators: Vec, +) -> PResult { + if !p.ctx().contains(Context::TopLevel) { + syntax_error!(p, SyntaxError::NonTopLevelImportExport); + } + + let decl = if p.input_mut().is(&P::Token::IMPORT) { + parse_import(p)? + } else if p.input_mut().is(&P::Token::EXPORT) { + parse_export(p, decorators).map(ModuleItem::from)? 
+ } else { + unreachable!( + "handle_import_export should not be called if current token isn't import nor export" + ) + }; + + Ok(decl) +} + +pub fn parse_module_item_block_body<'a, P: Parser<'a>>( + p: &mut P, + allow_directives: bool, + end: Option<&P::Token>, +) -> PResult> { + parse_block_body(p, allow_directives, end, handle_import_export) +} + +/// Parses `from 'foo.js' with {};` or `from 'foo.js' assert {};` +fn parse_from_clause_and_semi<'a, P: Parser<'a>>( + p: &mut P, +) -> PResult<(Box, Option>)> { + expect!(p, &P::Token::FROM); + + let str_start = p.cur_pos(); + let cur = cur!(p, true); + let src = if cur.is_str() { + let t = p.bump(); + let (value, raw) = t.take_str(p.input_mut()); + Box::new(Str { + span: p.span(str_start), + value, + raw: Some(raw), + }) + } else { + unexpected!(p, "a string literal") + }; + let _ = cur!(p, false); + let with = if p.input().syntax().import_attributes() + && !p.input_mut().had_line_break_before_cur() + && (p.input_mut().eat(&P::Token::ASSERT) || p.input_mut().eat(&P::Token::WITH)) + { + match parse_object_expr(p)? { + Expr::Object(v) => Some(Box::new(v)), + _ => unreachable!(), + } + } else { + None + }; + p.expect_general_semi()?; + Ok((src, with)) +} + +fn parse_named_export_specifier<'a, P: Parser<'a>>( + p: &mut P, + type_only: bool, +) -> PResult { + let start = p.cur_pos(); + + let mut is_type_only = false; + + let orig = match parse_module_export_name(p)? 
{ + ModuleExportName::Ident(orig_ident) => { + // Handle: + // `export { type xx }` + // `export { type xx as yy }` + // `export { type as }` + // `export { type as as }` + // `export { type as as as }` + if p.syntax().typescript() + && orig_ident.sym == "type" + && p.input_mut().cur().is_some_and(|cur| cur.is_word()) + { + let possibly_orig = parse_ident_name(p).map(Ident::from)?; + if possibly_orig.sym == "as" { + // `export { type as }` + if !p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + if type_only { + p.emit_err(orig_ident.span, SyntaxError::TS2207); + } + + return Ok(ExportNamedSpecifier { + span: p.span(start), + orig: ModuleExportName::Ident(possibly_orig), + exported: None, + is_type_only: true, + }); + } + + let maybe_as = parse_ident_name(p).map(Ident::from)?; + if maybe_as.sym == "as" { + if p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + // `export { type as as as }` + // `export { type as as foo }` + let exported = parse_ident_name(p).map(Ident::from)?; + + if type_only { + p.emit_err(orig_ident.span, SyntaxError::TS2207); + } + + return Ok(ExportNamedSpecifier { + span: Span::new(start, orig_ident.span.hi()), + orig: ModuleExportName::Ident(possibly_orig), + exported: Some(ModuleExportName::Ident(exported)), + is_type_only: true, + }); + } else { + // `export { type as as }` + return Ok(ExportNamedSpecifier { + span: Span::new(start, orig_ident.span.hi()), + orig: ModuleExportName::Ident(orig_ident), + exported: Some(ModuleExportName::Ident(maybe_as)), + is_type_only: false, + }); + } + } else { + // `export { type as xxx }` + return Ok(ExportNamedSpecifier { + span: Span::new(start, orig_ident.span.hi()), + orig: ModuleExportName::Ident(orig_ident), + exported: Some(ModuleExportName::Ident(maybe_as)), + is_type_only: false, + }); + } + } else { + // `export { type xx }` + // `export { type xx as yy }` + if type_only { + p.emit_err(orig_ident.span, SyntaxError::TS2207); + } + + is_type_only = true; + 
ModuleExportName::Ident(possibly_orig) + } + } else { + ModuleExportName::Ident(orig_ident) + } + } + module_export_name => module_export_name, + }; + + let exported = if p.input_mut().eat(&P::Token::AS) { + Some(parse_module_export_name(p)?) + } else { + None + }; + + Ok(ExportNamedSpecifier { + span: p.span(start), + orig, + exported, + is_type_only, + }) +} + +fn parse_imported_binding<'a>(p: &mut impl Parser<'a>) -> PResult { + let ctx = p.ctx() & !Context::InAsync & !Context::InGenerator; + Ok(parse_binding_ident(p.with_ctx(ctx).deref_mut(), false)?.into()) +} + +fn parse_imported_default_binding<'a>(p: &mut impl Parser<'a>) -> PResult { + parse_imported_binding(p) +} + +/// Parse `foo`, `foo2 as bar` in `import { foo, foo2 as bar }` +fn parse_import_specifier<'a, P: Parser<'a>>( + p: &mut P, + type_only: bool, +) -> PResult { + let start = p.cur_pos(); + match parse_module_export_name(p)? { + ModuleExportName::Ident(mut orig_name) => { + let mut is_type_only = false; + // Handle: + // `import { type xx } from 'mod'` + // `import { type xx as yy } from 'mod'` + // `import { type as } from 'mod'` + // `import { type as as } from 'mod'` + // `import { type as as as } from 'mod'` + if p.syntax().typescript() + && orig_name.sym == "type" + && p.input_mut().cur().is_some_and(|cur| cur.is_word()) + { + let possibly_orig_name = parse_ident_name(p).map(Ident::from)?; + if possibly_orig_name.sym == "as" { + // `import { type as } from 'mod'` + if !p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + if p.ctx().is_reserved_word(&possibly_orig_name.sym) { + syntax_error!( + p, + possibly_orig_name.span, + SyntaxError::ReservedWordInImport + ) + } + + if type_only { + p.emit_err(orig_name.span, SyntaxError::TS2206); + } + + return Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: p.span(start), + local: possibly_orig_name, + imported: None, + is_type_only: true, + })); + } + + let maybe_as: Ident = parse_binding_ident(p, false)?.into(); + if maybe_as.sym == 
"as" { + if p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + // `import { type as as as } from 'mod'` + // `import { type as as foo } from 'mod'` + let local: Ident = parse_binding_ident(p, false)?.into(); + + if type_only { + p.emit_err(orig_name.span, SyntaxError::TS2206); + } + + return Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: Span::new(start, orig_name.span.hi()), + local, + imported: Some(ModuleExportName::Ident(possibly_orig_name)), + is_type_only: true, + })); + } else { + // `import { type as as } from 'mod'` + return Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: Span::new(start, maybe_as.span.hi()), + local: maybe_as, + imported: Some(ModuleExportName::Ident(orig_name)), + is_type_only: false, + })); + } + } else { + // `import { type as xxx } from 'mod'` + return Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: Span::new(start, orig_name.span.hi()), + local: maybe_as, + imported: Some(ModuleExportName::Ident(orig_name)), + is_type_only: false, + })); + } + } else { + // `import { type xx } from 'mod'` + // `import { type xx as yy } from 'mod'` + if type_only { + p.emit_err(orig_name.span, SyntaxError::TS2206); + } + + orig_name = possibly_orig_name; + is_type_only = true; + } + } + + if p.input_mut().eat(&P::Token::AS) { + let local: Ident = parse_binding_ident(p, false)?.into(); + return Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: Span::new(start, local.span.hi()), + local, + imported: Some(ModuleExportName::Ident(orig_name)), + is_type_only, + })); + } + + // Handle difference between + // + // 'ImportedBinding' + // 'IdentifierName' as 'ImportedBinding' + if p.ctx().is_reserved_word(&orig_name.sym) { + syntax_error!(p, orig_name.span, SyntaxError::ReservedWordInImport) + } + + let local = orig_name; + Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: p.span(start), + local, + imported: None, + is_type_only, + })) + } + ModuleExportName::Str(orig_str) => { + if 
p.input_mut().eat(&P::Token::AS) { + let local: Ident = parse_binding_ident(p, false)?.into(); + Ok(ImportSpecifier::Named(ImportNamedSpecifier { + span: Span::new(start, local.span.hi()), + local, + imported: Some(ModuleExportName::Str(orig_str)), + is_type_only: false, + })) + } else { + syntax_error!( + p, + orig_str.span, + SyntaxError::ImportBindingIsString(orig_str.value) + ) + } + } + } +} + +fn parse_export<'a, P: Parser<'a>>( + p: &mut P, + mut decorators: Vec, +) -> PResult { + if !p.ctx().contains(Context::Module) { + // Switch to module mode + let ctx = p.ctx() | Context::Module | Context::Strict; + p.set_ctx(ctx); + } + + let start = p.cur_pos(); + p.assert_and_bump(&P::Token::EXPORT)?; + let _ = cur!(p, true); + let after_export_start = p.cur_pos(); + + // "export declare" is equivalent to just "export". + let declare = p.input().syntax().typescript() && p.input_mut().eat(&P::Token::DECLARE); + + if declare { + // TODO: Remove + if let Some(decl) = try_parse_ts_declare(p, after_export_start, decorators.clone())? 
{ + return Ok(ExportDecl { + span: p.span(start), + decl, + } + .into()); + } + } + + if p.input().syntax().typescript() && p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + let cur = cur!(p, true); + let sym = cur.clone().take_word(p.input()).unwrap(); + // TODO: remove clone + if let Some(decl) = try_parse_ts_export_decl(p, decorators.clone(), sym) { + return Ok(ExportDecl { + span: p.span(start), + decl, + } + .into()); + } + } + + if p.input().syntax().typescript() { + if p.input_mut().eat(&P::Token::IMPORT) { + let is_type_only = + p.input_mut().is(&P::Token::TYPE) && peek!(p).is_some_and(|p| p.is_word()); + + if is_type_only { + p.assert_and_bump(&P::Token::TYPE)?; + } + + let id = parse_ident_name(p)?; + + // export import A = B + return parse_ts_import_equals_decl( + p, + start, + id.into(), + /* is_export */ true, + is_type_only, + ) + .map(From::from); + } + + if p.input_mut().eat(&P::Token::EQUAL) { + // `export = x;` + let expr = p.parse_expr()?; + p.expect_general_semi()?; + return Ok(TsExportAssignment { + span: p.span(start), + expr, + } + .into()); + } + + if p.input_mut().eat(&P::Token::AS) { + // `export as namespace A;` + // See `parseNamespaceExportDeclaration` in TypeScript's own parser + expect!(p, &P::Token::NAMESPACE); + let id = parse_ident(p, false, false)?; + p.expect_general_semi()?; + return Ok(TsNamespaceExportDecl { + span: p.span(start), + id, + } + .into()); + } + } + + let ns_export_specifier_start = p.cur_pos(); + + let type_only = p.input().syntax().typescript() && p.input_mut().eat(&P::Token::TYPE); + + // Some("default") if default is exported from 'src' + let mut export_default = None; + + if !type_only && p.input_mut().eat(&P::Token::DEFAULT) { + if p.input_mut().is(&P::Token::AT) { + let start = p.cur_pos(); + let after_decorators = parse_decorators(p, false)?; + + if !decorators.is_empty() { + syntax_error!(p, p.span(start), SyntaxError::TS8038); + } + + decorators = after_decorators; + } + + if 
p.input().syntax().typescript() { + if p.input_mut().is(&P::Token::ABSTRACT) + && peek!(p).is_some_and(|cur| cur.is_class()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + let class_start = p.cur_pos(); + p.assert_and_bump(&P::Token::ABSTRACT)?; + let _ = cur!(p, true); + + return parse_default_class(p, start, class_start, decorators, true) + .map(ModuleDecl::ExportDefaultDecl); + } + if p.input_mut().is(&P::Token::ABSTRACT) + && peek!(p).is_some_and(|cur| cur.is_interface()) + { + p.emit_err(p.input().cur_span(), SyntaxError::TS1242); + p.assert_and_bump(&P::Token::ABSTRACT)?; + } + + if p.input_mut().is(&P::Token::INTERFACE) { + let interface_start = p.cur_pos(); + p.assert_and_bump(&P::Token::INTERFACE)?; + let decl = parse_ts_interface_decl(p, interface_start).map(DefaultDecl::from)?; + return Ok(ExportDefaultDecl { + span: p.span(start), + decl, + } + .into()); + } + } + + if p.input_mut().is(&P::Token::CLASS) { + let class_start = p.cur_pos(); + let decl = parse_default_class(p, start, class_start, decorators, false)?; + return Ok(decl.into()); + } else if p.input_mut().is(&P::Token::ASYNC) + && peek!(p).is_some_and(|cur| cur.is_function()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + let decl = parse_default_async_fn(p, start, decorators)?; + return Ok(decl.into()); + } else if p.input_mut().is(&P::Token::FUNCTION) { + let decl = parse_default_fn(p, start, decorators)?; + return Ok(decl.into()); + } else if p.input().syntax().export_default_from() + && ((p.input_mut().is(&P::Token::FROM) && peek!(p).is_some_and(|peek| peek.is_str())) + || (p.input_mut().is(&P::Token::COMMA) + && (peek!(p).is_some_and(|peek| peek.is_star() || peek.is_lbrace())))) + { + export_default = Some(Ident::new_no_ctxt("default".into(), p.input().prev_span())) + } else { + let expr = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + p.expect_general_semi()?; + return Ok(ExportDefaultExpr { + span: p.span(start), + expr, + } + .into()); 
+ } + } + + if p.input_mut().is(&P::Token::AT) { + let start = p.cur_pos(); + let after_decorators = parse_decorators(p, false)?; + + if !decorators.is_empty() { + syntax_error!(p, p.span(start), SyntaxError::TS8038); + } + + decorators = after_decorators; + } + + let decl = if !type_only && p.input_mut().is(&P::Token::CLASS) { + let class_start = p.cur_pos(); + parse_class_decl(p, start, class_start, decorators, false)? + } else if !type_only + && p.input_mut().is(&P::Token::ASYNC) + && peek!(p).is_some_and(|cur| cur.is_function()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + parse_async_fn_decl(p, decorators)? + } else if !type_only && p.input_mut().is(&P::Token::FUNCTION) { + parse_fn_decl(p, decorators)? + } else if !type_only + && p.input().syntax().typescript() + && p.input_mut().is(&P::Token::CONST) + && peek!(p).is_some_and(|cur| cur.is_enum()) + { + let enum_start = p.cur_pos(); + p.assert_and_bump(&P::Token::CONST)?; + let _ = cur!(p, true); + p.assert_and_bump(&P::Token::ENUM)?; + return parse_ts_enum_decl(p, enum_start, /* is_const */ true) + .map(Decl::from) + .map(|decl| { + ExportDecl { + span: p.span(start), + decl, + } + .into() + }); + } else if !type_only + && (p.input_mut().is(&P::Token::VAR) + || p.input_mut().is(&P::Token::CONST) + || (p.input_mut().is(&P::Token::LET)) + && peek!(p).map(|t| t.follows_keyword_let()).unwrap_or(false)) + { + parse_var_stmt(p, false).map(Decl::Var)? 
+ } else { + // ```javascript + // export foo, * as bar, { baz } from "mod"; // * + // export * as bar, { baz } from "mod"; // * + // export foo, { baz } from "mod"; // * + // export foo, * as bar from "mod"; // * + // export foo from "mod"; // * + // export * as bar from "mod"; // + // export { baz } from "mod"; // + // export { baz } ; // + // export * from "mod"; // + // ``` + + // export default + // export foo + let default = match export_default { + Some(default) => Some(default), + None => { + if p.input().syntax().export_default_from() + && p.input_mut().cur().is_some_and(|cur| cur.is_word()) + { + Some(parse_ident(p, false, false)?) + } else { + None + } + } + }; + + if default.is_none() + && p.input_mut().is(&P::Token::MUL) + && !peek!(p).is_some_and(|cur| cur.is_as()) + { + p.assert_and_bump(&P::Token::MUL)?; + + // improve error message for `export * from foo` + let (src, with) = parse_from_clause_and_semi(p)?; + return Ok(ExportAll { + span: p.span(start), + src, + type_only, + with, + } + .into()); + } + + let mut specifiers = Vec::new(); + + let mut has_default = false; + let mut has_ns = false; + + if let Some(default) = default { + has_default = true; + specifiers.push(ExportSpecifier::Default(ExportDefaultSpecifier { + exported: default, + })) + } + + // export foo, * as bar + // ^ + if !specifiers.is_empty() + && p.input_mut().is(&P::Token::COMMA) + && peek!(p).is_some_and(|cur| cur.is_star()) + { + p.assert_and_bump(&P::Token::COMMA)?; + + has_ns = true; + } + // export * as bar + // ^ + else if specifiers.is_empty() && p.input_mut().is(&P::Token::MUL) { + has_ns = true; + } + + if has_ns { + p.assert_and_bump(&P::Token::MUL)?; + expect!(p, &P::Token::AS); + let name = parse_module_export_name(p)?; + specifiers.push(ExportSpecifier::Namespace(ExportNamespaceSpecifier { + span: p.span(ns_export_specifier_start), + name, + })); + } + + if has_default || has_ns { + if p.input_mut().is(&P::Token::FROM) { + let (src, with) = 
parse_from_clause_and_semi(p)?; + return Ok(NamedExport { + span: p.span(start), + specifiers, + src: Some(src), + type_only, + with, + } + .into()); + } else if !p.input().syntax().export_default_from() { + // emit error + expect!(p, &P::Token::FROM); + } + + expect!(p, &P::Token::COMMA); + } + + expect!(p, &P::Token::LBRACE); + + while !eof!(p) && !p.input_mut().is(&P::Token::RBRACE) { + let specifier = parse_named_export_specifier(p, type_only)?; + specifiers.push(ExportSpecifier::Named(specifier)); + + if p.input_mut().is(&P::Token::RBRACE) { + break; + } else { + expect!(p, &P::Token::COMMA); + } + } + expect!(p, &P::Token::RBRACE); + + let opt = if p.input_mut().is(&P::Token::FROM) { + Some(parse_from_clause_and_semi(p)?) + } else { + for s in &specifiers { + match s { + ExportSpecifier::Default(default) => { + p.emit_err( + default.exported.span, + SyntaxError::ExportExpectFrom(default.exported.sym.clone()), + ); + } + ExportSpecifier::Namespace(namespace) => { + let export_name = match &namespace.name { + ModuleExportName::Ident(i) => i.sym.clone(), + ModuleExportName::Str(s) => s.value.clone(), + }; + p.emit_err(namespace.span, SyntaxError::ExportExpectFrom(export_name)); + } + ExportSpecifier::Named(named) => match &named.orig { + ModuleExportName::Ident(id) if id.is_reserved() => { + p.emit_err(id.span, SyntaxError::ExportExpectFrom(id.sym.clone())); + } + ModuleExportName::Str(s) => { + p.emit_err(s.span, SyntaxError::ExportBindingIsString); + } + _ => {} + }, + } + } + + p.eat_general_semi(); + + None + }; + let (src, with) = match opt { + Some(v) => (Some(v.0), v.1), + None => (None, None), + }; + return Ok(NamedExport { + span: p.span(start), + specifiers, + src, + type_only, + with, + } + .into()); + }; + + Ok(ExportDecl { + span: p.span(start), + decl, + } + .into()) +} + +fn parse_import<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + if peek!(p).is_some_and(|cur| cur.is_dot()) { + let expr = p.parse_expr()?; + + 
p.eat_general_semi(); + + return Ok(ExprStmt { + span: p.span(start), + expr, + } + .into()); + } + + if peek!(p).is_some_and(|cur| cur.is_lparen()) { + let expr = p.parse_expr()?; + + p.eat_general_semi(); + + return Ok(ExprStmt { + span: p.span(start), + expr, + } + .into()); + } + + // It's now import statement + + if !p.ctx().contains(Context::Module) { + // Switch to module mode + let ctx = p.ctx() | Context::Module | Context::Strict; + p.set_ctx(ctx); + } + + expect!(p, &P::Token::IMPORT); + + // Handle import 'mod.js' + let str_start = p.cur_pos(); + if cur!(p, false).is_ok_and(|cur| cur.is_str()) { + let t = p.bump(); + let (value, raw) = t.take_str(p.input_mut()); + let src = Box::new(Str { + span: p.span(str_start), + value, + raw: Some(raw), + }); + let _ = cur!(p, false); + let with = if p.input().syntax().import_attributes() + && !p.input_mut().had_line_break_before_cur() + && (p.input_mut().eat(&P::Token::ASSERT) || p.input_mut().eat(&P::Token::WITH)) + { + match parse_object_expr(p)? 
{ + Expr::Object(v) => Some(Box::new(v)), + _ => unreachable!(), + } + } else { + None + }; + p.eat_general_semi(); + return Ok(ImportDecl { + span: p.span(start), + src, + specifiers: Vec::new(), + type_only: false, + with, + phase: Default::default(), + } + .into()); + } + + let mut type_only = false; + let mut phase = ImportPhase::Evaluation; + let mut specifiers = Vec::with_capacity(4); + + 'import_maybe_ident: { + if p.is_ident_ref() { + let mut local = parse_imported_default_binding(p)?; + + if p.input().syntax().typescript() && local.sym == "type" { + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_lbrace() || cur.is_star()) + { + type_only = true; + break 'import_maybe_ident; + } + + if p.is_ident_ref() { + if !p.input_mut().is(&P::Token::FROM) + || peek!(p).is_some_and(|cur| cur.is_from()) + { + type_only = true; + local = parse_imported_default_binding(p)?; + } else if peek!(p).is_some_and(|cur| cur.is_equal()) { + type_only = true; + local = parse_ident_name(p).map(From::from)?; + } + } + } + + if p.input().syntax().typescript() && p.input_mut().is(&P::Token::EQUAL) { + return parse_ts_import_equals_decl(p, start, local, false, type_only) + .map(ModuleDecl::from) + .map(ModuleItem::from); + } + + if matches!(&*local.sym, "source" | "defer") { + let new_phase = match &*local.sym { + "source" => ImportPhase::Source, + "defer" => ImportPhase::Defer, + _ => unreachable!(), + }; + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_lbrace() || cur.is_star()) + { + phase = new_phase; + break 'import_maybe_ident; + } + + if p.is_ident_ref() && !p.input_mut().is(&P::Token::FROM) + || peek!(p).is_some_and(|cur| cur.is_from()) + { + phase = new_phase; + local = parse_imported_default_binding(p)?; + } + } + + //TODO: Better error reporting + if !p.input_mut().is(&P::Token::FROM) { + expect!(p, &P::Token::COMMA); + } + specifiers.push(ImportSpecifier::Default(ImportDefaultSpecifier { + span: local.span, + local, + })); + } + } + + { + let import_spec_start 
= p.cur_pos(); + if p.input_mut().eat(&P::Token::MUL) { + expect!(p, &P::Token::AS); + let local = parse_imported_binding(p)?; + specifiers.push(ImportSpecifier::Namespace(ImportStarAsSpecifier { + span: p.span(import_spec_start), + local, + })); + } else if p.input_mut().eat(&P::Token::LBRACE) { + while !eof!(p) && !p.input_mut().is(&P::Token::RBRACE) { + specifiers.push(parse_import_specifier(p, type_only)?); + + if p.input_mut().is(&P::Token::RBRACE) { + break; + } else { + expect!(p, &P::Token::COMMA); + } + } + expect!(p, &P::Token::RBRACE); + } + } + + let src = { + expect!(p, &P::Token::FROM); + let str_start = p.cur_pos(); + + if cur!(p, true).is_str() { + let t = p.bump(); + let (value, raw) = t.take_str(p.input_mut()); + Box::new(Str { + span: p.span(str_start), + value, + raw: Some(raw), + }) + } else { + unexpected!(p, "a string literal") + } + }; + + let _ = cur!(p, false); + let with = if p.input().syntax().import_attributes() + && !p.input_mut().had_line_break_before_cur() + && (p.input_mut().eat(&P::Token::ASSERT) || p.input_mut().eat(&P::Token::WITH)) + { + match parse_object_expr(p)? 
{ + Expr::Object(v) => Some(Box::new(v)), + _ => unreachable!(), + } + } else { + None + }; + + p.expect_general_semi()?; + + Ok(ImportDecl { + span: p.span(start), + specifiers, + src, + type_only, + with, + phase, + } + .into()) +} + +pub fn parse_module_item<'a>(p: &mut impl Parser<'a>) -> PResult { + parse_stmt_like( + p.with_ctx(p.ctx() | Context::TopLevel).deref_mut(), + true, + handle_import_export, + ) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/object.rs b/crates/swc_ecma_lexer/src/common/parser/object.rs new file mode 100644 index 000000000000..fc073eea050e --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/object.rs @@ -0,0 +1,439 @@ +use std::ops::DerefMut; + +use swc_common::{Span, Spanned, DUMMY_SP}; +use swc_ecma_ast::*; + +use super::{ + expr::parse_assignment_expr, + pat::{parse_binding_element, parse_binding_pat_or_ident}, + Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + buffer::Buffer, + class_and_fn::parse_fn_args_body, + is_not_this, + pat::{parse_formal_params, parse_unique_formal_params}, + typescript::eat_any_ts_modifier, + }, + }, + error::SyntaxError, +}; + +pub type PResult = Result; + +fn parse_object<'a, P: Parser<'a>, Object, ObjectProp>( + p: &mut P, + parse_prop: impl Fn(&mut P) -> PResult, + make_object: impl Fn(&mut P, Span, Vec, Option) -> PResult, +) -> PResult { + let ctx = p.ctx() & !Context::WillExpectColonForCond; + p.with_ctx(ctx).parse_with(|p| { + trace_cur!(p, parse_object); + + let start = p.cur_pos(); + let mut trailing_comma = None; + p.assert_and_bump(&P::Token::LBRACE)?; + + let mut props = Vec::with_capacity(8); + + while !p.input_mut().eat(&P::Token::RBRACE) { + props.push(parse_prop(p)?); + + if !p.input_mut().is(&P::Token::RBRACE) { + expect!(p, &P::Token::COMMA); + if p.input_mut().is(&P::Token::RBRACE) { + trailing_comma = Some(p.input().prev_span()); + } + } + } + + let span = p.span(start); + make_object(p, span, props, 
trailing_comma) + }) +} + +/// Production 'BindingProperty' +fn parse_binding_object_prop<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + if p.input_mut().eat(&P::Token::DOTDOTDOT) { + // spread element + let dot3_token = p.span(start); + + let arg = Box::new(parse_binding_pat_or_ident(p, false)?); + + return Ok(ObjectPatProp::Rest(RestPat { + span: p.span(start), + dot3_token, + arg, + type_ann: None, + })); + } + + let key = p.parse_prop_name()?; + if p.input_mut().eat(&P::Token::COLON) { + let value = Box::new(parse_binding_element(p)?); + + return Ok(ObjectPatProp::KeyValue(KeyValuePatProp { key, value })); + } + let key = match key { + PropName::Ident(ident) => ident, + _ => unexpected!(p, "an identifier"), + }; + + let value = if p.input_mut().eat(&P::Token::EQUAL) { + parse_assignment_expr(p.include_in_expr(true).deref_mut()).map(Some)? + } else { + if p.ctx().is_reserved_word(&key.sym) { + p.emit_err(key.span, SyntaxError::ReservedWordInObjShorthandOrPat); + } + + None + }; + + Ok(ObjectPatProp::Assign(AssignPatProp { + span: p.span(start), + key: key.into(), + value, + })) +} + +fn make_binding_object<'a, P: Parser<'a>>( + p: &mut P, + span: Span, + props: Vec, + trailing_comma: Option, +) -> PResult { + let len = props.len(); + for (i, prop) in props.iter().enumerate() { + if i == len - 1 { + if let ObjectPatProp::Rest(ref rest) = prop { + match *rest.arg { + Pat::Ident(..) => { + if let Some(trailing_comma) = trailing_comma { + p.emit_err(trailing_comma, SyntaxError::CommaAfterRestElement); + } + } + _ => syntax_error!(p, prop.span(), SyntaxError::DotsWithoutIdentifier), + } + } + continue; + } + + if let ObjectPatProp::Rest(..) 
= prop { + p.emit_err(prop.span(), SyntaxError::NonLastRestParam) + } + } + + let optional = (p.input().syntax().dts() || p.ctx().contains(Context::InDeclare)) + && p.input_mut().eat(&P::Token::QUESTION); + + Ok(ObjectPat { + span, + props, + optional, + type_ann: None, + } + .into()) +} + +pub(super) fn parse_object_pat<'a, P: Parser<'a>>(p: &mut P) -> PResult { + parse_object(p, parse_binding_object_prop, make_binding_object) +} + +fn make_expr_object<'a, P: Parser<'a>>( + p: &mut P, + span: Span, + props: Vec, + trailing_comma: Option, +) -> PResult { + if let Some(trailing_comma) = trailing_comma { + p.state_mut() + .trailing_commas + .insert(span.lo, trailing_comma); + } + Ok(ObjectLit { span, props }.into()) +} + +fn parse_expr_object_prop<'a, P: Parser<'a>>(p: &mut P) -> PResult { + trace_cur!(p, parse_object_prop); + + let start = p.cur_pos(); + // Parse as 'MethodDefinition' + + if p.input_mut().eat(&P::Token::DOTDOTDOT) { + // spread element + let dot3_token = p.span(start); + + let expr = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + + return Ok(PropOrSpread::Spread(SpreadElement { dot3_token, expr })); + } + + if p.input_mut().eat(&P::Token::MUL) { + let name = p.parse_prop_name()?; + return parse_fn_args_body( + p.with_ctx((p.ctx() | Context::AllowDirectSuper) & !Context::InClassField) + .deref_mut(), + // no decorator in an object literal + Vec::new(), + start, + parse_unique_formal_params, + false, + true, + ) + .map(|function| { + PropOrSpread::Prop(Box::new(Prop::Method(MethodProp { + key: name, + function, + }))) + }); + } + + let has_modifiers = eat_any_ts_modifier(p)?; + let modifiers_span = p.input().prev_span(); + + let key = p.parse_prop_name()?; + + if p.input().syntax().typescript() + && !p.input_mut().cur().is_some_and(|cur| { + cur.is_lparen() + || cur.is_lbracket() + || cur.is_colon() + || cur.is_comma() + || cur.is_question() + || cur.is_equal() + || cur.is_star() + || cur.is_str() + || cur.is_num() + || cur.is_word() + 
}) + && !(p.input().syntax().typescript() && p.input_mut().is(&P::Token::LESS)) + && !(p.input_mut().is(&P::Token::RBRACE) && matches!(key, PropName::Ident(..))) + { + trace_cur!(p, parse_object_prop_error); + + p.emit_err(p.input().cur_span(), SyntaxError::TS1005); + return Ok(PropOrSpread::Prop(Box::new(Prop::KeyValue(KeyValueProp { + key, + value: Invalid { + span: p.span(start), + } + .into(), + })))); + } + // + // {[computed()]: a,} + // { 'a': a, } + // { 0: 1, } + // { a: expr, } + if p.input_mut().eat(&P::Token::COLON) { + let value = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + return Ok(PropOrSpread::Prop(Box::new(Prop::KeyValue(KeyValueProp { + key, + value, + })))); + } + + // Handle `a(){}` (and async(){} / get(){} / set(){}) + if (p.input().syntax().typescript() && p.input_mut().is(&P::Token::LESS)) + || p.input_mut().is(&P::Token::LPAREN) + { + return parse_fn_args_body( + p.with_ctx((p.ctx() | Context::AllowDirectSuper) & !Context::InClassField) + .deref_mut(), + // no decorator in an object literal + Vec::new(), + start, + parse_unique_formal_params, + false, + false, + ) + .map(|function| Box::new(Prop::Method(MethodProp { key, function }))) + .map(PropOrSpread::Prop); + } + + let ident = match key { + PropName::Ident(ident) => ident, + // TODO + _ => unexpected!(p, "identifier"), + }; + + if p.input_mut().eat(&P::Token::QUESTION) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1162); + } + + // `ident` from parse_prop_name is parsed as 'IdentifierName' + // It means we should check for invalid expressions like { for, } + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_equal() || cur.is_comma() || cur.is_rbrace()) + { + let is_reserved_word = { p.ctx().is_reserved_word(&ident.sym) }; + if is_reserved_word { + p.emit_err(ident.span, SyntaxError::ReservedWordInObjShorthandOrPat); + } + + if p.input_mut().eat(&P::Token::EQUAL) { + let value = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + let span = 
p.span(start); + return Ok(PropOrSpread::Prop(Box::new(Prop::Assign(AssignProp { + span, + key: ident.into(), + value, + })))); + } + + return Ok(PropOrSpread::Prop(Box::new(Prop::from(ident)))); + } + + // get a(){} + // set a(v){} + // async a(){} + + match &*ident.sym { + "get" | "set" | "async" => { + trace_cur!(p, parse_object_prop__after_accessor); + + if has_modifiers { + p.emit_err(modifiers_span, SyntaxError::TS1042); + } + + let is_generator = ident.sym == "async" && p.input_mut().eat(&P::Token::MUL); + let key = p.parse_prop_name()?; + let key_span = key.span(); + let ctx = (p.ctx() | Context::AllowDirectSuper) & !Context::InClassField; + p.with_ctx(ctx).parse_with(|parser| { + match &*ident.sym { + "get" => parse_fn_args_body( + parser, + // no decorator in an object literal + Vec::new(), + start, + |p| { + let params = parse_formal_params(p)?; + + if params.iter().filter(|p| is_not_this(p)).count() != 0 { + p.emit_err(key_span, SyntaxError::GetterParam); + } + + Ok(params) + }, + false, + false, + ) + .map(|v| *v) + .map( + |Function { + body, return_type, .. + }| { + if parser.input().syntax().typescript() + && parser.input().target() == EsVersion::Es3 + { + parser.emit_err(key_span, SyntaxError::TS1056); + } + + PropOrSpread::Prop(Box::new(Prop::Getter(GetterProp { + span: parser.span(start), + key, + type_ann: return_type, + body, + }))) + }, + ), + "set" => { + parse_fn_args_body( + parser, + // no decorator in an object literal + Vec::new(), + start, + |p| { + let params = parse_formal_params(p)?; + + if params.iter().filter(|p| is_not_this(p)).count() != 1 { + p.emit_err(key_span, SyntaxError::SetterParam); + } + + if !params.is_empty() { + if let Pat::Rest(..) 
= params[0].pat { + p.emit_err(params[0].span(), SyntaxError::RestPatInSetter); + } + } + + if p.input().syntax().typescript() + && p.input().target() == EsVersion::Es3 + { + p.emit_err(key_span, SyntaxError::TS1056); + } + + Ok(params) + }, + false, + false, + ) + .map(|v| *v) + .map( + |Function { + mut params, body, .. + }| { + let mut this = None; + if params.len() >= 2 { + this = Some(params.remove(0).pat); + } + + let param = Box::new( + params.into_iter().next().map(|v| v.pat).unwrap_or_else(|| { + parser.emit_err(key_span, SyntaxError::SetterParam); + + Invalid { span: DUMMY_SP }.into() + }), + ); + + // debug_assert_eq!(params.len(), 1); + PropOrSpread::Prop(Box::new(Prop::Setter(SetterProp { + span: parser.span(start), + key, + body, + param, + this_param: this, + }))) + }, + ) + } + "async" => parse_fn_args_body( + parser, + // no decorator in an object literal + Vec::new(), + start, + parse_unique_formal_params, + true, + is_generator, + ) + .map(|function| { + PropOrSpread::Prop(Box::new(Prop::Method(MethodProp { key, function }))) + }), + _ => unreachable!(), + } + }) + } + _ => { + if p.input().syntax().typescript() { + unexpected!( + p, + "... , *, (, [, :, , ?, =, an identifier, public, protected, private, \ + readonly, <." + ) + } else { + unexpected!(p, "... 
, *, (, [, :, , ?, = or an identifier") + } + } + } +} + +pub fn parse_object_expr<'a, P: Parser<'a>>(p: &mut P) -> PResult { + parse_object(p, parse_expr_object_prop, make_expr_object) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/output_type.rs b/crates/swc_ecma_lexer/src/common/parser/output_type.rs new file mode 100644 index 000000000000..a300ca511611 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/output_type.rs @@ -0,0 +1,113 @@ +use swc_common::Span; +use swc_ecma_ast::{ + Class, ClassDecl, ClassExpr, Decl, DefaultDecl, ExportDefaultDecl, Expr, FnDecl, FnExpr, + Function, Ident, +}; + +use crate::error::SyntaxError; + +pub trait OutputType: Sized { + const IS_IDENT_REQUIRED: bool; + + /// From babel.. + /// + /// When parsing function expression, the binding identifier is parsed + /// according to the rules inside the function. + /// e.g. (function* yield() {}) is invalid because "yield" is disallowed in + /// generators. + /// This isn't the case with function declarations: function* yield() {} is + /// valid because yield is parsed as if it was outside the generator. + /// Therefore, this.state.inGenerator is set before or after parsing the + /// function id according to the "isStatement" parameter. 
+ fn is_fn_expr() -> bool { + false + } + + fn finish_fn(span: Span, ident: Option, f: Box) -> Result; + + fn finish_class( + span: Span, + ident: Option, + class: Box, + ) -> Result; +} + +impl OutputType for Box { + const IS_IDENT_REQUIRED: bool = false; + + fn is_fn_expr() -> bool { + true + } + + fn finish_fn( + _span: Span, + ident: Option, + function: Box, + ) -> Result { + Ok(FnExpr { ident, function }.into()) + } + + fn finish_class( + _span: Span, + ident: Option, + class: Box, + ) -> Result { + Ok(ClassExpr { ident, class }.into()) + } +} + +impl OutputType for ExportDefaultDecl { + const IS_IDENT_REQUIRED: bool = false; + + fn finish_fn( + span: Span, + ident: Option, + function: Box, + ) -> Result { + Ok(ExportDefaultDecl { + span, + decl: DefaultDecl::Fn(FnExpr { ident, function }), + }) + } + + fn finish_class( + span: Span, + ident: Option, + class: Box, + ) -> Result { + Ok(ExportDefaultDecl { + span, + decl: DefaultDecl::Class(ClassExpr { ident, class }), + }) + } +} + +impl OutputType for Decl { + const IS_IDENT_REQUIRED: bool = true; + + fn finish_fn( + _span: Span, + ident: Option, + function: Box, + ) -> Result { + let ident = ident.ok_or(SyntaxError::ExpectedIdent)?; + + Ok(FnDecl { + declare: false, + ident, + function, + } + .into()) + } + + fn finish_class(_: Span, ident: Option, class: Box) -> Result { + let ident = ident.ok_or(SyntaxError::ExpectedIdent)?; + + Ok(ClassDecl { + declare: false, + ident, + class, + } + .into()) + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/parse_object.rs b/crates/swc_ecma_lexer/src/common/parser/parse_object.rs new file mode 100644 index 000000000000..2600ebee9d47 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/parse_object.rs @@ -0,0 +1,14 @@ +use swc_common::Span; + +use super::PResult; + +pub trait ParseObject { + type Prop; + fn make_object( + &mut self, + span: Span, + props: Vec, + trailing_comma: Option, + ) -> PResult; + fn parse_object_prop(&mut self) -> PResult; +} diff 
--git a/crates/swc_ecma_lexer/src/common/parser/pat.rs b/crates/swc_ecma_lexer/src/common/parser/pat.rs new file mode 100644 index 000000000000..a4f9aa1d454b --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/pat.rs @@ -0,0 +1,852 @@ +use std::ops::DerefMut; + +use swc_common::{BytePos, Span, Spanned}; +use swc_ecma_ast::*; + +use super::{ + assign_target_or_spread::AssignTargetOrSpread, + class_and_fn::{parse_access_modifier, parse_decorators}, + is_not_this, + pat_type::PatType, + typescript::{ + eat_any_ts_modifier, parse_ts_modifier, parse_ts_type_ann, try_parse_ts_type_ann, + }, + PResult, Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + buffer::Buffer, expr::parse_assignment_expr, expr_ext::ExprExt, + ident::parse_binding_ident, object::parse_object_pat, + }, + }, + error::SyntaxError, +}; + +/// argument of arrow is pattern, although idents in pattern is already +/// checked if is a keyword, it should also be checked if is arguments or +/// eval +pub(super) fn pat_is_valid_argument_in_strict<'a>(p: &mut impl Parser<'a>, pat: &Pat) { + match pat { + Pat::Ident(i) => { + if i.is_reserved_in_strict_bind() { + p.emit_strict_mode_err(i.span, SyntaxError::EvalAndArgumentsInStrict) + } + } + Pat::Array(arr) => { + for pat in arr.elems.iter().flatten() { + pat_is_valid_argument_in_strict(p, pat) + } + } + Pat::Rest(r) => pat_is_valid_argument_in_strict(p, &r.arg), + Pat::Object(obj) => { + for prop in obj.props.iter() { + match prop { + ObjectPatProp::KeyValue(KeyValuePatProp { value, .. }) + | ObjectPatProp::Rest(RestPat { arg: value, .. }) => { + pat_is_valid_argument_in_strict(p, value) + } + ObjectPatProp::Assign(AssignPatProp { key, .. 
}) => { + if key.is_reserved_in_strict_bind() { + p.emit_strict_mode_err(key.span, SyntaxError::EvalAndArgumentsInStrict) + } + } + } + } + } + Pat::Assign(a) => pat_is_valid_argument_in_strict(p, &a.left), + Pat::Invalid(_) | Pat::Expr(_) => (), + } +} + +/// This does not return 'rest' pattern because non-last parameter cannot be +/// rest. +pub(super) fn reparse_expr_as_pat<'a>( + p: &mut impl Parser<'a>, + pat_ty: PatType, + expr: Box, +) -> PResult { + if let Expr::Invalid(i) = *expr { + return Ok(i.into()); + } + if pat_ty == PatType::AssignPat { + match *expr { + Expr::Object(..) | Expr::Array(..) => { + // It is a Syntax Error if LeftHandSideExpression is either + // an ObjectLiteral or an ArrayLiteral + // and LeftHandSideExpression cannot + // be reparsed as an AssignmentPattern. + } + _ => { + p.check_assign_target(&expr, true); + } + } + } + reparse_expr_as_pat_inner(p, pat_ty, expr) +} + +fn reparse_expr_as_pat_inner<'a>( + p: &mut impl Parser<'a>, + pat_ty: PatType, + expr: Box, +) -> PResult { + // In dts, we do not reparse. + debug_assert!(!p.input().syntax().dts()); + let span = expr.span(); + if pat_ty == PatType::AssignPat { + match *expr { + Expr::Object(..) | Expr::Array(..) => { + // It is a Syntax Error if LeftHandSideExpression is either + // an ObjectLiteral or an ArrayLiteral + // and LeftHandSideExpression cannot + // be reparsed as an AssignmentPattern. + } + + _ => match *expr { + // It is a Syntax Error if the LeftHandSideExpression is + // CoverParenthesizedExpressionAndArrowParameterList:(Expression) and + // Expression derives a phrase that would produce a Syntax Error according + // to these rules if that phrase were substituted for + // LeftHandSideExpression. This rule is recursively applied. + Expr::Paren(..) => { + return Ok(expr.into()); + } + Expr::Ident(i) => return Ok(i.into()), + _ => { + return Ok(expr.into()); + } + }, + } + } + + // AssignmentElement: + // DestructuringAssignmentTarget Initializer[+In]? 
+ // + // DestructuringAssignmentTarget: + // LeftHandSideExpression + if pat_ty == PatType::AssignElement { + match *expr { + Expr::Array(..) | Expr::Object(..) => {} + Expr::Member(..) + | Expr::SuperProp(..) + | Expr::Call(..) + | Expr::New(..) + | Expr::Lit(..) + | Expr::Ident(..) + | Expr::Fn(..) + | Expr::Class(..) + | Expr::Paren(..) + | Expr::Tpl(..) + | Expr::TsAs(..) => { + if !expr.is_valid_simple_assignment_target(p.ctx().contains(Context::Strict)) { + p.emit_err(span, SyntaxError::NotSimpleAssign) + } + match *expr { + Expr::Ident(i) => return Ok(i.into()), + _ => { + return Ok(expr.into()); + } + } + } + // It's special because of optional initializer + Expr::Assign(..) => {} + _ => p.emit_err(span, SyntaxError::InvalidPat), + } + } + + match *expr { + Expr::Paren(..) => { + p.emit_err(span, SyntaxError::InvalidPat); + Ok(Invalid { span }.into()) + } + Expr::Assign( + assign_expr @ AssignExpr { + op: AssignOp::Assign, + .. + }, + ) => { + let AssignExpr { + span, left, right, .. + } = assign_expr; + Ok(AssignPat { + span, + left: match left { + AssignTarget::Simple(left) => { + Box::new(reparse_expr_as_pat(p, pat_ty, left.into())?) 
+ } + AssignTarget::Pat(pat) => pat.into(), + }, + right, + } + .into()) + } + Expr::Object(ObjectLit { + span: object_span, + props, + }) => { + // {} + let len = props.len(); + Ok(ObjectPat { + span: object_span, + props: props + .into_iter() + .enumerate() + .map(|(idx, prop)| { + let span = prop.span(); + match prop { + PropOrSpread::Prop(prop) => match *prop { + Prop::Shorthand(id) => Ok(ObjectPatProp::Assign(AssignPatProp { + span: id.span(), + key: id.into(), + value: None, + })), + Prop::KeyValue(kv_prop) => { + Ok(ObjectPatProp::KeyValue(KeyValuePatProp { + key: kv_prop.key, + value: Box::new(reparse_expr_as_pat( + p, + pat_ty.element(), + kv_prop.value, + )?), + })) + } + Prop::Assign(assign_prop) => { + Ok(ObjectPatProp::Assign(AssignPatProp { + span, + key: assign_prop.key.into(), + value: Some(assign_prop.value), + })) + } + _ => syntax_error!(p, prop.span(), SyntaxError::InvalidPat), + }, + + PropOrSpread::Spread(SpreadElement { dot3_token, expr }) => { + if idx != len - 1 { + p.emit_err(span, SyntaxError::NonLastRestParam) + } else if let Some(trailing_comma) = + p.state().trailing_commas.get(&object_span.lo) + { + p.emit_err(*trailing_comma, SyntaxError::CommaAfterRestElement); + }; + + let element_pat_ty = pat_ty.element(); + let pat = if let PatType::BindingElement = element_pat_ty { + if let Expr::Ident(i) = *expr { + i.into() + } else { + p.emit_err(span, SyntaxError::DotsWithoutIdentifier); + Pat::Invalid(Invalid { span }) + } + } else { + reparse_expr_as_pat(p, element_pat_ty, expr)? + }; + if let Pat::Assign(_) = pat { + p.emit_err(span, SyntaxError::TS1048) + }; + Ok(ObjectPatProp::Rest(RestPat { + span, + dot3_token, + arg: Box::new(pat), + type_ann: None, + })) + } + } + }) + .collect::>()?, + optional: false, + type_ann: None, + } + .into()) + } + Expr::Ident(ident) => Ok(ident.into()), + Expr::Array(ArrayLit { + elems: mut exprs, .. 
+ }) => { + if exprs.is_empty() { + return Ok(ArrayPat { + span, + elems: Vec::new(), + optional: false, + type_ann: None, + } + .into()); + } + // Trailing comma may exist. We should remove those commas. + let count_of_trailing_comma = exprs.iter().rev().take_while(|e| e.is_none()).count(); + let len = exprs.len(); + let mut params = Vec::with_capacity(exprs.len() - count_of_trailing_comma); + // Comma or other pattern cannot follow a rest pattern. + let idx_of_rest_not_allowed = if count_of_trailing_comma == 0 { + len - 1 + } else { + // last element is comma, so rest is not allowed for every pattern element. + len - count_of_trailing_comma + }; + for expr in exprs.drain(..idx_of_rest_not_allowed) { + match expr { + Some( + expr @ ExprOrSpread { + spread: Some(..), .. + }, + ) => p.emit_err(expr.span(), SyntaxError::NonLastRestParam), + Some(ExprOrSpread { expr, .. }) => { + params.push(reparse_expr_as_pat(p, pat_ty.element(), expr).map(Some)?) + } + None => params.push(None), + } + } + if count_of_trailing_comma == 0 { + let expr = exprs.into_iter().next().unwrap(); + let outer_expr_span = expr.span(); + let last = match expr { + // Rest + Some(ExprOrSpread { + spread: Some(dot3_token), + expr, + }) => { + // TODO: is BindingPat correct? + if let Expr::Assign(_) = *expr { + p.emit_err(outer_expr_span, SyntaxError::TS1048); + }; + if let Some(trailing_comma) = p.state().trailing_commas.get(&span.lo) { + p.emit_err(*trailing_comma, SyntaxError::CommaAfterRestElement); + } + let expr_span = expr.span(); + reparse_expr_as_pat(p, pat_ty.element(), expr) + .map(|pat| { + RestPat { + span: expr_span, + dot3_token, + arg: Box::new(pat), + type_ann: None, + } + .into() + }) + .map(Some)? + } + Some(ExprOrSpread { expr, .. }) => { + // TODO: is BindingPat correct? + reparse_expr_as_pat(p, pat_ty.element(), expr).map(Some)? + } + // TODO: syntax error if last element is ellison and ...rest exists. 
+ None => None, + }; + params.push(last); + } + Ok(ArrayPat { + span, + elems: params, + optional: false, + type_ann: None, + } + .into()) + } + + // Invalid patterns. + // Note that assignment expression with '=' is valid, and handled above. + Expr::Lit(..) | Expr::Assign(..) => { + p.emit_err(span, SyntaxError::InvalidPat); + Ok(Invalid { span }.into()) + } + + Expr::Yield(..) if p.ctx().contains(Context::InGenerator) => { + p.emit_err(span, SyntaxError::InvalidPat); + Ok(Invalid { span }.into()) + } + + _ => { + p.emit_err(span, SyntaxError::InvalidPat); + + Ok(Invalid { span }.into()) + } + } +} + +pub(super) fn parse_binding_element<'a, P: Parser<'a>>(p: &mut P) -> PResult { + trace_cur!(p, parse_binding_element); + + let start = p.cur_pos(); + let left = parse_binding_pat_or_ident(p, false)?; + + if p.input_mut().eat(&P::Token::EQUAL) { + let right = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + + if p.ctx().contains(Context::InDeclare) { + p.emit_err(p.span(start), SyntaxError::TS2371); + } + + return Ok(AssignPat { + span: p.span(start), + left: Box::new(left), + right, + } + .into()); + } + + Ok(left) +} + +pub fn parse_binding_pat_or_ident<'a, P: Parser<'a>>( + p: &mut P, + disallow_let: bool, +) -> PResult { + trace_cur!(p, parse_binding_pat_or_ident); + + let cur = cur!(p, true); + if cur.is_yield() || cur.is_word() { + parse_binding_ident(p, disallow_let).map(Pat::from) + } else if cur.is_lbracket() { + parse_array_binding_pat(p) + } else if cur.is_lbrace() { + parse_object_pat(p) + } else { + unexpected!(p, "yield, an identifier, [ or {") + } +} + +pub fn parse_array_binding_pat<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::LBRACKET)?; + + let mut elems = Vec::new(); + + let mut rest_span = Span::default(); + + while !eof!(p) && !p.input_mut().is(&P::Token::RBRACKET) { + if p.input_mut().eat(&P::Token::COMMA) { + elems.push(None); + continue; + } + + if !rest_span.is_dummy() { + 
p.emit_err(rest_span, SyntaxError::NonLastRestParam); + } + + let start = p.cur_pos(); + + let mut is_rest = false; + if p.input_mut().eat(&P::Token::DOTDOTDOT) { + is_rest = true; + let dot3_token = p.span(start); + + let pat = parse_binding_pat_or_ident(p, false)?; + rest_span = p.span(start); + let pat = RestPat { + span: rest_span, + dot3_token, + arg: Box::new(pat), + type_ann: None, + } + .into(); + elems.push(Some(pat)); + } else { + elems.push(parse_binding_element(p).map(Some)?); + } + + if !p.input_mut().is(&P::Token::RBRACKET) { + expect!(p, &P::Token::COMMA); + if is_rest && p.input_mut().is(&P::Token::RBRACKET) { + p.emit_err(p.input().prev_span(), SyntaxError::CommaAfterRestElement); + } + } + } + + expect!(p, &P::Token::RBRACKET); + let optional = (p.input().syntax().dts() || p.ctx().contains(Context::InDeclare)) + && p.input_mut().eat(&P::Token::QUESTION); + + Ok(ArrayPat { + span: p.span(start), + elems, + optional, + type_ann: None, + } + .into()) +} + +/// spec: 'FormalParameter' +/// +/// babel: `parseAssignableListItem` +fn parse_formal_param_pat<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + let has_modifier = eat_any_ts_modifier(p)?; + + let pat_start = p.cur_pos(); + let mut pat = parse_binding_element(p)?; + let mut opt = false; + + if p.input().syntax().typescript() { + if p.input_mut().eat(&P::Token::QUESTION) { + match pat { + Pat::Ident(BindingIdent { + id: Ident { + ref mut optional, .. + }, + .. + }) + | Pat::Array(ArrayPat { + ref mut optional, .. + }) + | Pat::Object(ObjectPat { + ref mut optional, .. + }) => { + *optional = true; + opt = true; + } + _ if p.input().syntax().dts() || p.ctx().contains(Context::InDeclare) => {} + _ => { + syntax_error!( + p, + p.input().prev_span(), + SyntaxError::TsBindingPatCannotBeOptional + ); + } + } + } + + match pat { + Pat::Array(ArrayPat { + ref mut type_ann, + ref mut span, + .. + }) + | Pat::Object(ObjectPat { + ref mut type_ann, + ref mut span, + .. 
+ }) + | Pat::Rest(RestPat { + ref mut type_ann, + ref mut span, + .. + }) => { + let new_type_ann = try_parse_ts_type_ann(p)?; + if new_type_ann.is_some() { + *span = Span::new(pat_start, p.input().prev_span().hi); + } + *type_ann = new_type_ann; + } + + Pat::Ident(BindingIdent { + ref mut type_ann, .. + }) => { + let new_type_ann = try_parse_ts_type_ann(p)?; + *type_ann = new_type_ann; + } + + Pat::Assign(AssignPat { ref mut span, .. }) => { + if (try_parse_ts_type_ann(p)?).is_some() { + *span = Span::new(pat_start, p.input().prev_span().hi); + p.emit_err(*span, SyntaxError::TSTypeAnnotationAfterAssign); + } + } + Pat::Invalid(..) => {} + _ => unreachable!("invalid syntax: Pat: {:?}", pat), + } + } + + let pat = if p.input_mut().eat(&P::Token::EQUAL) { + // `=` cannot follow optional parameter. + if opt { + p.emit_err(pat.span(), SyntaxError::TS1015); + } + + let right = parse_assignment_expr(p)?; + if p.ctx().contains(Context::InDeclare) { + p.emit_err(p.span(start), SyntaxError::TS2371); + } + + AssignPat { + span: p.span(start), + left: Box::new(pat), + right, + } + .into() + } else { + pat + }; + + if has_modifier { + p.emit_err(p.span(start), SyntaxError::TS2369); + return Ok(pat); + } + + Ok(pat) +} + +fn parse_constructor_param<'a, P: Parser<'a>>( + p: &mut P, + param_start: BytePos, + decorators: Vec, +) -> PResult { + let (accessibility, is_override, readonly) = if p.input().syntax().typescript() { + let accessibility = parse_access_modifier(p)?; + ( + accessibility, + parse_ts_modifier(p, &["override"], false)?.is_some(), + parse_ts_modifier(p, &["readonly"], false)?.is_some(), + ) + } else { + (None, false, false) + }; + if accessibility.is_none() && !is_override && !readonly { + let pat = parse_formal_param_pat(p)?; + Ok(ParamOrTsParamProp::Param(Param { + span: p.span(param_start), + decorators, + pat, + })) + } else { + let param = match parse_formal_param_pat(p)? 
{ + Pat::Ident(i) => TsParamPropParam::Ident(i), + Pat::Assign(a) => TsParamPropParam::Assign(a), + node => syntax_error!(p, node.span(), SyntaxError::TsInvalidParamPropPat), + }; + Ok(ParamOrTsParamProp::TsParamProp(TsParamProp { + span: p.span(param_start), + accessibility, + is_override, + readonly, + decorators, + param, + })) + } +} + +pub fn parse_constructor_params<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let mut params = Vec::new(); + let mut rest_span = Span::default(); + + while !eof!(p) && !p.input_mut().is(&P::Token::RPAREN) { + if !rest_span.is_dummy() { + p.emit_err(rest_span, SyntaxError::TS1014); + } + + let param_start = p.cur_pos(); + let decorators = parse_decorators(p, false)?; + let pat_start = p.cur_pos(); + + let mut is_rest = false; + if p.input_mut().eat(&P::Token::DOTDOTDOT) { + is_rest = true; + let dot3_token = p.span(pat_start); + + let pat = parse_binding_pat_or_ident(p, false)?; + let type_ann = if p.input().syntax().typescript() && p.input_mut().is(&P::Token::COLON) + { + let cur_pos = p.cur_pos(); + Some(parse_ts_type_ann(p, /* eat_colon */ true, cur_pos)?) 
+ } else { + None + }; + + rest_span = p.span(pat_start); + let pat = RestPat { + span: rest_span, + dot3_token, + arg: Box::new(pat), + type_ann, + } + .into(); + params.push(ParamOrTsParamProp::Param(Param { + span: p.span(param_start), + decorators, + pat, + })); + } else { + params.push(parse_constructor_param(p, param_start, decorators)?); + } + + if !p.input_mut().is(&P::Token::RPAREN) { + expect!(p, &P::Token::COMMA); + if p.input_mut().is(&P::Token::RPAREN) && is_rest { + p.emit_err(p.input().prev_span(), SyntaxError::CommaAfterRestElement); + } + } + } + + Ok(params) +} + +pub fn parse_formal_params<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let mut params = Vec::new(); + let mut rest_span = Span::default(); + + while !eof!(p) && !p.input_mut().is(&P::Token::RPAREN) { + if !rest_span.is_dummy() { + p.emit_err(rest_span, SyntaxError::TS1014); + } + + let param_start = p.cur_pos(); + let decorators = parse_decorators(p, false)?; + let pat_start = p.cur_pos(); + + let pat = if p.input_mut().eat(&P::Token::DOTDOTDOT) { + let dot3_token = p.span(pat_start); + + let mut pat = parse_binding_pat_or_ident(p, false)?; + + if p.input_mut().eat(&P::Token::EQUAL) { + let right = parse_assignment_expr(p)?; + p.emit_err(pat.span(), SyntaxError::TS1048); + pat = AssignPat { + span: p.span(pat_start), + left: Box::new(pat), + right, + } + .into(); + } + + let type_ann = if p.input().syntax().typescript() && p.input_mut().is(&P::Token::COLON) + { + let cur_pos = p.cur_pos(); + let ty = parse_ts_type_ann(p, /* eat_colon */ true, cur_pos)?; + Some(ty) + } else { + None + }; + + rest_span = p.span(pat_start); + let pat = RestPat { + span: rest_span, + dot3_token, + arg: Box::new(pat), + type_ann, + } + .into(); + + if p.syntax().typescript() && p.input_mut().eat(&P::Token::QUESTION) { + p.emit_err(p.input().prev_span(), SyntaxError::TS1047); + // + } + + pat + } else { + parse_formal_param_pat(p)? 
+ }; + let is_rest = matches!(pat, Pat::Rest(_)); + + params.push(Param { + span: p.span(param_start), + decorators, + pat, + }); + + if !p.input_mut().is(&P::Token::RPAREN) { + expect!(p, &P::Token::COMMA); + if is_rest && p.input_mut().is(&P::Token::RPAREN) { + p.emit_err(p.input().prev_span(), SyntaxError::CommaAfterRestElement); + } + } + } + + Ok(params) +} + +#[allow(dead_code)] +pub fn parse_setter_param<'a>(p: &mut impl Parser<'a>, key_span: Span) -> PResult { + let params = parse_formal_params(p)?; + let cnt = params.iter().filter(|p| is_not_this(p)).count(); + + if cnt != 1 { + p.emit_err(key_span, SyntaxError::SetterParam); + } + + if !params.is_empty() { + if let Pat::Rest(..) = params[0].pat { + p.emit_err(params[0].pat.span(), SyntaxError::RestPatInSetter); + } + } + + if params.is_empty() { + syntax_error!(p, SyntaxError::SetterParamRequired); + } + + Ok(params.into_iter().next().unwrap()) +} + +pub fn parse_unique_formal_params<'a>(p: &mut impl Parser<'a>) -> PResult> { + // FIXME: This is wrong + parse_formal_params(p) +} + +pub fn parse_paren_items_as_params<'a, P: Parser<'a>>( + p: &mut P, + mut exprs: Vec, + trailing_comma: Option, +) -> PResult> { + let pat_ty = PatType::BindingPat; + + let len = exprs.len(); + if len == 0 { + return Ok(Vec::new()); + } + + let mut params = Vec::with_capacity(len); + + for expr in exprs.drain(..len - 1) { + match expr { + AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { + spread: Some(..), .. + }) + | AssignTargetOrSpread::Pat(Pat::Rest(..)) => { + p.emit_err(expr.span(), SyntaxError::TS1014) + } + AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { + spread: None, expr, .. 
+ }) => params.push(reparse_expr_as_pat(p, pat_ty, expr)?), + AssignTargetOrSpread::Pat(pat) => params.push(pat), + } + } + + debug_assert_eq!(exprs.len(), 1); + let expr = exprs.into_iter().next().unwrap(); + let outer_expr_span = expr.span(); + let last = match expr { + // Rest + AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { + spread: Some(dot3_token), + expr, + }) => { + if let Expr::Assign(_) = *expr { + p.emit_err(outer_expr_span, SyntaxError::TS1048) + }; + if let Some(trailing_comma) = trailing_comma { + p.emit_err(trailing_comma, SyntaxError::CommaAfterRestElement); + } + let expr_span = expr.span(); + reparse_expr_as_pat(p, pat_ty, expr).map(|pat| { + RestPat { + span: expr_span, + dot3_token, + arg: Box::new(pat), + type_ann: None, + } + .into() + })? + } + AssignTargetOrSpread::ExprOrSpread(ExprOrSpread { expr, .. }) => { + reparse_expr_as_pat(p, pat_ty, expr)? + } + AssignTargetOrSpread::Pat(pat) => { + if let Some(trailing_comma) = trailing_comma { + if let Pat::Rest(..) 
= pat { + p.emit_err(trailing_comma, SyntaxError::CommaAfterRestElement); + } + } + pat + } + }; + params.push(last); + + if p.ctx().contains(Context::Strict) { + for param in params.iter() { + pat_is_valid_argument_in_strict(p, param) + } + } + Ok(params) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/pat_type.rs b/crates/swc_ecma_lexer/src/common/parser/pat_type.rs new file mode 100644 index 000000000000..705a63b2ec9e --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/pat_type.rs @@ -0,0 +1,17 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum PatType { + BindingPat, + BindingElement, + /// AssignmentPattern + AssignPat, + AssignElement, +} + +impl PatType { + pub fn element(self) -> Self { + match self { + PatType::BindingPat | PatType::BindingElement => PatType::BindingElement, + PatType::AssignPat | PatType::AssignElement => PatType::AssignElement, + } + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/state.rs b/crates/swc_ecma_lexer/src/common/parser/state.rs new file mode 100644 index 000000000000..07a66fbe4411 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/state.rs @@ -0,0 +1,39 @@ +use std::ops::{Deref, DerefMut}; + +use rustc_hash::FxHashMap; +use swc_atoms::Atom; +use swc_common::{BytePos, Span}; + +#[derive(Clone, Default)] +pub struct State { + pub labels: Vec, + /// Start position of an assignment expression. + pub potential_arrow_start: Option, + /// Start position of an AST node and the span of its trailing comma. 
+ pub trailing_commas: FxHashMap, +} + +pub struct WithState<'a, 'w, Parser: super::Parser<'a>> { + pub(super) inner: &'w mut Parser, + pub(super) orig_state: crate::common::parser::state::State, + pub(super) marker: std::marker::PhantomData<&'a ()>, +} + +impl<'a, Parser: super::Parser<'a>> Deref for WithState<'a, '_, Parser> { + type Target = Parser; + + fn deref(&self) -> &Parser { + self.inner + } +} +impl<'a, Parser: super::Parser<'a>> DerefMut for WithState<'a, '_, Parser> { + fn deref_mut(&mut self) -> &mut Parser { + self.inner + } +} + +impl<'a, Parser: super::Parser<'a>> Drop for WithState<'a, '_, Parser> { + fn drop(&mut self) { + std::mem::swap(self.inner.state_mut(), &mut self.orig_state); + } +} diff --git a/crates/swc_ecma_lexer/src/common/parser/stmt.rs b/crates/swc_ecma_lexer/src/common/parser/stmt.rs new file mode 100644 index 000000000000..cacaf59a7e69 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/stmt.rs @@ -0,0 +1,1408 @@ +use std::ops::DerefMut; + +use swc_common::{BytePos, Span, Spanned}; +use swc_ecma_ast::*; +use typed_arena::Arena; + +use super::{ + buffer::Buffer, + class_and_fn::parse_fn_decl, + expr::parse_assignment_expr, + is_directive::IsDirective, + pat::parse_binding_pat_or_ident, + typescript::{try_parse_ts_type_ann, ts_look_ahead}, + PResult, Parser, +}; +use crate::{ + common::{ + context::Context, + lexer::token::TokenFactory, + parser::{ + class_and_fn::{parse_async_fn_decl, parse_class_decl, parse_decorators}, + expr::{parse_await_expr, parse_bin_op_recursively, parse_for_head_prefix}, + ident::{parse_binding_ident, parse_label_ident}, + pat::reparse_expr_as_pat, + pat_type::PatType, + typescript::{ + parse_ts_enum_decl, parse_ts_expr_stmt, parse_ts_interface_decl, parse_ts_type, + parse_ts_type_alias_decl, + }, + }, + }, + error::{Error, SyntaxError}, +}; + +#[allow(clippy::enum_variant_names)] +pub enum TempForHead { + For { + init: Option, + test: Option>, + update: Option>, + }, + ForIn { + left: ForHead, + 
right: Box, + }, + ForOf { + left: ForHead, + right: Box, + }, +} + +fn parse_normal_for_head<'a, P: Parser<'a>>( + p: &mut P, + init: Option, +) -> PResult { + let test = if p.input_mut().eat(&P::Token::SEMI) { + None + } else { + let test = p.include_in_expr(true).parse_expr().map(Some)?; + p.input_mut().eat(&P::Token::SEMI); + test + }; + + let update = if p.input_mut().is(&P::Token::RPAREN) { + None + } else { + p.include_in_expr(true).parse_expr().map(Some)? + }; + + Ok(TempForHead::For { init, test, update }) +} + +fn parse_for_each_head<'a, P: Parser<'a>>(p: &mut P, left: ForHead) -> PResult { + let is_of = p.bump().is_of(); + if is_of { + let right = parse_assignment_expr(p.include_in_expr(true).deref_mut())?; + Ok(TempForHead::ForOf { left, right }) + } else { + if let ForHead::UsingDecl(d) = &left { + p.emit_err(d.span, SyntaxError::UsingDeclNotAllowedForForInLoop) + } + let right = p.include_in_expr(true).parse_expr()?; + Ok(TempForHead::ForIn { left, right }) + } +} + +pub fn parse_return_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + let stmt = p.parse_with(|p| { + p.assert_and_bump(&P::Token::RETURN)?; + + let arg = if p.is_general_semi() { + None + } else { + p.include_in_expr(true).parse_expr().map(Some)? + }; + p.expect_general_semi()?; + Ok(ReturnStmt { + span: p.span(start), + arg, + } + .into()) + }); + + if !p.ctx().contains(Context::InFunction) && !p.input().syntax().allow_return_outside_function() + { + p.emit_err(p.span(start), SyntaxError::ReturnNotAllowed); + } + + stmt +} + +fn parse_var_declarator<'a, P: Parser<'a>>( + p: &mut P, + for_loop: bool, + kind: VarDeclKind, +) -> PResult { + let start = p.cur_pos(); + + let is_let_or_const = matches!(kind, VarDeclKind::Let | VarDeclKind::Const); + + let mut name = parse_binding_pat_or_ident(p, is_let_or_const)?; + + let definite = if p.input().syntax().typescript() { + match name { + Pat::Ident(..) 
=> p.input_mut().eat(&P::Token::BANG), + _ => false, + } + } else { + false + }; + + // Typescript extension + if p.input().syntax().typescript() && p.input_mut().is(&P::Token::COLON) { + let type_annotation = try_parse_ts_type_ann(p)?; + match name { + Pat::Array(ArrayPat { + ref mut type_ann, .. + }) + | Pat::Ident(BindingIdent { + ref mut type_ann, .. + }) + | Pat::Object(ObjectPat { + ref mut type_ann, .. + }) + | Pat::Rest(RestPat { + ref mut type_ann, .. + }) => { + *type_ann = type_annotation; + } + _ => unreachable!("invalid syntax: Pat: {:?}", name), + } + } + + //FIXME: This is wrong. Should check in/of only on first loop. + let init = if !for_loop + || !p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_in() || cur.is_of()) + { + if p.input_mut().eat(&P::Token::EQUAL) { + let expr = parse_assignment_expr(p)?; + let expr = p.verify_expr(expr)?; + + Some(expr) + } else { + // Destructuring bindings require initializers, but + // typescript allows `declare` vars not to have initializers. + if p.ctx().contains(Context::InDeclare) { + None + } else if kind == VarDeclKind::Const + && !for_loop + && !p.ctx().contains(Context::InDeclare) + { + p.emit_err( + p.span(start), + SyntaxError::ConstDeclarationsRequireInitialization, + ); + + None + } else { + match name { + Pat::Ident(..) => None, + _ => { + syntax_error!(p, p.span(start), SyntaxError::PatVarWithoutInit) + } + } + } + } + } else { + // e.g. 
for(let a;;) + None + }; + + Ok(VarDeclarator { + span: p.span(start), + name, + init, + definite, + }) +} + +pub fn parse_var_stmt<'a, P: Parser<'a>>(p: &mut P, for_loop: bool) -> PResult> { + let start = p.cur_pos(); + let t = p.bump(); + let kind = if t.is_const() { + VarDeclKind::Const + } else if t.is_let() { + VarDeclKind::Let + } else if t.is_var() { + VarDeclKind::Var + } else { + unreachable!() + }; + let var_span = p.span(start); + let should_include_in = kind != VarDeclKind::Var || !for_loop; + + if p.syntax().typescript() && for_loop { + let res = if p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_in() || cur.is_of()) + { + ts_look_ahead(p, |p| { + // + if !p.input_mut().eat(&P::Token::OF) && !p.input_mut().eat(&P::Token::IN) { + return Ok(false); + } + + parse_assignment_expr(p)?; + expect!(p, &P::Token::RPAREN); + + Ok(true) + }) + } else { + Ok(false) + }; + + match res { + Ok(true) => { + let pos = var_span.hi(); + let span = Span::new(pos, pos); + p.emit_err(span, SyntaxError::TS1123); + + return Ok(Box::new(VarDecl { + span: p.span(start), + kind, + declare: false, + decls: Vec::new(), + ..Default::default() + })); + } + Err(..) 
=> {} + _ => {} + } + } + + let mut decls = Vec::with_capacity(4); + loop { + let ctx = if should_include_in { + p.ctx() | Context::IncludeInExpr + } else { + p.ctx() + }; + + // Handle + // var a,; + // + // NewLine is ok + if p.input_mut().is(&P::Token::SEMI) || eof!(p) { + let prev_span = p.input().prev_span(); + let span = if prev_span == var_span { + Span::new(prev_span.hi, prev_span.hi) + } else { + prev_span + }; + p.emit_err(span, SyntaxError::TS1009); + break; + } + + decls.push(parse_var_declarator( + p.with_ctx(ctx).deref_mut(), + for_loop, + kind, + )?); + + if !p.input_mut().eat(&P::Token::COMMA) { + break; + } + } + + if !for_loop && !p.eat_general_semi() { + p.emit_err(p.input().cur_span(), SyntaxError::TS1005); + + let _ = p.parse_expr(); + + while !p.eat_general_semi() { + p.bump(); + + if p.input_mut().cur().is_some_and(|cur| cur.is_error()) { + break; + } + } + } + + Ok(Box::new(VarDecl { + span: p.span(start), + declare: false, + kind, + decls, + ..Default::default() + })) +} + +pub fn parse_using_decl<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + is_await: bool, +) -> PResult>> { + // using + // reader = init() + + // is two statements + let _ = cur!(p, false); + if p.input_mut().has_linebreak_between_cur_and_peeked() { + return Ok(None); + } + + if !p.peek_is_ident_ref() { + return Ok(None); + } + + p.assert_and_bump(&P::Token::USING)?; + + let mut decls = Vec::new(); + loop { + // Handle + // var a,; + // + // NewLine is ok + if p.input_mut().is(&P::Token::SEMI) || eof!(p) { + let span = p.input().prev_span(); + p.emit_err(span, SyntaxError::TS1009); + break; + } + + decls.push(parse_var_declarator(p, false, VarDeclKind::Var)?); + if !p.input_mut().eat(&P::Token::COMMA) { + break; + } + } + + if !p.syntax().explicit_resource_management() { + p.emit_err(p.span(start), SyntaxError::UsingDeclNotEnabled); + } + + if !p.ctx().contains(Context::AllowUsingDecl) { + p.emit_err(p.span(start), SyntaxError::UsingDeclNotAllowed); + } + + for decl 
in &decls { + match decl.name { + Pat::Ident(..) => {} + _ => { + p.emit_err(p.span(start), SyntaxError::InvalidNameInUsingDecl); + } + } + + if decl.init.is_none() { + p.emit_err(p.span(start), SyntaxError::InitRequiredForUsingDecl); + } + } + + p.expect_general_semi()?; + + Ok(Some(Box::new(UsingDecl { + span: p.span(start), + is_await, + decls, + }))) +} + +pub fn parse_for_head<'a, P: Parser<'a>>(p: &mut P) -> PResult { + // let strict = p.ctx().contains(Context::Strict); + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_const() || cur.is_var()) + || (p.input_mut().is(&P::Token::LET) && peek!(p).map_or(false, |v| v.follows_keyword_let())) + { + let decl = parse_var_stmt(p, true)?; + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_of() || cur.is_in()) + { + if decl.decls.len() != 1 { + for d in decl.decls.iter().skip(1) { + p.emit_err(d.name.span(), SyntaxError::TooManyVarInForInHead); + } + } else { + if (p.ctx().contains(Context::Strict) || p.input_mut().is(&P::Token::OF)) + && decl.decls[0].init.is_some() + { + p.emit_err( + decl.decls[0].name.span(), + SyntaxError::VarInitializerInForInHead, + ); + } + + if p.syntax().typescript() { + let type_ann = match decl.decls[0].name { + Pat::Ident(ref v) => Some(&v.type_ann), + Pat::Array(ref v) => Some(&v.type_ann), + Pat::Rest(ref v) => Some(&v.type_ann), + Pat::Object(ref v) => Some(&v.type_ann), + _ => None, + }; + + if let Some(type_ann) = type_ann { + if type_ann.is_some() { + p.emit_err(decl.decls[0].name.span(), SyntaxError::TS2483); + } + } + } + } + + return parse_for_each_head(p, ForHead::VarDecl(decl)); + } + + expect!(p, &P::Token::SEMI); + return parse_normal_for_head(p, Some(VarDeclOrExpr::VarDecl(decl))); + } + + if p.input_mut().eat(&P::Token::SEMI) { + return parse_normal_for_head(p, None); + } + + let start = p.cur_pos(); + let init = parse_for_head_prefix(p.include_in_expr(false).deref_mut())?; + + let mut is_using_decl = false; + let mut is_await_using_decl = false; + + if 
p.input().syntax().explicit_resource_management() { + // using foo + let mut maybe_using_decl = init.is_ident_ref_to("using"); + let mut maybe_await_using_decl = false; + + // await using foo + if !maybe_using_decl + && init + .as_await_expr() + .filter(|e| e.arg.is_ident_ref_to("using")) + .is_some() + { + maybe_using_decl = true; + maybe_await_using_decl = true; + } + + if maybe_using_decl + && !p.input_mut().is(&P::Token::OF) + && (peek!(p).is_some_and(|peek| peek.is_of() || peek.is_in())) + { + is_using_decl = maybe_using_decl; + is_await_using_decl = maybe_await_using_decl; + } + } + + if is_using_decl { + let name = parse_binding_ident(p, false)?; + let decl = VarDeclarator { + name: name.into(), + span: p.span(start), + init: None, + definite: false, + }; + + let pat = Box::new(UsingDecl { + span: p.span(start), + is_await: is_await_using_decl, + decls: vec![decl], + }); + + cur!(p, true); + + return parse_for_each_head(p, ForHead::UsingDecl(pat)); + } + + // for (a of b) + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_of() || cur.is_in()) + { + let is_in = p.input_mut().is(&P::Token::IN); + + let pat = reparse_expr_as_pat(p, PatType::AssignPat, init)?; + + // for ({} in foo) is invalid + if p.input().syntax().typescript() && is_in { + match pat { + Pat::Ident(..) => {} + Pat::Expr(..) 
=> {} + ref v => p.emit_err(v.span(), SyntaxError::TS2491), + } + } + + return parse_for_each_head(p, ForHead::Pat(Box::new(pat))); + } + + expect!(p, &P::Token::SEMI); + + let init = p.verify_expr(init)?; + parse_normal_for_head(p, Some(VarDeclOrExpr::Expr(init))) +} + +fn parse_for_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::FOR)?; + let await_start = p.cur_pos(); + let await_token = if p.input_mut().eat(&P::Token::AWAIT) { + Some(p.span(await_start)) + } else { + None + }; + expect!(p, &P::Token::LPAREN); + + let mut ctx = p.ctx() | Context::ForLoopInit; + ctx.set(Context::ForAwaitLoopInit, await_token.is_some()); + + let head = parse_for_head(p.with_ctx(ctx).deref_mut())?; + expect!(p, &P::Token::RPAREN); + let ctx = (p.ctx() | Context::IsBreakAllowed | Context::IsContinueAllowed) & !Context::TopLevel; + let body = parse_stmt(p.with_ctx(ctx).deref_mut()).map(Box::new)?; + + let span = p.span(start); + Ok(match head { + TempForHead::For { init, test, update } => { + if let Some(await_token) = await_token { + syntax_error!(p, await_token, SyntaxError::AwaitForStmt); + } + + ForStmt { + span, + init, + test, + update, + body, + } + .into() + } + TempForHead::ForIn { left, right } => { + if let Some(await_token) = await_token { + syntax_error!(p, await_token, SyntaxError::AwaitForStmt); + } + + ForInStmt { + span, + left, + right, + body, + } + .into() + } + TempForHead::ForOf { left, right } => ForOfStmt { + span, + is_await: await_token.is_some(), + left, + right, + body, + } + .into(), + }) +} + +pub fn parse_stmt<'a>(p: &mut impl Parser<'a>) -> PResult { + trace_cur!(p, parse_stmt); + parse_stmt_like(p, false, handle_import_export) +} + +/// Utility function used to parse large if else statements iteratively. +/// +/// THis function is recursive, but it is very cheap so stack overflow will +/// not occur. 
+fn adjust_if_else_clause<'a, P: Parser<'a>>(p: &mut P, cur: &mut IfStmt, alt: Box) { + cur.span = p.span(cur.span.lo); + + if let Some(Stmt::If(prev_alt)) = cur.alt.as_deref_mut() { + adjust_if_else_clause(p, prev_alt, alt) + } else { + debug_assert_eq!(cur.alt, None); + cur.alt = Some(alt); + } +} + +fn parse_if_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::IF)?; + let if_token = p.input().prev_span(); + + expect!(p, &P::Token::LPAREN); + + let test = p + .with_ctx(p.ctx() & !Context::IgnoreElseClause) + .include_in_expr(true) + .parse_expr() + .map_err(|err| { + Error::new( + err.span(), + SyntaxError::WithLabel { + inner: Box::new(err), + span: if_token, + note: "Tried to parse the condition for an if statement", + }, + ) + })?; + + expect!(p, &P::Token::RPAREN); + + let cons = { + // Prevent stack overflow + crate::maybe_grow(256 * 1024, 1024 * 1024, || { + // Annex B + if !p.ctx().contains(Context::Strict) && p.input_mut().is(&P::Token::FUNCTION) { + // TODO: report error? + } + parse_stmt( + p.with_ctx(p.ctx() & !Context::IgnoreElseClause & !Context::TopLevel) + .deref_mut(), + ) + .map(Box::new) + })? + }; + + // We parse `else` branch iteratively, to avoid stack overflow + // See https://github.com/swc-project/swc/pull/3961 + + let alt = if p.ctx().contains(Context::IgnoreElseClause) { + None + } else { + let mut cur = None; + + let ctx = p.ctx() | Context::IgnoreElseClause; + + let last = loop { + if !p.input_mut().eat(&P::Token::ELSE) { + break None; + } + + if !p.input_mut().is(&P::Token::IF) { + let ctx = p.ctx() & !Context::IgnoreElseClause & !Context::TopLevel; + + // As we eat `else` above, we need to parse statement once. 
+ let last = parse_stmt(p.with_ctx(ctx).deref_mut())?; + break Some(last); + } + + // We encountered `else if` + + let alt = parse_if_stmt(p.with_ctx(ctx).deref_mut())?; + + match &mut cur { + Some(cur) => { + adjust_if_else_clause(p, cur, Box::new(alt.into())); + } + _ => { + cur = Some(alt); + } + } + }; + + match cur { + Some(mut cur) => { + if let Some(last) = last { + adjust_if_else_clause(p, &mut cur, Box::new(last)); + } + Some(cur.into()) + } + _ => last, + } + } + .map(Box::new); + + let span = p.span(start); + Ok(IfStmt { + span, + test, + cons, + alt, + }) +} + +fn parse_throw_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::THROW)?; + + if p.input_mut().had_line_break_before_cur() { + // TODO: Suggest throw arg; + syntax_error!(p, SyntaxError::LineBreakInThrow); + } + + let arg = p.include_in_expr(true).parse_expr()?; + p.expect_general_semi()?; + + let span = p.span(start); + Ok(ThrowStmt { span, arg }.into()) +} + +fn parse_with_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + if p.syntax().typescript() { + let span = p.input().cur_span(); + p.emit_err(span, SyntaxError::TS2410); + } + + { + let span = p.input().cur_span(); + p.emit_strict_mode_err(span, SyntaxError::WithInStrict); + } + + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::WITH)?; + + expect!(p, &P::Token::LPAREN); + let obj = p.include_in_expr(true).parse_expr()?; + expect!(p, &P::Token::RPAREN); + + let ctx = (p.ctx() | Context::InFunction) & !Context::TopLevel; + let body = parse_stmt(p.with_ctx(ctx).deref_mut()).map(Box::new)?; + + let span = p.span(start); + Ok(WithStmt { span, obj, body }.into()) +} + +fn parse_while_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::WHILE)?; + + expect!(p, &P::Token::LPAREN); + let test = p.include_in_expr(true).parse_expr()?; + expect!(p, &P::Token::RPAREN); + + let ctx = (p.ctx() | Context::IsBreakAllowed | 
Context::IsContinueAllowed) & !Context::TopLevel; + let body = parse_stmt(p.with_ctx(ctx).deref_mut()).map(Box::new)?; + + let span = p.span(start); + Ok(WhileStmt { span, test, body }.into()) +} + +/// It's optional since es2019 +fn parse_catch_param<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + if p.input_mut().eat(&P::Token::LPAREN) { + let mut pat = parse_binding_pat_or_ident(p, false)?; + + let type_ann_start = p.cur_pos(); + + if p.syntax().typescript() && p.input_mut().eat(&P::Token::COLON) { + let ctx = p.ctx() | Context::InType; + + let ty = parse_ts_type(p.with_ctx(ctx).deref_mut())?; + // p.emit_err(ty.span(), SyntaxError::TS1196); + + match &mut pat { + Pat::Ident(BindingIdent { type_ann, .. }) + | Pat::Array(ArrayPat { type_ann, .. }) + | Pat::Rest(RestPat { type_ann, .. }) + | Pat::Object(ObjectPat { type_ann, .. }) => { + *type_ann = Some(Box::new(TsTypeAnn { + span: p.span(type_ann_start), + type_ann: ty, + })); + } + Pat::Assign(..) => {} + Pat::Invalid(_) => {} + Pat::Expr(_) => {} + } + } + expect!(p, &P::Token::RPAREN); + Ok(Some(pat)) + } else { + Ok(None) + } +} + +fn parse_do_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + + p.assert_and_bump(&P::Token::DO)?; + + let ctx = (p.ctx() | Context::IsBreakAllowed | Context::IsContinueAllowed) & !Context::TopLevel; + let body = parse_stmt(p.with_ctx(ctx).deref_mut()).map(Box::new)?; + expect!(p, &P::Token::WHILE); + expect!(p, &P::Token::LPAREN); + let test = p.include_in_expr(true).parse_expr()?; + expect!(p, &P::Token::RPAREN); + // We *may* eat semicolon. 
+ let _ = p.eat_general_semi(); + + let span = p.span(start); + + Ok(DoWhileStmt { span, test, body }.into()) +} + +fn parse_labelled_stmt<'a, P: Parser<'a>>(p: &mut P, l: Ident) -> PResult { + let ctx = (p.ctx() | Context::IsBreakAllowed) & !Context::AllowUsingDecl; + p.with_ctx(ctx).parse_with(|p| { + let start = l.span.lo(); + + let mut errors = Vec::new(); + for lb in &p.state().labels { + if l.sym == *lb { + errors.push(Error::new( + l.span, + SyntaxError::DuplicateLabel(l.sym.clone()), + )); + } + } + p.state_mut().labels.push(l.sym.clone()); + + let body = Box::new(if p.input_mut().is(&P::Token::FUNCTION) { + let f = parse_fn_decl(p, Vec::new())?; + if let Decl::Fn(FnDecl { function, .. }) = &f { + if p.ctx().contains(Context::Strict) { + p.emit_err(function.span, SyntaxError::LabelledFunctionInStrict) + } + if function.is_generator || function.is_async { + p.emit_err(function.span, SyntaxError::LabelledGeneratorOrAsync) + } + } + + f.into() + } else { + parse_stmt(p.with_ctx(p.ctx() & !Context::TopLevel).deref_mut())? + }); + + for err in errors { + p.emit_error(err); + } + + { + let pos = p.state().labels.iter().position(|v| v == &l.sym); + if let Some(pos) = pos { + p.state_mut().labels.remove(pos); + } + } + + Ok(LabeledStmt { + span: p.span(start), + label: l, + body, + } + .into()) + }) +} + +pub fn parse_block<'a, P: Parser<'a>>(p: &mut P, allow_directives: bool) -> PResult { + let start = p.cur_pos(); + + expect!(p, &P::Token::LBRACE); + + let stmts = parse_stmt_block_body( + p.with_ctx(p.ctx() & !Context::TopLevel).deref_mut(), + allow_directives, + Some(&P::Token::RBRACE), + )?; + + let span = p.span(start); + Ok(BlockStmt { + span, + stmts, + ctxt: Default::default(), + }) +} + +fn parse_finally_block<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + Ok(if p.input_mut().eat(&P::Token::FINALLY) { + parse_block(p, false).map(Some)? 
+ } else { + None + }) +} + +fn parse_catch_clause<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + let start = p.cur_pos(); + Ok(if p.input_mut().eat(&P::Token::CATCH) { + let param = parse_catch_param(p)?; + parse_block(p, false) + .map(|body| CatchClause { + span: p.span(start), + param, + body, + }) + .map(Some)? + } else { + None + }) +} + +fn parse_try_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let start = p.cur_pos(); + p.assert_and_bump(&P::Token::TRY)?; + + let block = parse_block(p, false)?; + + let catch_start = p.cur_pos(); + let handler = parse_catch_clause(p)?; + let finalizer = parse_finally_block(p)?; + + if handler.is_none() && finalizer.is_none() { + p.emit_err(Span::new(catch_start, catch_start), SyntaxError::TS1005); + } + + let span = p.span(start); + Ok(TryStmt { + span, + block, + handler, + finalizer, + } + .into()) +} + +fn parse_switch_stmt<'a, P: Parser<'a>>(p: &mut P) -> PResult { + let switch_start = p.cur_pos(); + + p.assert_and_bump(&P::Token::SWITCH)?; + + expect!(p, &P::Token::LPAREN); + let discriminant = p.include_in_expr(true).parse_expr()?; + expect!(p, &P::Token::RPAREN); + + let mut cases = Vec::new(); + let mut span_of_previous_default = None; + + expect!(p, &P::Token::LBRACE); + + let ctx = p.ctx() | Context::IsBreakAllowed; + p.with_ctx(ctx).parse_with(|p| { + while p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_case() || cur.is_default()) + { + let mut cons = Vec::new(); + let is_case = p.input_mut().is(&P::Token::CASE); + let case_start = p.cur_pos(); + p.bump(); + let test = if is_case { + p.include_in_expr(true).parse_expr().map(Some)? 
+ } else { + if let Some(previous) = span_of_previous_default { + syntax_error!(p, SyntaxError::MultipleDefault { previous }); + } + span_of_previous_default = Some(p.span(case_start)); + + None + }; + expect!(p, &P::Token::COLON); + + while !eof!(p) + && !p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_case() || cur.is_default() || cur.is_rbrace()) + { + cons.push(parse_stmt_list_item( + p.with_ctx(p.ctx() & !Context::TopLevel).deref_mut(), + )?); + } + + cases.push(SwitchCase { + span: Span::new(case_start, p.input().prev_span().hi), + test, + cons, + }); + } + + Ok(()) + })?; + + // eof or rbrace + expect!(p, &P::Token::RBRACE); + + Ok(SwitchStmt { + span: p.span(switch_start), + discriminant, + cases, + } + .into()) +} + +/// Parse a statement and maybe a declaration. +pub fn parse_stmt_list_item<'a>(p: &mut impl Parser<'a>) -> PResult { + trace_cur!(p, parse_stmt_list_item); + parse_stmt_like(p, true, handle_import_export) +} + +/// Parse a statement, declaration or module item. 
+pub fn parse_stmt_like<'a, P: Parser<'a>, Type: IsDirective + From>( + p: &mut P, + include_decl: bool, + handle_import_export: impl Fn(&mut P, Vec) -> PResult, +) -> PResult { + trace_cur!(p, parse_stmt_like); + + debug_tracing!(p, "parse_stmt_like"); + + let start = p.cur_pos(); + let decorators = parse_decorators(p, true)?; + + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_import() || cur.is_export()) + { + return handle_import_export(p, decorators); + } + + parse_stmt_internal( + p.with_ctx((p.ctx() & !Context::WillExpectColonForCond) | Context::AllowUsingDecl) + .deref_mut(), + start, + include_decl, + decorators, + ) + .map(From::from) +} + +fn handle_import_export<'a, P: Parser<'a>>(p: &mut P, _: Vec) -> PResult { + let start = p.cur_pos(); + if p.input_mut().is(&P::Token::IMPORT) && peek!(p).is_some_and(|peek| peek.is_lparen()) { + let expr = p.parse_expr()?; + + p.eat_general_semi(); + + return Ok(ExprStmt { + span: p.span(start), + expr, + } + .into()); + } + + if p.input_mut().is(&P::Token::IMPORT) && peek!(p).is_some_and(|peek| peek.is_dot()) { + let expr = p.parse_expr()?; + + p.eat_general_semi(); + + return Ok(ExprStmt { + span: p.span(start), + expr, + } + .into()); + } + + syntax_error!(p, SyntaxError::ImportExportInScript); +} + +/// `parseStatementContent` +fn parse_stmt_internal<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + include_decl: bool, + decorators: Vec, +) -> PResult { + trace_cur!(p, parse_stmt_internal); + + let is_typescript = p.input().syntax().typescript(); + + if is_typescript + && p.input_mut().is(&P::Token::CONST) + && peek!(p).is_some_and(|peek| peek.is_enum()) + { + p.assert_and_bump(&P::Token::CONST)?; + p.assert_and_bump(&P::Token::ENUM)?; + return parse_ts_enum_decl(p, start, true) + .map(Decl::from) + .map(Stmt::from); + } + + let top_level = p.ctx().contains(Context::TopLevel); + let cur = cur!(p, true).clone(); + if cur.is_await() && (include_decl || top_level) { + if top_level { + 
p.mark_found_module_item(); + if !p.ctx().contains(Context::CanBeModule) { + p.emit_err(p.input().cur_span(), SyntaxError::TopLevelAwaitInScript); + } + } + + if peek!(p).is_some_and(|peek| peek.is_using()) { + let eaten_await = Some(p.input_mut().cur_pos()); + p.assert_and_bump(&P::Token::AWAIT)?; + let v = parse_using_decl(p, start, true)?; + if let Some(v) = v { + return Ok(v.into()); + } + + let expr = parse_await_expr(p, eaten_await)?; + let expr = parse_bin_op_recursively(p.include_in_expr(true).deref_mut(), expr, 0)?; + p.eat_general_semi(); + + let span = p.span(start); + return Ok(ExprStmt { span, expr }.into()); + } + } else if cur.is_break() || cur.is_continue() { + let is_break = p.input_mut().is(&P::Token::BREAK); + p.bump(); + let label = if p.eat_general_semi() { + None + } else { + let i = parse_label_ident(p).map(Some)?; + p.expect_general_semi()?; + i + }; + let span = p.span(start); + if is_break { + if label.is_some() && !p.state().labels.contains(&label.as_ref().unwrap().sym) { + p.emit_err(span, SyntaxError::TS1116); + } else if !p.ctx().contains(Context::IsBreakAllowed) { + p.emit_err(span, SyntaxError::TS1105); + } + } else if !p.ctx().contains(Context::IsContinueAllowed) { + p.emit_err(span, SyntaxError::TS1115); + } else if label.is_some() && !p.state().labels.contains(&label.as_ref().unwrap().sym) { + p.emit_err(span, SyntaxError::TS1107); + } + return Ok(if is_break { + BreakStmt { span, label }.into() + } else { + ContinueStmt { span, label }.into() + }); + } else if cur.is_debugger() { + p.bump(); + p.expect_general_semi()?; + return Ok(DebuggerStmt { + span: p.span(start), + } + .into()); + } else if cur.is_do() { + return parse_do_stmt(p); + } else if cur.is_for() { + return parse_for_stmt(p); + } else if cur.is_function() { + if !include_decl { + p.emit_err(p.input().cur_span(), SyntaxError::DeclNotAllowed); + } + return parse_fn_decl(p, decorators).map(Stmt::from); + } else if cur.is_class() { + if !include_decl { + 
p.emit_err(p.input().cur_span(), SyntaxError::DeclNotAllowed); + } + return parse_class_decl(p, start, start, decorators, false).map(Stmt::from); + } else if cur.is_if() { + return parse_if_stmt(p).map(Stmt::If); + } else if cur.is_return() { + return parse_return_stmt(p); + } else if cur.is_switch() { + return parse_switch_stmt(p); + } else if cur.is_throw() { + return parse_throw_stmt(p); + } else if cur.is_catch() { + // Error recovery + let span = p.input().cur_span(); + p.emit_err(span, SyntaxError::TS1005); + + let _ = parse_catch_clause(p); + let _ = parse_finally_block(p); + + return Ok(ExprStmt { + span, + expr: Invalid { span }.into(), + } + .into()); + } else if cur.is_finally() { + // Error recovery + let span = p.input().cur_span(); + p.emit_err(span, SyntaxError::TS1005); + + let _ = parse_finally_block(p); + + return Ok(ExprStmt { + span, + expr: Invalid { span }.into(), + } + .into()); + } else if cur.is_try() { + return parse_try_stmt(p); + } else if cur.is_with() { + return parse_with_stmt(p); + } else if cur.is_while() { + return parse_while_stmt(p); + } else if cur.is_var() || (cur.is_const() && include_decl) { + let v = parse_var_stmt(p, false)?; + return Ok(v.into()); + } else if cur.is_let() && include_decl { + // 'let' can start an identifier reference. 
+ let is_keyword = match peek!(p) { + Some(t) => t.follows_keyword_let(), + _ => false, + }; + + if is_keyword { + let v = parse_var_stmt(p, false)?; + return Ok(v.into()); + } + } else if cur.is_using() && include_decl { + let v = parse_using_decl(p, start, false)?; + if let Some(v) = v { + return Ok(v.into()); + } + } else if cur.is_interface() + && is_typescript + && peek!(p).is_some_and(|peek| peek.is_word()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + let start = p.input_mut().cur_pos(); + p.bump(); + return Ok(parse_ts_interface_decl(p, start)?.into()); + } else if cur.is_type() + && is_typescript + && peek!(p).is_some_and(|peek| peek.is_word()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + let start = p.input_mut().cur_pos(); + p.bump(); + return Ok(parse_ts_type_alias_decl(p, start)?.into()); + } else if cur.is_enum() + && is_typescript + && peek!(p).is_some_and(|peek| peek.is_word()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + let start = p.input_mut().cur_pos(); + p.bump(); + return Ok(parse_ts_enum_decl(p, start, false)?.into()); + } else if cur.is_lbrace() { + return parse_block( + p.with_ctx(p.ctx() | Context::AllowUsingDecl).deref_mut(), + false, + ) + .map(Stmt::Block); + } + + if p.input_mut().eat(&P::Token::SEMI) { + return Ok(EmptyStmt { + span: p.span(start), + } + .into()); + } + + // Handle async function foo() {} + if p.input_mut().is(&P::Token::ASYNC) + && peek!(p).is_some_and(|peek| peek.is_function()) + && !p.input_mut().has_linebreak_between_cur_and_peeked() + { + return parse_async_fn_decl(p, decorators).map(From::from); + } + + // If the statement does not start with a statement keyword or a + // brace, it's an ExpressionStatement or LabeledStatement. We + // simply start parsing an expression, and afterwards, if the + // next token is a colon and the expression was a simple + // Identifier node, we switch to interpreting it as a label. 
/// Parses the statements of a block body, treating `import`/`export` inside
/// the block as the script-level error case (`handle_import_export`).
///
/// `end` is the closing token to stop at (e.g. `}`); `None` means parse to
/// end of input.
pub fn parse_stmt_block_body<'a, P: Parser<'a>>(
    p: &mut P,
    allow_directives: bool,
    end: Option<&P::Token>,
) -> PResult<Vec<Stmt>> {
    parse_block_body(p, allow_directives, end, handle_import_export)
}
use swc_common::Span;

/// A lexed token bundled with its source span and the "was there a line
/// break before this token" flag (needed for automatic semicolon insertion).
///
/// Keeping this as a trait lets the parser stay generic over the concrete
/// token-and-span type its lexer produces.
pub trait TokenAndSpan {
    type Token;
    /// Bundles `token` with its `span` and the preceding-line-break flag.
    fn new(token: Self::Token, span: Span, had_line_break: bool) -> Self;
    /// Borrows the token.
    fn token(&self) -> &Self::Token;
    /// Consumes `self`, yielding the owned token.
    fn take_token(self) -> Self::Token;
    /// Source range of the token.
    fn span(&self) -> Span;
    /// True if at least one line terminator preceded this token.
    fn had_line_break(&self) -> bool;
}
/// Which TypeScript list production is being parsed; the shared list helpers
/// use this to choose the correct terminator tokens (`tsIsListTerminator`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParsingContext {
    EnumMembers,
    HeritageClauseElement,
    TupleElementTypes,
    TypeMembers,
    TypeParametersOrArguments,
}

/// Distinguishes `A | B` from `A & B` in the shared union/intersection
/// type parser.
#[derive(Clone, Copy, PartialEq, Eq)]
enum UnionOrIntersection {
    Union,
    Intersection,
}

/// Distinguishes call signatures `(...)` from construct signatures
/// `new (...)` when parsing type members.
#[derive(Clone, Copy, PartialEq, Eq)]
enum SignatureParsingMode {
    TSCallSignatureDeclaration,
    TSConstructSignatureDeclaration,
}
/// `tsTryParse`, boolean flavour: speculatively runs `op` on a clone of the
/// parser and commits the clone's state only when `op` returns
/// `Ok(Some(true))`.
///
/// Errors are suppressed while speculating (`Context::IgnoreError`), and the
/// caller's previous flag value is restored on commit. Any other outcome —
/// `Err`, `Ok(None)`, or `Ok(Some(false))` — discards the clone and leaves
/// `p` untouched.
pub(super) fn try_parse_ts_bool<'a, P: Parser<'a>, F>(p: &mut P, op: F) -> PResult<bool>
where
    F: FnOnce(&mut P) -> PResult<Option<bool>>,
{
    if !p.input().syntax().typescript() {
        return Ok(false);
    }
    let prev_ignore_error = p.input().get_ctx().contains(Context::IgnoreError);
    let mut cloned = p.clone();
    cloned.set_ctx(p.ctx() | Context::IgnoreError);
    let res = op(&mut cloned);
    match res {
        Ok(Some(res)) if res => {
            // Commit: adopt the speculative parser state ...
            *p = cloned;
            // ... but restore the caller's IgnoreError setting.
            let mut ctx = p.ctx();
            ctx.set(Context::IgnoreError, prev_ignore_error);
            p.input_mut().set_ctx(ctx);
            Ok(res)
        }
        Err(..) => Ok(false),
        _ => Ok(false),
    }
}
/// Runs `op` with the lexer reduced to its outermost context, so tokens
/// inside type arguments are not lexed under a nested (e.g. JSX) context.
/// The saved context stack is restored afterwards, even when `op` errored.
fn ts_in_no_context<'a, P: Parser<'a>, T, F>(p: &mut P, op: F) -> PResult<T>
where
    F: FnOnce(&mut P) -> PResult<T>,
{
    debug_assert!(p.input().syntax().typescript());
    trace_cur!(p, ts_in_no_context__before);
    // Keep only the bottom-most lexer context while `op` runs.
    let saved = std::mem::take(p.input_mut().token_context_mut());
    p.input_mut().token_context_mut().push(saved.0[0]);
    debug_assert_eq!(p.input().token_context().len(), 1);
    let res = op(p);
    p.input_mut().set_token_context(saved);
    trace_cur!(p, ts_in_no_context__after);
    res
}

/// `tsIsListTerminator`: does the current token terminate a list of `kind`?
/// EOF (no current token) is not treated as a terminator here; callers
/// handle that case separately.
pub fn is_ts_list_terminator<'a>(p: &mut impl Parser<'a>, kind: ParsingContext) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());
    let Some(cur) = p.input_mut().cur() else {
        return Ok(false);
    };
    Ok(match kind {
        ParsingContext::EnumMembers | ParsingContext::TypeMembers => cur.is_rbrace(),
        ParsingContext::HeritageClauseElement => {
            cur.is_lbrace() || cur.is_implements() || cur.is_extends()
        }
        ParsingContext::TupleElementTypes => cur.is_rbracket(),
        ParsingContext::TypeParametersOrArguments => cur.is_greater(),
    })
}
Want a + // hasLineBreakUpNext() method... + p.bump(); + Ok(!p.input_mut().had_line_break_before_cur() + && p.input_mut().cur().is_some_and(|cur| { + cur.is_lbracket() + || cur.is_lbrace() + || cur.is_star() + || cur.is_dotdotdot() + || cur.is_hash() + || cur.is_word() + || cur.is_str() + || cur.is_num() + || cur.is_bigint() + })) +} + +/// `tsTryParse` +pub fn try_parse_ts<'a, P: Parser<'a>, T, F>(p: &mut P, op: F) -> Option +where + F: FnOnce(&mut P) -> PResult>, +{ + if !p.input().syntax().typescript() { + return None; + } + debug_tracing!(p, "try_parse_ts"); + + trace_cur!(p, try_parse_ts); + + let prev_ignore_error = p.input().get_ctx().contains(Context::IgnoreError); + let mut cloned = p.clone(); + cloned.set_ctx(p.ctx() | Context::IgnoreError); + let res = op(&mut cloned); + match res { + Ok(Some(res)) => { + *p = cloned; + trace_cur!(p, try_parse_ts__success_value); + let mut ctx = p.ctx(); + ctx.set(Context::IgnoreError, prev_ignore_error); + p.input_mut().set_ctx(ctx); + Some(res) + } + Ok(None) => { + trace_cur!(p, try_parse_ts__success_no_value); + None + } + Err(..) 
=> { + trace_cur!(p, try_parse_ts__fail); + None + } + } +} + +/// `tsParseTypeMemberSemicolon` +fn parse_ts_type_member_semicolon<'a, P: Parser<'a>>(p: &mut P) -> PResult<()> { + debug_assert!(p.input().syntax().typescript()); + + if !p.input_mut().eat(&P::Token::COMMA) { + p.expect_general_semi() + } else { + Ok(()) + } +} + +/// `tsIsStartOfConstructSignature` +fn is_ts_start_of_construct_signature<'a, P: Parser<'a>>(p: &mut P) -> PResult { + debug_assert!(p.input().syntax().typescript()); + + p.bump(); + let Some(cur) = p.input_mut().cur() else { + return Ok(false); + }; + Ok(cur.is_lparen() || cur.is_less()) +} + +/// `tsParseDelimitedList` +fn parse_ts_delimited_list<'a, P: Parser<'a>, T, F>( + p: &mut P, + kind: ParsingContext, + mut parse_element: F, +) -> PResult> +where + F: FnMut(&mut P) -> PResult, +{ + parse_ts_delimited_list_inner(p, kind, |p| { + let start = p.input_mut().cur_pos(); + Ok((start, parse_element(p)?)) + }) +} + +/// `tsParseUnionOrIntersectionType` +fn parse_ts_union_or_intersection_type<'a, P: Parser<'a>, F>( + p: &mut P, + kind: UnionOrIntersection, + mut parse_constituent_type: F, + operator: &P::Token, +) -> PResult> +where + F: FnMut(&mut P) -> PResult>, +{ + trace_cur!(p, parse_ts_union_or_intersection_type); + + debug_assert!(p.input().syntax().typescript()); + + let start = p.input_mut().cur_pos(); // include the leading operator in the start + p.input_mut().eat(operator); + trace_cur!(p, parse_ts_union_or_intersection_type__first_type); + + let ty = parse_constituent_type(p)?; + trace_cur!(p, parse_ts_union_or_intersection_type__after_first); + + if p.input_mut().is(operator) { + let mut types = vec![ty]; + + while p.input_mut().eat(operator) { + trace_cur!(p, parse_ts_union_or_intersection_type__constituent); + + types.push(parse_constituent_type(p)?); + } + + return Ok(Box::new(TsType::TsUnionOrIntersectionType(match kind { + UnionOrIntersection::Union => TsUnionOrIntersectionType::TsUnionType(TsUnionType { + span: 
/// Eats one leading accessibility-ish modifier (`public` / `protected` /
/// `private` / `readonly`) when it is clearly being used as a modifier, i.e.
/// it is followed by a word, `{`, or `[`. Returns whether one was consumed;
/// the caller reports the error appropriate for the invalid position.
pub fn eat_any_ts_modifier<'a>(p: &mut impl Parser<'a>) -> PResult<bool> {
    if p.syntax().typescript()
        && {
            let cur = cur!(p, false)?;
            cur.is_public() || cur.is_protected() || cur.is_private() || cur.is_readonly()
        }
        && peek!(p).is_some_and(|t| t.is_word() || t.is_lbrace() || t.is_lbracket())
    {
        // Result intentionally discarded: only the token consumption matters.
        let _ = parse_ts_modifier(p, &["public", "protected", "private", "readonly"], false);
        Ok(true)
    } else {
        Ok(false)
    }
}
/// `tsParseBracketedList`: parses a delimited list surrounded by `[`/`]`
/// (`bracket == true`) or `<`/`>` (`bracket == false`). `skip_first_token`
/// is set when the caller already consumed the opening token.
fn parse_ts_bracketed_list<'a, P: Parser<'a>, T, F>(
    p: &mut P,
    kind: ParsingContext,
    parse_element: F,
    bracket: bool,
    skip_first_token: bool,
) -> PResult<Vec<T>>
where
    F: FnMut(&mut P) -> PResult<T>,
{
    debug_assert!(p.input().syntax().typescript());
    if !skip_first_token {
        if bracket {
            expect!(p, &P::Token::LBRACKET);
        } else {
            expect!(p, &P::Token::LESS);
        }
    }
    let result = parse_ts_delimited_list(p, kind, parse_element)?;
    if bracket {
        expect!(p, &P::Token::RBRACKET);
    } else {
        expect!(p, &P::Token::GREATER);
    }
    Ok(result)
}

/// `tsParseThisTypeNode`: consumes `this` and returns its span as a
/// `TsThisType`.
pub fn parse_ts_this_type_node<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsThisType> {
    debug_assert!(p.input().syntax().typescript());
    expect!(p, &P::Token::THIS);
    Ok(TsThisType {
        // `expect!` advanced past `this`, so its span is the previous span.
        span: p.input().prev_span(),
    })
}
+ } else { + parse_ident(p, false, false)?.into() + }; + let span = p.span(start); + entity = TsEntityName::TsQualifiedName(Box::new(TsQualifiedName { span, left, right })); + } + Ok(entity) +} + +pub fn ts_look_ahead<'a, P: Parser<'a>, T, F>(p: &mut P, op: F) -> PResult +where + F: FnOnce(&mut P) -> PResult, +{ + debug_assert!(p.input().syntax().typescript()); + let mut cloned = p.clone(); + cloned.set_ctx(p.ctx() | Context::IgnoreError); + op(&mut cloned) +} + +/// `tsParseTypeArguments` +pub fn parse_ts_type_args<'a, P: Parser<'a>>(p: &mut P) -> PResult> { + trace_cur!(p, parse_ts_type_args); + debug_assert!(p.input().syntax().typescript()); + + let start = p.input_mut().cur_pos(); + let params = p.in_type().parse_with(|p| { + // Temporarily remove a JSX parsing context, which makes us scan different + // tokens. + ts_in_no_context(p, |p| { + if p.input_mut().is(&P::Token::LSHIFT) { + p.input_mut().cut_lshift(); + } else { + expect!(p, &P::Token::LESS); + } + parse_ts_delimited_list(p, ParsingContext::TypeParametersOrArguments, |p| { + trace_cur!(p, parse_ts_type_args__arg); + + parse_ts_type(p) + }) + }) + })?; + // This reads the next token after the `>` too, so do this in the enclosing + // context. 
/// `tsParseTypeReference`: parses an entity name optionally followed by type
/// arguments (`Foo.Bar<Baz>`).
pub fn parse_ts_type_ref<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTypeRef> {
    trace_cur!(p, parse_ts_type_ref);
    debug_assert!(p.input().syntax().typescript());

    let start = p.input_mut().cur_pos();

    // A modifier here is invalid; remember it and report TS2369 below once
    // the full span is known.
    let has_modifier = eat_any_ts_modifier(p)?;

    let type_name = parse_ts_entity_name(p, /* allow_reserved_words */ true)?;
    trace_cur!(p, parse_ts_type_ref__type_args);
    // Type arguments must begin on the same line as the name.
    let type_params =
        if !p.input_mut().had_line_break_before_cur() && p.input_mut().is(&P::Token::LESS) {
            Some(parse_ts_type_args(
                p.with_ctx(p.ctx() & !Context::ShouldNotLexLtOrGtAsType)
                    .deref_mut(),
            )?)
        } else {
            None
        };

    if has_modifier {
        p.emit_err(p.span(start), SyntaxError::TS2369);
    }

    Ok(TsTypeRef {
        span: p.span(start),
        type_name,
        type_params,
    })
}

/// Parses a type annotation; when `eat_colon` is set the leading `:` is
/// consumed first. `start` is the position the annotation's span begins at.
#[cfg_attr(
    feature = "tracing-spans",
    tracing::instrument(level = "debug", skip_all)
)]
pub fn parse_ts_type_ann<'a, P: Parser<'a>>(
    p: &mut P,
    eat_colon: bool,
    start: BytePos,
) -> PResult<Box<TsTypeAnn>> {
    trace_cur!(p, parse_ts_type_ann);

    debug_assert!(p.input().syntax().typescript());

    p.in_type().parse_with(|p| {
        if eat_colon {
            p.assert_and_bump(&P::Token::COLON)?;
        }

        trace_cur!(p, parse_ts_type_ann__after_colon);

        let type_ann = parse_ts_type(p)?;

        Ok(Box::new(TsTypeAnn {
            span: p.span(start),
            type_ann,
        }))
    })
}
/// `tsEatThenParseType`: if `token_to_eat` is present, consumes it and parses
/// the type that follows (in type context); otherwise returns `None`.
fn eat_then_parse_ts_type<'a, P: Parser<'a>>(
    p: &mut P,
    token_to_eat: &P::Token,
) -> PResult<Option<Box<TsType>>> {
    // Without the `typescript` cargo feature this is a no-op.
    if !cfg!(feature = "typescript") {
        return Ok(Default::default());
    }

    p.in_type().parse_with(|p| {
        if !p.input_mut().eat(token_to_eat) {
            return Ok(None);
        }

        parse_ts_type(p).map(Some)
    })
}

/// `tsExpectThenParseType`: like `eat_then_parse_ts_type` but the token is
/// mandatory; a missing token is a syntax error naming `token_str`.
fn expect_then_parse_ts_type<'a, P: Parser<'a>>(
    p: &mut P,
    token: &P::Token,
    token_str: &'static str,
) -> PResult<Box<TsType>> {
    debug_assert!(p.input().syntax().typescript());

    p.in_type().parse_with(|p| {
        if !p.input_mut().eat(token) {
            let got = format!("{:?}", cur!(p, false).ok());
            syntax_error!(
                p,
                p.input().cur_span(),
                SyntaxError::Unexpected {
                    got,
                    expected: token_str
                }
            );
        }

        parse_ts_type(p)
    })
}

/// `tsParseMappedTypeParameter`: parses `K in T` inside a mapped type; the
/// `in` keyword and its constraint are required.
fn parse_ts_mapped_type_param<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTypeParam> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.input_mut().cur_pos();
    let name = parse_ident_name(p)?;
    let constraint = Some(expect_then_parse_ts_type(p, &P::Token::IN, "in")?);

    Ok(TsTypeParam {
        span: p.span(start),
        name: name.into(),
        is_in: false,
        is_out: false,
        is_const: false,
        constraint,
        default: None,
    })
}
/// `tsParseTypeParameters`: parses `<T, U extends V = W>`. JSX lexing is
/// disabled via `ts_in_no_context` so `<` starts a type-parameter list; the
/// opening token may therefore be either `<` or a JSX tag start.
pub fn parse_ts_type_params<'a, P: Parser<'a>>(
    p: &mut P,
    permit_in_out: bool,
    permit_const: bool,
) -> PResult<Box<TsTypeParamDecl>> {
    p.in_type().parse_with(|p| {
        ts_in_no_context(p, |p| {
            let start = p.input_mut().cur_pos();

            let Some(cur) = p.input_mut().cur() else {
                unexpected!(p, "< (jsx tag start)")
            };
            if !cur.is_less() && !cur.is_jsx_tag_start() {
                unexpected!(p, "< (jsx tag start)")
            }
            p.bump();

            let params = parse_ts_bracketed_list(
                p,
                ParsingContext::TypeParametersOrArguments,
                |p| parse_ts_type_param(p, permit_in_out, permit_const), // bracket
                false,
                // skip_first_token: the `<` was consumed just above.
                true,
            )?;

            Ok(Box::new(TsTypeParamDecl {
                span: p.span(start),
                params,
            }))
        })
    })
}
+) -> PResult>> { + if !cfg!(feature = "typescript") { + return Ok(None); + } + + if p.input_mut().cur().is_some_and(|cur| cur.is_less()) { + return parse_ts_type_params(p, permit_in_out, permit_const).map(Some); + } + + Ok(None) +} + +/// `tsParseTypeOrTypePredicateAnnotation` +pub fn parse_ts_type_or_type_predicate_ann<'a, P: Parser<'a>>( + p: &mut P, + return_token: &P::Token, +) -> PResult> { + debug_assert!(p.input().syntax().typescript()); + + p.in_type().parse_with(|p| { + let return_token_start = p.input_mut().cur_pos(); + if !p.input_mut().eat(return_token) { + let cur = format!("{:?}", cur!(p, false).ok()); + let span = p.input_mut().cur_span(); + syntax_error!( + p, + span, + SyntaxError::Expected(format!("{return_token:?}"), cur) + ) + } + + let type_pred_start = p.input_mut().cur_pos(); + let has_type_pred_asserts = p.input_mut().cur().is_some_and(|cur| cur.is_asserts()) && { + let ctx = p.ctx(); + peek!(p).is_some_and(|peek| { + if peek.is_word() { + !peek.is_reserved(ctx) + } else { + false + } + }) + }; + if has_type_pred_asserts { + p.assert_and_bump(&P::Token::ASSERTS)?; + cur!(p, false)?; + } + + let has_type_pred_is = p.is_ident_ref() + && peek!(p).is_some_and(|peek| peek.is_is()) + && !p.input_mut().has_linebreak_between_cur_and_peeked(); + let is_type_predicate = has_type_pred_asserts || has_type_pred_is; + if !is_type_predicate { + return parse_ts_type_ann( + p, + // eat_colon + false, + return_token_start, + ); + } + + let type_pred_var = parse_ident_name(p)?; + let type_ann = if has_type_pred_is { + p.assert_and_bump(&P::Token::IS)?; + let pos = p.input_mut().cur_pos(); + Some(parse_ts_type_ann( + p, // eat_colon + false, pos, + )?) 
/// Mirrors TS `isStartOfExpression`: left-hand-side starters plus unary
/// operators, `await`/`yield`, and a `#field in obj` check.
fn is_start_of_expr<'a>(p: &mut impl Parser<'a>) -> bool {
    is_start_of_left_hand_side_expr(p) || {
        let Some(cur) = p.input_mut().cur() else {
            return false;
        };
        cur.is_plus()
            || cur.is_minus()
            || cur.is_tilde()
            || cur.is_bang()
            || cur.is_delete()
            || cur.is_typeof()
            || cur.is_void()
            || cur.is_plus_plus()
            || cur.is_minus_minus()
            || cur.is_less()
            || cur.is_await()
            || cur.is_yield()
            || (cur.is_hash() && peek!(p).is_some_and(|peek| peek.is_word()))
    }
}

/// Speculatively parses type arguments for an instantiation expression
/// (`f<T>`); returns `None` when the token after `>` means the `<...>` must
/// instead be a comparison, a call's type arguments, or a tagged template.
#[cfg_attr(
    feature = "tracing-spans",
    tracing::instrument(level = "debug", skip_all)
)]
pub(super) fn try_parse_ts_type_args<'a, P: Parser<'a>>(
    p: &mut P,
) -> Option<Box<TsTypeParamInstantiation>> {
    trace_cur!(p, try_parse_ts_type_args);
    debug_assert!(p.input().syntax().typescript());

    try_parse_ts(p, |p| {
        let type_args = parse_ts_type_args(p)?;
        let cur = p.input_mut().cur();
        if cur.is_some_and(|cur| {
            cur.is_less() // invalid syntax
                || cur.is_greater() || cur.is_equal() || cur.is_rshift() || cur.is_greater_eq() || cur.is_plus() || cur.is_minus() // becomes relational expression
                || cur.is_lparen() || cur.is_backquote() // these should be type
                                                         // arguments in function
                                                         // call or template, not
                                                         // instantiation
                                                         // expression
        }) {
            Ok(None)
        } else if p.input_mut().had_line_break_before_cur()
            || cur!(p, false).is_ok_and(|t| t.is_bin_op())
            || !is_start_of_expr(p)
        {
            // Nothing expression-like follows, so these really are the type
            // arguments of an instantiation expression.
            Ok(Some(type_args))
        } else {
            Ok(None)
        }
    })
}

/// `tsTryParseType`: parses `: T` when the colon is present.
fn try_parse_ts_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<Option<Box<TsType>>> {
    if !cfg!(feature = "typescript") {
        return Ok(None);
    }

    eat_then_parse_ts_type(p, &P::Token::COLON)
}
/// `tsTryParseTypeAnnotation`: parses `: T` (colon included) when present.
#[cfg_attr(
    feature = "tracing-spans",
    tracing::instrument(level = "debug", skip_all)
)]
pub fn try_parse_ts_type_ann<'a, P: Parser<'a>>(p: &mut P) -> PResult<Option<Box<TsTypeAnn>>> {
    if !cfg!(feature = "typescript") {
        return Ok(None);
    }

    if p.input_mut().is(&P::Token::COLON) {
        let pos = p.cur_pos();
        return parse_ts_type_ann(p, /* eat_colon */ true, pos).map(Some);
    }

    Ok(None)
}

/// `tsNextThenParseType`: skips the current token, then parses a type. After
/// leaving type context, a leftover `<`/`>` pair may need to be re-merged
/// into a shift operator (`merge_lt_gt`).
pub(super) fn next_then_parse_ts_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    debug_assert!(p.input().syntax().typescript());

    let result = p.in_type().parse_with(|p| {
        p.bump();
        parse_ts_type(p)
    });

    if !p.ctx().contains(Context::InType)
        && p.input_mut()
            .cur()
            .is_some_and(|cur| cur.is_less() || cur.is_greater())
    {
        p.input_mut().merge_lt_gt();
    }

    result
}
+ } else if cur.is_num() { + let cur = p.bump(); + let (value, raw) = cur.take_num(p.input_mut()); + let mut new_raw = String::new(); + + new_raw.push('"'); + new_raw.push_str(raw.as_str()); + new_raw.push('"'); + + let span = p.span(start); + + // Recover from error + p.emit_err(span, SyntaxError::TS2452); + + TsEnumMemberId::Str(Str { + span, + value: value.to_string().into(), + raw: Some(new_raw.into()), + }) + } else if cur.is_lbracket() { + p.assert_and_bump(&P::Token::LBRACKET)?; + let _ = p.parse_expr()?; + p.emit_err(p.span(start), SyntaxError::TS1164); + p.assert_and_bump(&P::Token::RBRACKET)?; + TsEnumMemberId::Ident(Ident::new_no_ctxt(atom!(""), p.span(start))) + } else { + parse_ident_name(p) + .map(Ident::from) + .map(TsEnumMemberId::from)? + }; + + let init = if p.input_mut().eat(&P::Token::EQUAL) { + Some(parse_assignment_expr(p)?) + } else if p.input_mut().is(&P::Token::COMMA) || p.input_mut().is(&P::Token::RBRACE) { + None + } else { + let start = p.cur_pos(); + p.bump(); + p.input_mut().store(P::Token::COMMA); + p.emit_err(Span::new(start, start), SyntaxError::TS1005); + None + }; + + Ok(TsEnumMember { + span: p.span(start), + id, + init, + }) +} + +/// `tsParseEnumDeclaration` +pub fn parse_ts_enum_decl<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + is_const: bool, +) -> PResult> { + debug_assert!(p.input().syntax().typescript()); + + let id = parse_ident_name(p)?; + expect!(p, &P::Token::LBRACE); + let members = parse_ts_delimited_list(p, ParsingContext::EnumMembers, parse_ts_enum_member)?; + expect!(p, &P::Token::RBRACE); + + Ok(Box::new(TsEnumDecl { + span: p.span(start), + declare: false, + is_const, + id: id.into(), + members, + })) +} + +/// `tsTryParseTypeOrTypePredicateAnnotation` +/// +/// Used for parsing return types. 
pub fn try_parse_ts_type_or_type_predicate_ann<'a, P: Parser<'a>>(
    p: &mut P,
) -> PResult<Option<Box<TsTypeAnn>>> {
    if !cfg!(feature = "typescript") {
        return Ok(None);
    }

    if p.input_mut().is(&P::Token::COLON) {
        parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON).map(Some)
    } else {
        Ok(None)
    }
}

/// `tsParseTemplateLiteralType`
///
/// Parses a template-literal type such as `` `a${T}b` ``; the caller's
/// current token is the opening backquote.
fn parse_ts_tpl_lit_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTplLitType> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();

    p.assert_and_bump(&P::Token::BACKQUOTE)?;

    let (types, quasis) = parse_ts_tpl_type_elements(p)?;

    expect!(p, &P::Token::BACKQUOTE);

    Ok(TsTplLitType {
        span: p.span(start),
        types,
        quasis,
    })
}

// Collects the interleaved quasis and `${Type}` holes of a template-literal
// type. Invariant: `quasis.len() == types.len() + 1`.
fn parse_ts_tpl_type_elements<'a, P: Parser<'a>>(
    p: &mut P,
) -> PResult<(Vec<Box<TsType>>, Vec<TplElement>)> {
    if !cfg!(feature = "typescript") {
        return Ok(Default::default());
    }

    trace_cur!(p, parse_tpl_elements);

    let mut types = Vec::new();

    let cur_elem = p.parse_tpl_element(false)?;
    let mut is_tail = cur_elem.tail;
    let mut quasis = vec![cur_elem];

    // `tail` is set on the quasi that ends the template; keep alternating
    // `${ Type }` / quasi until we see it.
    while !is_tail {
        expect!(p, &P::Token::DOLLAR_LBRACE);
        types.push(parse_ts_type(p)?);
        expect!(p, &P::Token::RBRACE);
        let elem = p.parse_tpl_element(false)?;
        is_tail = elem.tail;
        quasis.push(elem);
    }

    Ok((types, quasis))
}

/// `tsParseLiteralTypeNode`
///
/// Literal types: string/number/bigint/boolean literals and template-literal
/// types.
pub fn parse_ts_lit_type_node<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsLitType> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();

    let lit = if p.input_mut().is(&P::Token::BACKQUOTE) {
        let tpl = parse_ts_tpl_lit_type(p)?;
        TsLit::Tpl(tpl)
    } else {
        match parse_lit(p)? {
            Lit::BigInt(n) => TsLit::BigInt(n),
            Lit::Bool(n) => TsLit::Bool(n),
            Lit::Num(n) => TsLit::Number(n),
            Lit::Str(n) => TsLit::Str(n),
            // Other literal kinds (regex, JSX text) cannot appear here.
            _ => unreachable!(),
        }
    };

    Ok(TsLitType {
        span: p.span(start),
        lit,
    })
}

/// `tsParseHeritageClause`
pub fn parse_ts_heritage_clause<'a>(p: &mut impl Parser<'a>) -> PResult<Vec<TsExprWithTypeArgs>> {
    debug_assert!(p.input().syntax().typescript());

    parse_ts_delimited_list(
        p,
        ParsingContext::HeritageClauseElement,
        parse_ts_heritage_clause_element,
    )
}

// Parses one `extends`/`implements` entry: an entity-name-like expression
// optionally followed by type arguments.
fn parse_ts_heritage_clause_element<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsExprWithTypeArgs> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    // Note: TS uses parseLeftHandSideExpressionOrHigher,
    // then has grammar errors later if it's not an EntityName.

    let ident = parse_ident_name(p)?.into();
    let expr = parse_subscripts(p, Callee::Expr(ident), true, true)?;
    if !matches!(
        &*expr,
        Expr::Ident(..) | Expr::Member(..) | Expr::TsInstantiation(..)
    ) {
        // Anything other than an entity name (or instantiation expression) is
        // a grammar error, reported but kept in the AST for recovery.
        p.emit_err(p.span(start), SyntaxError::TS2499);
    }

    match *expr {
        // `Base<T>` already carries its type args as a TsInstantiation; unwrap
        // them instead of parsing a second argument list.
        Expr::TsInstantiation(v) => Ok(TsExprWithTypeArgs {
            span: v.span,
            expr: v.expr,
            type_args: Some(v.type_args),
        }),
        _ => {
            let type_args = if p.input_mut().is(&P::Token::LESS) {
                Some(parse_ts_type_args(p)?)
            } else {
                None
            };

            Ok(TsExprWithTypeArgs {
                span: p.span(start),
                expr,
                type_args,
            })
        }
    }
}

/// `tsSkipParameterStart`
///
/// Speculatively consumes the start of a parameter (modifiers, an
/// identifier/`this`, or a binding pattern). Returns whether anything that
/// can start a parameter was seen. Used only inside lookahead.
fn skip_ts_parameter_start<'a, P: Parser<'a>>(p: &mut P) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());

    let _ = eat_any_ts_modifier(p)?;

    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_word() || cur.is_this())
    {
        p.bump();
        return Ok(true);
    }

    // `{`/`[` may start a destructuring pattern; only counts if it actually
    // parses as one.
    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_lbrace() || cur.is_lbracket())
        && parse_binding_pat_or_ident(p, false).is_ok()
    {
        return Ok(true);
    }

    Ok(false)
}

/// `tsIsUnambiguouslyStartOfFunctionType`
///
/// Lookahead (run via `ts_look_ahead`, so token consumption is rolled back)
/// deciding whether `(` starts a function type rather than a parenthesized
/// type.
fn is_ts_unambiguously_start_of_fn_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());

    p.assert_and_bump(&P::Token::LPAREN)?;

    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_rparen() || cur.is_dotdotdot())
    {
        // ( )
        // ( ...
        return Ok(true);
    }
    if skip_ts_parameter_start(p)? {
        if p.input_mut().cur().is_some_and(|cur| {
            cur.is_colon() || cur.is_comma() || cur.is_equal() || cur.is_question()
        }) {
            // ( xxx :
            // ( xxx ,
            // ( xxx ?
            // ( xxx =
            return Ok(true);
        }
        if p.input_mut().eat(&P::Token::RPAREN)
            && p.input_mut().cur().is_some_and(|cur| cur.is_arrow())
        {
            // ( xxx ) =>
            return Ok(true);
        }
    }
    Ok(false)
}

// `<` always starts a (generic) function type here; `(` needs lookahead.
fn is_ts_start_of_fn_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());

    if p.input_mut().cur().is_some_and(|cur| cur.is_less()) {
        return Ok(true);
    }

    Ok(p.input_mut().cur().is_some_and(|cur| cur.is_lparen())
        && ts_look_ahead(p, is_ts_unambiguously_start_of_fn_type)?)
}

/// `tsIsUnambiguouslyIndexSignature`
///
/// Lookahead: `[` followed by an identifier and then `:` (or `,` for error
/// recovery) is an index signature rather than a computed property name.
fn is_ts_unambiguously_index_signature<'a, P: Parser<'a>>(p: &mut P) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());

    // Note: babel's comment is wrong
    p.assert_and_bump(&P::Token::LBRACKET)?; // Skip '['

    // ',' is for error recovery
    Ok(p.eat_ident_ref()
        && p.input_mut()
            .cur()
            .is_some_and(|cur| cur.is_comma() || cur.is_colon()))
}

/// `tsTryParseIndexSignature`
///
/// Parses `[key: K]: V` if the lookahead confirms an index signature;
/// otherwise leaves the input untouched and returns `Ok(None)`.
pub fn try_parse_ts_index_signature<'a, P: Parser<'a>>(
    p: &mut P,
    index_signature_start: BytePos,
    readonly: bool,
    is_static: bool,
) -> PResult<Option<TsIndexSignature>> {
    if !cfg!(feature = "typescript") {
        return Ok(Default::default());
    }

    if !(p.input_mut().cur().is_some_and(|cur| cur.is_lbracket())
        && ts_look_ahead(p, is_ts_unambiguously_index_signature)?)
    {
        return Ok(None);
    }

    expect!(p, &P::Token::LBRACKET);

    let ident_start = p.cur_pos();
    let mut id = parse_ident_name(p).map(BindingIdent::from)?;
    let type_ann_start = p.cur_pos();

    if p.input_mut().eat(&P::Token::COMMA) {
        // `[a, b]` — recover with TS1096 (only one parameter allowed).
        p.emit_err(id.span, SyntaxError::TS1096);
    } else {
        expect!(p, &P::Token::COLON);
    }

    let type_ann = parse_ts_type_ann(p, /* eat_colon */ false, type_ann_start)?;
    // Extend the binding's span to cover its type annotation.
    id.span = p.span(ident_start);
    id.type_ann = Some(type_ann);

    expect!(p, &P::Token::RBRACKET);

    let params = vec![TsFnParam::Ident(id)];

    // Value type annotation after the closing `]`, if any.
    let ty = try_parse_ts_type_ann(p)?;
    let type_ann = ty;

    parse_ts_type_member_semicolon(p)?;

    Ok(Some(TsIndexSignature {
        span: p.span(index_signature_start),
        readonly,
        is_static,
        params,
        type_ann,
    }))
}

/// `tsIsExternalModuleReference`
///
/// True for `require (` — the start of `import x = require("...")`.
fn is_ts_external_module_ref<'a, P: Parser<'a>>(p: &mut P) -> bool {
    debug_assert!(p.input().syntax().typescript());
    p.input_mut().is(&P::Token::REQUIRE) && peek!(p).is_some_and(|t| t.is_lparen())
}

/// `tsParseModuleReference`
fn parse_ts_module_ref<'a>(p: &mut impl Parser<'a>) -> PResult<TsModuleRef> {
    debug_assert!(p.input().syntax().typescript());

    if is_ts_external_module_ref(p) {
        parse_ts_external_module_ref(p).map(From::from)
    } else {
        parse_ts_entity_name(p, /* allow_reserved_words */ false).map(From::from)
    }
}

/// `tsParseExternalModuleReference`
///
/// Parses `require("module-specifier")`.
fn parse_ts_external_module_ref<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsExternalModuleRef> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    expect!(p, &P::Token::REQUIRE);
    expect!(p, &P::Token::LPAREN);
    let cur = cur!(p, true);
    if !cur.is_str() {
        unexpected!(p, "a string literal")
    }
    let expr = match parse_lit(p)? {
        Lit::Str(s) => s,
        // `cur.is_str()` above guarantees a string literal.
        _ => unreachable!(),
    };
    expect!(p, &P::Token::RPAREN);
    Ok(TsExternalModuleRef {
        span: p.span(start),
        expr,
    })
}

/// `tsParseImportEqualsDeclaration`
///
/// The caller has already parsed `import <id>`; this parses `= <module ref> ;`.
pub fn parse_ts_import_equals_decl<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
    id: Ident,
    is_export: bool,
    is_type_only: bool,
) -> PResult<Box<TsImportEqualsDecl>> {
    debug_assert!(p.input().syntax().typescript());

    expect!(p, &P::Token::EQUAL);
    let module_ref = parse_ts_module_ref(p)?;
    p.expect_general_semi()?;

    Ok(Box::new(TsImportEqualsDecl {
        span: p.span(start),
        id,
        is_export,
        is_type_only,
        module_ref,
    }))
}

/// `tsParseBindingListForSignature`
///
/// Eats ')` at the end but does not eat `(` at start.
fn parse_ts_binding_list_for_signature<'a, P: Parser<'a>>(p: &mut P) -> PResult<Vec<TsFnParam>> {
    if !cfg!(feature = "typescript") {
        return Ok(Default::default());
    }

    debug_assert!(p.input().syntax().typescript());

    let params = parse_formal_params(p)?;
    let mut list = Vec::with_capacity(4);

    // Only plain binding patterns are valid in a signature; anything else
    // (e.g. assignment patterns) is rejected.
    for param in params {
        let item = match param.pat {
            Pat::Ident(pat) => TsFnParam::Ident(pat),
            Pat::Array(pat) => TsFnParam::Array(pat),
            Pat::Object(pat) => TsFnParam::Object(pat),
            Pat::Rest(pat) => TsFnParam::Rest(pat),
            _ => unexpected!(
                p,
                "an identifier, [ for an array pattern, { for an object patter or ...
 for a rest \
                 pattern"
            ),
        };
        list.push(item);
    }
    expect!(p, &P::Token::RPAREN);
    Ok(list)
}

/// `tsIsStartOfMappedType`
///
/// Lookahead (input rewound by the caller via `ts_look_ahead`) deciding
/// whether `{` begins a mapped type: `{ [+|-]readonly? [ Ident in ...`.
pub fn is_ts_start_of_mapped_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<bool> {
    debug_assert!(p.input().syntax().typescript());

    p.bump();
    if p.input_mut().eat(&P::Token::PLUS) || p.input_mut().eat(&P::Token::MINUS) {
        // `+`/`-` can only be followed by `readonly` in a mapped type.
        return Ok(p.input_mut().is(&P::Token::READONLY));
    }
    if p.input_mut().is(&P::Token::READONLY) {
        p.bump();
    }
    if !p.input_mut().is(&P::Token::LBRACKET) {
        return Ok(false);
    }
    p.bump();
    if !p.is_ident_ref() {
        return Ok(false);
    }
    p.bump();

    Ok(p.input_mut().is(&P::Token::IN))
}

/// `tsParseSignatureMember`
///
/// Parses a call signature `(...): T` or a construct signature
/// `new (...): T`, selected by `kind`.
fn parse_ts_signature_member<'a, P: Parser<'a>>(
    p: &mut P,
    kind: SignatureParsingMode,
) -> PResult<Either<TsCallSignatureDecl, TsConstructSignatureDecl>> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();

    if kind == SignatureParsingMode::TSConstructSignatureDeclaration {
        expect!(p, &P::Token::NEW);
    }

    // ----- inlined p.tsFillSignature(tt.colon, node);
    let type_params = try_parse_ts_type_params(p, false, true)?;
    expect!(p, &P::Token::LPAREN);
    let params = parse_ts_binding_list_for_signature(p)?;
    let type_ann = if p.input_mut().is(&P::Token::COLON) {
        Some(parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON)?)
    } else {
        None
    };
    // -----

    parse_ts_type_member_semicolon(p)?;

    match kind {
        SignatureParsingMode::TSCallSignatureDeclaration => Ok(Either::Left(TsCallSignatureDecl {
            span: p.span(start),
            params,
            type_ann,
            type_params,
        })),
        SignatureParsingMode::TSConstructSignatureDeclaration => {
            Ok(Either::Right(TsConstructSignatureDecl {
                span: p.span(start),
                params,
                type_ann,
                type_params,
            }))
        }
    }
}

// Speculatively parses a tuple element label (`name:` / `name?:` /
// `...name:`); rolls back and returns `None` when the element is unlabeled.
fn try_parse_ts_tuple_element_name<'a, P: Parser<'a>>(p: &mut P) -> Option<Pat> {
    if !cfg!(feature = "typescript") {
        return Default::default();
    }

    try_parse_ts(p, |p| {
        let start = p.cur_pos();

        let rest = if p.input_mut().eat(&P::Token::DOTDOTDOT) {
            Some(p.input().prev_span())
        } else {
            None
        };

        let mut ident = parse_ident_name(p).map(Ident::from)?;
        if p.input_mut().eat(&P::Token::QUESTION) {
            ident.optional = true;
            // Stretch the ident span to include the `?`.
            ident.span = ident.span.with_hi(p.input().prev_span().hi);
        }
        // The `:` is what commits us to the labeled form; failing here makes
        // `try_parse_ts` rewind.
        expect!(p, &P::Token::COLON);

        Ok(Some(if let Some(dot3_token) = rest {
            RestPat {
                span: p.span(start),
                dot3_token,
                arg: ident.into(),
                type_ann: None,
            }
            .into()
        } else {
            ident.into()
        }))
    })
}

/// `tsParseTupleElementType`
fn parse_ts_tuple_element_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTupleElement> {
    debug_assert!(p.input().syntax().typescript());

    // parses `...TsType[]`
    let start = p.cur_pos();

    let label = try_parse_ts_tuple_element_name(p);

    if p.input_mut().eat(&P::Token::DOTDOTDOT) {
        let type_ann = parse_ts_type(p)?;
        return Ok(TsTupleElement {
            span: p.span(start),
            label,
            ty: Box::new(TsType::TsRestType(TsRestType {
                span: p.span(start),
                type_ann,
            })),
        });
    }

    let ty = parse_ts_type(p)?;
    // parses `TsType?`
    if p.input_mut().eat(&P::Token::QUESTION) {
        let type_ann = ty;
        return Ok(TsTupleElement {
            span: p.span(start),
            label,
            ty: Box::new(TsType::TsOptionalType(TsOptionalType {
                span: p.span(start),
                type_ann,
            })),
        });
    }

    Ok(TsTupleElement {
        span: p.span(start),
        label,
        ty,
    })
}

/// `tsParseTupleType`
pub fn parse_ts_tuple_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTupleType> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    let elems = parse_ts_bracketed_list(
        p,
        ParsingContext::TupleElementTypes,
        parse_ts_tuple_element_type,
        /* bracket */ true,
        /* skipFirstToken */ false,
    )?;

    // Validate the elementTypes to ensure:
    //   No mandatory elements may follow optional elements
    //   If there's a rest element, it must be at the end of the tuple

    let mut seen_optional_element = false;

    for elem in elems.iter() {
        match *elem.ty {
            TsType::TsRestType(..) => {}
            TsType::TsOptionalType(..) => {
                seen_optional_element = true;
            }
            _ if seen_optional_element => {
                syntax_error!(p, p.span(start), SyntaxError::TsRequiredAfterOptional)
            }
            _ => {}
        }
    }

    Ok(TsTupleType {
        span: p.span(start),
        elem_types: elems,
    })
}

/// `tsParseMappedType`
///
/// Parses `{ [+|-]readonly? [K in T (as U)?] [+|-]?? : V }`.
pub fn parse_ts_mapped_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsMappedType> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    expect!(p, &P::Token::LBRACE);
    let mut readonly = None;
    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_plus() || cur.is_minus())
    {
        // `+readonly` / `-readonly` modifier.
        readonly = Some(if p.input_mut().cur().is_some_and(|cur| cur.is_plus()) {
            TruePlusMinus::Plus
        } else {
            TruePlusMinus::Minus
        });
        p.bump();
        expect!(p, &P::Token::READONLY)
    } else if p.input_mut().eat(&P::Token::READONLY) {
        readonly = Some(TruePlusMinus::True);
    }

    expect!(p, &P::Token::LBRACKET);
    let type_param = parse_ts_mapped_type_param(p)?;
    let name_type = if p.input_mut().eat(&P::Token::AS) {
        Some(parse_ts_type(p)?)
    } else {
        None
    };
    expect!(p, &P::Token::RBRACKET);

    let mut optional = None;
    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_plus() || cur.is_minus())
    {
        // `+?` / `-?` optionality modifier.
        optional = Some(if p.input_mut().cur().is_some_and(|cur| cur.is_plus()) {
            TruePlusMinus::Plus
        } else {
            TruePlusMinus::Minus
        });
        p.bump(); // +, -
        expect!(p, &P::Token::QUESTION);
    } else if p.input_mut().eat(&P::Token::QUESTION) {
        optional = Some(TruePlusMinus::True);
    }

    let type_ann = try_parse_ts_type(p)?;
    p.expect_general_semi()?;
    expect!(p, &P::Token::RBRACE);

    Ok(TsMappedType {
        span: p.span(start),
        readonly,
        optional,
        type_param,
        name_type,
        type_ann,
    })
}

/// `tsParseParenthesizedType`
pub fn parse_ts_parenthesized_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsParenthesizedType> {
    debug_assert!(p.input().syntax().typescript());
    trace_cur!(p, parse_ts_parenthesized_type);

    let start = p.cur_pos();
    expect!(p, &P::Token::LPAREN);
    let type_ann = parse_ts_type(p)?;
    expect!(p, &P::Token::RPAREN);
    Ok(TsParenthesizedType {
        span: p.span(start),
        type_ann,
    })
}

/// `tsParseTypeAliasDeclaration`
///
/// Parses `<id><type params>? = <type>;` after the `type` keyword (whose
/// position is `start`). `declare` is set by the caller.
pub fn parse_ts_type_alias_decl<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
) -> PResult<Box<TsTypeAliasDecl>> {
    debug_assert!(p.input().syntax().typescript());

    let id = parse_ident_name(p)?;
    let type_params = try_parse_ts_type_params(p, true, false)?;
    let type_ann = expect_then_parse_ts_type(p, &P::Token::EQUAL, "=")?;
    p.expect_general_semi()?;
    Ok(Box::new(TsTypeAliasDecl {
        declare: false,
        span: p.span(start),
        id: id.into(),
        type_params,
        type_ann,
    }))
}

/// `tsParseFunctionOrConstructorType`
///
/// `is_fn_type = true` parses `(...) => T`; otherwise
/// `abstract? new (...) => T`.
fn parse_ts_fn_or_constructor_type<'a, P: Parser<'a>>(
    p: &mut P,
    is_fn_type: bool,
) -> PResult<TsFnOrConstructorType> {
    trace_cur!(p, parse_ts_fn_or_constructor_type);

    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    let is_abstract = if !is_fn_type {
        p.input_mut().eat(&P::Token::ABSTRACT)
    } else {
        false
    };
    if !is_fn_type {
        expect!(p, &P::Token::NEW);
    }

    // ----- inlined `p.tsFillSignature(tt.arrow, node)`
    let type_params = try_parse_ts_type_params(p, false, true)?;
    expect!(p, &P::Token::LPAREN);
    let params = parse_ts_binding_list_for_signature(p)?;
    let type_ann = parse_ts_type_or_type_predicate_ann(p, &P::Token::ARROW)?;
    // ----- end

    Ok(if is_fn_type {
        TsFnOrConstructorType::TsFnType(TsFnType {
            span: p.span(start),
            type_params,
            params,
            type_ann,
        })
    } else {
        TsFnOrConstructorType::TsConstructorType(TsConstructorType {
            span: p.span(start),
            type_params,
            params,
            type_ann,
            is_abstract,
        })
    })
}

/// `tsParseUnionTypeOrHigher`
fn parse_ts_union_type_or_higher<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_union_type_or_higher);
    debug_assert!(p.input().syntax().typescript());

    parse_ts_union_or_intersection_type(
        p,
        UnionOrIntersection::Union,
        parse_ts_intersection_type_or_higher,
        &P::Token::BIT_OR,
    )
}

/// `tsParseIntersectionTypeOrHigher`
fn parse_ts_intersection_type_or_higher<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_intersection_type_or_higher);

    debug_assert!(p.input().syntax().typescript());

    parse_ts_union_or_intersection_type(
        p,
        UnionOrIntersection::Intersection,
        parse_ts_type_operator_or_higher,
        &P::Token::BIT_AND,
    )
}

/// `tsParseTypeOperatorOrHigher`
///
/// Handles the prefix type operators `keyof`, `unique`, `readonly`, then
/// `infer`, then falls through to array/indexed-access types.
fn parse_ts_type_operator_or_higher<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_type_operator_or_higher);
    debug_assert!(p.input().syntax().typescript());

    let operator = if p.input_mut().is(&P::Token::KEYOF) {
        Some(TsTypeOperatorOp::KeyOf)
    } else if p.input_mut().is(&P::Token::UNIQUE) {
        Some(TsTypeOperatorOp::Unique)
    } else if p.input_mut().is(&P::Token::READONLY) {
        Some(TsTypeOperatorOp::ReadOnly)
    } else {
        None
    };

    match operator {
        Some(operator) => parse_ts_type_operator(p, operator)
            .map(TsType::from)
            .map(Box::new),
        None => {
            trace_cur!(p, parse_ts_type_operator_or_higher__not_operator);

            if p.input_mut().is(&P::Token::INFER) {
                parse_ts_infer_type(p).map(TsType::from).map(Box::new)
            } else {
                // A bare `readonly` modifier here applies to the following
                // array/tuple type.
                let readonly = parse_ts_modifier(p, &["readonly"], false)?.is_some();
                parse_ts_array_type_or_higher(p, readonly)
            }
        }
    }
}

/// `tsParseTypeOperator`
fn parse_ts_type_operator<'a, P: Parser<'a>>(
    p: &mut P,
    op: TsTypeOperatorOp,
) -> PResult<TsTypeOperator> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    // The caller already identified the operator; consume its keyword.
    match op {
        TsTypeOperatorOp::Unique => expect!(p, &P::Token::UNIQUE),
        TsTypeOperatorOp::KeyOf => expect!(p, &P::Token::KEYOF),
        TsTypeOperatorOp::ReadOnly => expect!(p, &P::Token::READONLY),
    }

    let type_ann = parse_ts_type_operator_or_higher(p)?;
    Ok(TsTypeOperator {
        span: p.span(start),
        op,
        type_ann,
    })
}

/// `tsParseInferType`
///
/// Parses `infer T` with an optional `extends` constraint.
fn parse_ts_infer_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsInferType> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    expect!(p, &P::Token::INFER);
    let type_param_name = parse_ident_name(p)?;
    let constraint = try_parse_ts(p, |p| {
        expect!(p, &P::Token::EXTENDS);
        let constraint = parse_ts_non_conditional_type(p);
        // A `?` after the constraint means the `extends` belongs to an
        // enclosing conditional type, not to `infer` — back out then.
        if p.ctx().contains(Context::DisallowConditionalTypes)
            || !p.input_mut().is(&P::Token::QUESTION)
        {
            constraint.map(Some)
        } else {
            Ok(None)
        }
    });
    let type_param = TsTypeParam {
        span: type_param_name.span(),
        name: type_param_name.into(),
        is_in: false,
        is_out: false,
        is_const: false,
        constraint,
        default: None,
    };
    Ok(TsInferType {
        span: p.span(start),
        type_param,
    })
}

/// `tsParseNonConditionalType`
fn parse_ts_non_conditional_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_non_conditional_type);

    debug_assert!(p.input().syntax().typescript());

    if is_ts_start_of_fn_type(p)?
    {
        return parse_ts_fn_or_constructor_type(p, true)
            .map(TsType::from)
            .map(Box::new);
    }
    if (p.input_mut().is(&P::Token::ABSTRACT) && peek!(p).is_some_and(|cur| cur.is_new()))
        || p.input_mut().is(&P::Token::NEW)
    {
        // As in `new () => Date`
        return parse_ts_fn_or_constructor_type(p, false)
            .map(TsType::from)
            .map(Box::new);
    }

    parse_ts_union_type_or_higher(p)
}

/// `tsParseArrayTypeOrHigher`
///
/// After a non-array type, repeatedly consumes `[]` (array type) or
/// `[Index]` (indexed access) suffixes; a preceding line break stops the
/// chain (ASI-like behavior).
fn parse_ts_array_type_or_higher<'a, P: Parser<'a>>(
    p: &mut P,
    readonly: bool,
) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_array_type_or_higher);
    debug_assert!(p.input().syntax().typescript());

    let mut ty = parse_ts_non_array_type(p)?;

    while !p.input_mut().had_line_break_before_cur() && p.input_mut().eat(&P::Token::LBRACKET) {
        if p.input_mut().eat(&P::Token::RBRACKET) {
            ty = Box::new(TsType::TsArrayType(TsArrayType {
                span: p.span(ty.span_lo()),
                elem_type: ty,
            }));
        } else {
            let index_type = parse_ts_type(p)?;
            expect!(p, &P::Token::RBRACKET);
            ty = Box::new(TsType::TsIndexedAccessType(TsIndexedAccessType {
                span: p.span(ty.span_lo()),
                readonly,
                obj_type: ty,
                index_type,
            }))
        }
    }

    Ok(ty)
}

/// Be sure to be in a type context before calling p.
///
/// `tsParseType`
pub fn parse_ts_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    trace_cur!(p, parse_ts_type);

    debug_assert!(p.input().syntax().typescript());

    // Need to set `state.inType` so that we don't parse JSX in a type context.
    debug_assert!(p.ctx().contains(Context::InType));

    let start = p.cur_pos();

    // Conditional types are allowed again at this level; the flag only guards
    // the `extends` operand below.
    let ctx = p.ctx() & !Context::DisallowConditionalTypes;
    p.with_ctx(ctx).parse_with(|p| {
        let ty = parse_ts_non_conditional_type(p)?;
        if p.input_mut().had_line_break_before_cur() || !p.input_mut().eat(&P::Token::EXTENDS) {
            return Ok(ty);
        }

        // `T extends U ? X : Y` — conditional type.
        let check_type = ty;
        let extends_type = {
            parse_ts_non_conditional_type(
                p.with_ctx(p.ctx() | Context::DisallowConditionalTypes)
                    .deref_mut(),
            )?
        };

        expect!(p, &P::Token::QUESTION);

        let true_type = parse_ts_type(p)?;

        expect!(p, &P::Token::COLON);

        let false_type = parse_ts_type(p)?;

        Ok(Box::new(TsType::TsConditionalType(TsConditionalType {
            span: p.span(start),
            check_type,
            extends_type,
            true_type,
            false_type,
        })))
    })
}

/// `parsePropertyName` in babel.
///
/// Returns `(computed, key)`.
fn parse_ts_property_name<'a, P: Parser<'a>>(p: &mut P) -> PResult<(bool, Box<Expr>)> {
    let (computed, key) = if p.input_mut().eat(&P::Token::LBRACKET) {
        let key = parse_assignment_expr(p)?;
        expect!(p, &P::Token::RBRACKET);
        (true, key)
    } else {
        let ctx = p.ctx() | Context::InPropertyName;
        p.with_ctx(ctx).parse_with(|p| {
            // We check if it's valid for it to be a private name when we push it.
            let cur = cur!(p, true);
            let key = if cur.is_num() || cur.is_str() {
                parse_new_expr(p)
            } else {
                parse_maybe_private_name(p).map(|e| match e {
                    Either::Left(e) => {
                        // `#name` is not allowed in interfaces — report but
                        // keep the name for recovery.
                        p.emit_err(e.span(), SyntaxError::PrivateNameInInterface);

                        e.into()
                    }
                    Either::Right(e) => e.into(),
                })
            };
            key.map(|key| (false, key))
        })?
    };

    Ok((computed, key))
}

/// `tsParsePropertyOrMethodSignature`
///
/// After the name (+ optional `?`), a `(` or `<` means a method signature;
/// otherwise a property signature.
fn parse_ts_property_or_method_signature<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
    readonly: bool,
) -> PResult<Either<TsPropertySignature, TsMethodSignature>> {
    debug_assert!(p.input().syntax().typescript());

    let (computed, key) = parse_ts_property_name(p)?;

    let optional = p.input_mut().eat(&P::Token::QUESTION);

    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_lparen() || cur.is_less())
    {
        if readonly {
            syntax_error!(p, SyntaxError::ReadOnlyMethod)
        }

        let type_params = try_parse_ts_type_params(p, false, true)?;
        expect!(p, &P::Token::LPAREN);
        let params = parse_ts_binding_list_for_signature(p)?;
        let type_ann = if p.input_mut().is(&P::Token::COLON) {
            parse_ts_type_or_type_predicate_ann(p, &P::Token::COLON).map(Some)?
        } else {
            None
        };
        // -----

        parse_ts_type_member_semicolon(p)?;
        Ok(Either::Right(TsMethodSignature {
            span: p.span(start),
            computed,
            key,
            optional,
            type_params,
            params,
            type_ann,
        }))
    } else {
        let type_ann = try_parse_ts_type_ann(p)?;

        parse_ts_type_member_semicolon(p)?;
        Ok(Either::Left(TsPropertySignature {
            span: p.span(start),
            computed,
            readonly,
            key,
            optional,
            type_ann,
        }))
    }
}

/// `tsParseTypeMember`
///
/// Parses one member of a type literal / interface body: call or construct
/// signature, index signature, getter/setter signature, or
/// property/method signature.
fn parse_ts_type_member<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTypeElement> {
    debug_assert!(p.input().syntax().typescript());

    // Collapses the call/construct `Either` into the common element enum.
    fn into_type_elem(
        e: Either<TsCallSignatureDecl, TsConstructSignatureDecl>,
    ) -> TsTypeElement {
        match e {
            Either::Left(e) => e.into(),
            Either::Right(e) => e.into(),
        }
    }
    if p.input_mut()
        .cur()
        .is_some_and(|cur| cur.is_lparen() || cur.is_less())
    {
        return parse_ts_signature_member(p, SignatureParsingMode::TSCallSignatureDeclaration)
            .map(into_type_elem);
    }
    if p.input_mut().is(&P::Token::NEW) && ts_look_ahead(p, is_ts_start_of_construct_signature)? {
        return parse_ts_signature_member(p, SignatureParsingMode::TSConstructSignatureDeclaration)
            .map(into_type_elem);
    }
    // Instead of fullStart, we create a node here.
    let start = p.cur_pos();
    let readonly = parse_ts_modifier(p, &["readonly"], false)?.is_some();

    let idx = try_parse_ts_index_signature(p, start, readonly, false)?;
    if let Some(idx) = idx {
        return Ok(idx.into());
    }

    // Speculatively parse `get name(): T` / `set name(v)`; rolled back by
    // `try_parse_ts` if it doesn't match.
    if let Some(v) = try_parse_ts(p, |p| {
        let start = p.input_mut().cur_pos();

        if readonly {
            syntax_error!(p, SyntaxError::GetterSetterCannotBeReadonly)
        }

        let is_get = if p.input_mut().eat(&P::Token::GET) {
            true
        } else {
            expect!(p, &P::Token::SET);
            false
        };

        let (computed, key) = parse_ts_property_name(p)?;

        if is_get {
            // Getter: empty parameter list, optional return type.
            expect!(p, &P::Token::LPAREN);
            expect!(p, &P::Token::RPAREN);
            let type_ann = try_parse_ts_type_ann(p)?;

            parse_ts_type_member_semicolon(p)?;

            Ok(Some(TsTypeElement::TsGetterSignature(TsGetterSignature {
                span: p.span(start),
                key,
                computed,
                type_ann,
            })))
        } else {
            // Setter: exactly one parameter (only the first is kept).
            expect!(p, &P::Token::LPAREN);
            let params = parse_ts_binding_list_for_signature(p)?;
            if params.is_empty() {
                syntax_error!(p, SyntaxError::SetterParamRequired)
            }
            let param = params.into_iter().next().unwrap();

            parse_ts_type_member_semicolon(p)?;

            Ok(Some(TsTypeElement::TsSetterSignature(TsSetterSignature {
                span: p.span(start),
                key,
                computed,
                param,
            })))
        }
    }) {
        return Ok(v);
    }

    parse_ts_property_or_method_signature(p, start, readonly).map(|e| match e {
        Either::Left(e) => e.into(),
        Either::Right(e) => e.into(),
    })
}

/// `tsParseObjectTypeMembers`
///
/// Parses `{ ...members }` including both braces.
fn parse_ts_object_type_members<'a, P: Parser<'a>>(p: &mut P) -> PResult<Vec<TsTypeElement>> {
    debug_assert!(p.input().syntax().typescript());

    expect!(p, &P::Token::LBRACE);
    let members = parse_ts_list(p, ParsingContext::TypeMembers, |p| parse_ts_type_member(p))?;
    expect!(p, &P::Token::RBRACE);
    Ok(members)
}

/// `tsParseTypeLiteral`
pub fn parse_ts_type_lit<'a>(p: &mut impl Parser<'a>) -> PResult<TsTypeLit> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    let members = parse_ts_object_type_members(p)?;
    Ok(TsTypeLit {
        span: p.span(start),
        members,
    })
}

/// `tsParseInterfaceDeclaration`
///
/// `start` is the position of the `interface` keyword, already consumed by
/// the caller. `declare` is filled in by the caller.
pub fn parse_ts_interface_decl<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
) -> PResult<Box<TsInterfaceDecl>> {
    debug_assert!(p.input().syntax().typescript());

    let id = parse_ident_name(p)?;
    // An interface may not shadow a predefined/keyword type name (TS2427).
    match &*id.sym {
        "string" | "null" | "number" | "object" | "any" | "unknown" | "boolean" | "bigint"
        | "symbol" | "void" | "never" | "intrinsic" => {
            p.emit_err(id.span, SyntaxError::TS2427);
        }
        _ => {}
    }

    let type_params = try_parse_ts_type_params(p, true, false)?;

    let extends = if p.input_mut().eat(&P::Token::EXTENDS) {
        parse_ts_heritage_clause(p)?
    } else {
        Vec::new()
    };

    // Recover from
    //
    //     interface I extends A extends B {}
    if p.input_mut().is(&P::Token::EXTENDS) {
        p.emit_err(p.input().cur_span(), SyntaxError::TS1172);

        // Skip the duplicated clause until the body's `{`.
        while !eof!(p) && !p.input_mut().is(&P::Token::LBRACE) {
            p.bump();
        }
    }

    let body_start = p.cur_pos();
    let body = parse_ts_object_type_members(p.in_type().deref_mut())?;
    let body = TsInterfaceBody {
        span: p.span(body_start),
        body,
    };
    Ok(Box::new(TsInterfaceDecl {
        span: p.span(start),
        declare: false,
        id: id.into(),
        type_params,
        extends,
        body,
    }))
}

/// `tsParseTypeAssertion`
///
/// Parses the `<T>expr` form; `start` is the position of the `<`, already
/// consumed.
pub(super) fn parse_ts_type_assertion<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
) -> PResult<TsTypeAssertion> {
    debug_assert!(p.input().syntax().typescript());

    if p.input().syntax().disallow_ambiguous_jsx_like() {
        p.emit_err(p.span(start), SyntaxError::ReservedTypeAssertion);
    }

    // Not actually necessary to set state.inType because we never reach here if JSX
    // plugin is enabled, but need `tsInType` to satisfy the assertion in
    // `tsParseType`.
    let type_ann = p.in_type().parse_with(parse_ts_type)?;
    expect!(p, &P::Token::GREATER);
    let expr = parse_unary_expr(p)?;
    Ok(TsTypeAssertion {
        span: p.span(start),
        type_ann,
        expr,
    })
}

/// `tsParseImportType`
///
/// Parses `import("mod", { with: {...} }?).Qualifier?<TypeArgs>?`.
fn parse_ts_import_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsImportType> {
    let start = p.cur_pos();
    p.assert_and_bump(&P::Token::IMPORT)?;

    expect!(p, &P::Token::LPAREN);

    // Force an "unexpected eof" error here rather than later.
    let _ = cur!(p, false);

    let arg_span = p.input().cur_span();

    let cur = cur!(p, true);
    let arg = if cur.is_str() {
        let t = p.bump();
        let (value, raw) = t.take_str(p.input_mut());
        Str {
            span: arg_span,
            value,
            raw: Some(raw),
        }
    } else {
        // Non-string specifier: report TS1141 and recover with an empty string.
        p.bump();
        p.emit_err(arg_span, SyntaxError::TS1141);
        Str {
            span: arg_span,
            value: "".into(),
            raw: Some("\"\"".into()),
        }
    };

    // the "assert" keyword is deprecated and this syntax is niche, so
    // don't support it
    let attributes = if p.input_mut().eat(&P::Token::COMMA)
        && p.input().syntax().import_attributes()
        && p.input_mut().is(&P::Token::LBRACE)
    {
        Some(parse_ts_call_options(p)?)
    } else {
        None
    };

    expect!(p, &P::Token::RPAREN);

    let qualifier = if p.input_mut().eat(&P::Token::DOT) {
        parse_ts_entity_name(p, false).map(Some)?
    } else {
        None
    };

    let type_args = if p.input_mut().is(&P::Token::LESS) {
        parse_ts_type_args(
            p.with_ctx(p.ctx() & !Context::ShouldNotLexLtOrGtAsType)
                .deref_mut(),
        )
        .map(Some)?
    } else {
        None
    };

    Ok(TsImportType {
        span: p.span(start),
        arg,
        qualifier,
        type_args,
        attributes,
    })
}

// Parses the `{ with: { ... } }` import-attributes object of an import type.
fn parse_ts_call_options<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsImportCallOptions> {
    debug_assert!(p.input().syntax().typescript());
    let start = p.cur_pos();
    p.assert_and_bump(&P::Token::LBRACE)?;

    expect!(p, &P::Token::WITH);
    expect!(p, &P::Token::COLON);

    let value = match parse_object_expr(p)?
    {
        Expr::Object(v) => v,
        // `parse_object_expr` always yields an object literal here.
        _ => unreachable!(),
    };
    // Optional trailing comma before `}`.
    p.input_mut().eat(&P::Token::COMMA);
    expect!(p, &P::Token::RBRACE);
    Ok(TsImportCallOptions {
        span: p.span(start),
        with: Box::new(value),
    })
}

/// `tsParseTypeQuery`
///
/// Parses `typeof <entity name | import type> <TypeArgs>?`.
fn parse_ts_type_query<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsTypeQuery> {
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    expect!(p, &P::Token::TYPEOF);
    let expr_name = if p.input_mut().is(&P::Token::IMPORT) {
        parse_ts_import_type(p).map(From::from)?
    } else {
        parse_ts_entity_name(
            p, // allow_reserved_word
            true,
        )
        .map(From::from)?
    };

    // A line break before `<` ends the query (ASI-like behavior).
    let type_args =
        if !p.input_mut().had_line_break_before_cur() && p.input_mut().is(&P::Token::LESS) {
            Some(parse_ts_type_args(
                p.with_ctx(p.ctx() & !Context::ShouldNotLexLtOrGtAsType)
                    .deref_mut(),
            )?)
        } else {
            None
        };

    Ok(TsTypeQuery {
        span: p.span(start),
        expr_name,
        type_args,
    })
}

/// `tsParseModuleBlock`
fn parse_ts_module_block<'a, P: Parser<'a>>(p: &mut P) -> PResult<TsModuleBlock> {
    trace_cur!(p, parse_ts_module_block);

    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();
    expect!(p, &P::Token::LBRACE);
    // Inside of a module block is considered "top-level", meaning it can have
    // imports and exports.
    let body = p.with_ctx(p.ctx() | Context::TopLevel).parse_with(|p| {
        parse_module_item_block_body(
            p,
            /* directives */ false,
            /* end */ Some(&P::Token::RBRACE),
        )
    })?;

    Ok(TsModuleBlock {
        span: p.span(start),
        body,
    })
}

/// `tsParseModuleOrNamespaceDeclaration`
///
/// Parses `A.B.C { ... }` after `module`/`namespace`; dotted names are
/// represented as nested namespace declarations.
fn parse_ts_module_or_ns_decl<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
    namespace: bool,
) -> PResult<Box<TsModuleDecl>> {
    debug_assert!(p.input().syntax().typescript());

    let id = parse_ident_name(p)?;
    let body: TsNamespaceBody = if p.input_mut().eat(&P::Token::DOT) {
        // `namespace A.B` — recurse for `B` and wrap it as this decl's body.
        let inner_start = p.cur_pos();
        let inner = parse_ts_module_or_ns_decl(p, inner_start, namespace)?;
        let inner = TsNamespaceDecl {
            span: inner.span,
            id: match inner.id {
                TsModuleName::Ident(i) => i,
                // Recursion always produces an identifier name.
                _ => unreachable!(),
            },
            // The recursive call always fills `body` (block or nested ns).
            body: Box::new(inner.body.unwrap()),
            declare: inner.declare,
            global: inner.global,
        };
        inner.into()
    } else {
        parse_ts_module_block(p).map(From::from)?
    };

    Ok(Box::new(TsModuleDecl {
        span: p.span(start),
        declare: false,
        id: TsModuleName::Ident(id.into()),
        body: Some(body),
        global: false,
        namespace,
    }))
}

/// `tsParseAmbientExternalModuleDeclaration`
///
/// Parses `declare global { ... }` or `declare module "spec" { ... }?`.
fn parse_ts_ambient_external_module_decl<'a, P: Parser<'a>>(
    p: &mut P,
    start: BytePos,
) -> PResult<Box<TsModuleDecl>> {
    debug_assert!(p.input().syntax().typescript());

    let (global, id) = if p.input_mut().is(&P::Token::GLOBAL) {
        let id = parse_ident_name(p)?;
        (true, TsModuleName::Ident(id.into()))
    } else if cur!(p, true).is_str() {
        let id = parse_lit(p).map(|lit| match lit {
            Lit::Str(s) => TsModuleName::Str(s),
            // Guarded by `is_str()` above.
            _ => unreachable!(),
        })?;
        (false, id)
    } else {
        unexpected!(p, "global or a string literal");
    };

    // The body is optional: `declare module "m";` is valid.
    let body = if p.input_mut().is(&P::Token::LBRACE) {
        Some(parse_ts_module_block(p).map(TsNamespaceBody::from)?)
    } else {
        p.expect_general_semi()?;
        None
    };

    Ok(Box::new(TsModuleDecl {
        span: p.span(start),
        declare: false,
        id,
        global,
        body,
        namespace: false,
    }))
}

/// `tsParseNonArrayType`
///
/// The primary-type dispatcher: keyword types, type references, literal
/// types (including negative numeric/bigint literals), `import(...)` types,
/// `this` types and `this is T` predicates, etc.
pub fn parse_ts_non_array_type<'a, P: Parser<'a>>(p: &mut P) -> PResult<Box<TsType>> {
    if !cfg!(feature = "typescript") {
        unreachable!()
    }
    trace_cur!(p, parse_ts_non_array_type);
    debug_assert!(p.input().syntax().typescript());

    let start = p.cur_pos();

    let cur = cur!(p, true);
    if cur.is_known_ident()
        || cur.is_unknown_ident()
        || cur.is_void()
        || cur.is_yield()
        || cur.is_null()
        || cur.is_await()
        || cur.is_break()
    {
        // `asserts this` — an assertion predicate on `this`.
        if p.input_mut().is(&P::Token::ASSERTS) && peek!(p).is_some_and(|peek| peek.is_this()) {
            p.bump();
            let this_keyword = parse_ts_this_type_node(p)?;
            return parse_ts_this_type_predicate(p, start, true, this_keyword)
                .map(TsType::from)
                .map(Box::new);
        }
        // Map keyword tokens to keyword-type kinds; `None` means a plain
        // type reference.
        let kind = if p.input_mut().is(&P::Token::VOID) {
            Some(TsKeywordTypeKind::TsVoidKeyword)
        } else if p.input_mut().is(&P::Token::NULL) {
            Some(TsKeywordTypeKind::TsNullKeyword)
        } else if p.input_mut().is(&P::Token::ANY) {
            Some(TsKeywordTypeKind::TsAnyKeyword)
        } else if p.input_mut().is(&P::Token::BOOLEAN) {
            Some(TsKeywordTypeKind::TsBooleanKeyword)
        } else if p.input_mut().is(&P::Token::BIGINT) {
            Some(TsKeywordTypeKind::TsBigIntKeyword)
        } else if p.input_mut().is(&P::Token::NEVER) {
            Some(TsKeywordTypeKind::TsNeverKeyword)
        } else if p.input_mut().is(&P::Token::NUMBER) {
            Some(TsKeywordTypeKind::TsNumberKeyword)
        } else if p.input_mut().is(&P::Token::OBJECT) {
            Some(TsKeywordTypeKind::TsObjectKeyword)
        } else if p.input_mut().is(&P::Token::STRING) {
            Some(TsKeywordTypeKind::TsStringKeyword)
        } else if p.input_mut().is(&P::Token::SYMBOL) {
            Some(TsKeywordTypeKind::TsSymbolKeyword)
        } else if p.input_mut().is(&P::Token::UNKNOWN) {
            Some(TsKeywordTypeKind::TsUnknownKeyword)
        } else if p.input_mut().is(&P::Token::UNDEFINED) {
            Some(TsKeywordTypeKind::TsUndefinedKeyword)
        } else if p.input_mut().is(&P::Token::INTRINSIC) {
            Some(TsKeywordTypeKind::TsIntrinsicKeyword)
        } else {
            None
        };

        // `string.foo` etc. is a (qualified) type reference, not a keyword.
        let peeked_is_dot = peek!(p).is_some_and(|cur| cur.is_dot());

        match kind {
            Some(kind) if !peeked_is_dot => {
                p.bump();
                return Ok(Box::new(TsType::TsKeywordType(TsKeywordType {
                    span: p.span(start),
                    kind,
                })));
            }
            _ => {
                return parse_ts_type_ref(p).map(TsType::from).map(Box::new);
            }
        }
    } else if cur.is_bigint()
        || cur.is_str()
        || cur.is_num()
        || cur.is_true()
        || cur.is_false()
        || cur.is_backquote()
    {
        return parse_ts_lit_type_node(p).map(TsType::from).map(Box::new);
    } else if cur.is_minus() {
        // Negative numeric/bigint literal type, e.g. `-1`.
        let start = p.cur_pos();

        p.bump();

        let cur = cur!(p, true);
        if !(cur.is_num() || cur.is_bigint()) {
            unexpected!(p, "numeric literal or bigint literal")
        }

        let lit = parse_lit(p)?;
        let lit = match lit {
            Lit::Num(Number { span, value, raw }) => {
                // Rebuild the raw text with the leading `-` preserved.
                let mut new_raw = String::from("-");

                match raw {
                    Some(raw) => {
                        new_raw.push_str(&raw);
                    }
                    _ => {
                        write!(new_raw, "{value}").unwrap();
                    }
                };

                TsLit::Number(Number {
                    span,
                    value: -value,
                    raw: Some(new_raw.into()),
                })
            }
            Lit::BigInt(BigInt { span, value, raw }) => {
                let mut new_raw = String::from("-");

                match raw {
                    Some(raw) => {
                        new_raw.push_str(&raw);
                    }
                    _ => {
                        write!(new_raw, "{value}").unwrap();
                    }
                };

                TsLit::BigInt(BigInt {
                    span,
                    value: Box::new(-*value),
                    raw: Some(new_raw.into()),
                })
            }
            // Guarded above: only num/bigint reach here.
            _ => unreachable!(),
        };

        return Ok(Box::new(TsType::TsLitType(TsLitType {
            span: p.span(start),
            lit,
        })));
    } else if cur.is_import() {
        return parse_ts_import_type(p).map(TsType::from).map(Box::new);
    } else if cur.is_this() {
        let start = p.cur_pos();
        let this_keyword = parse_ts_this_type_node(p)?;
        return if !p.input_mut().had_line_break_before_cur() && p.input_mut().is(&P::Token::IS) {
            parse_ts_this_type_predicate(p, start, false,
this_keyword) + .map(TsType::from) + .map(Box::new) + } else { + Ok(Box::new(TsType::TsThisType(this_keyword))) + }; + } else if cur.is_typeof() { + return parse_ts_type_query(p).map(TsType::from).map(Box::new); + } else if cur.is_lbrace() { + return if ts_look_ahead(p, is_ts_start_of_mapped_type)? { + parse_ts_mapped_type(p).map(TsType::from).map(Box::new) + } else { + parse_ts_type_lit(p).map(TsType::from).map(Box::new) + }; + } else if cur.is_lbracket() { + return parse_ts_tuple_type(p).map(TsType::from).map(Box::new); + } else if cur.is_lparen() { + return parse_ts_parenthesized_type(p) + .map(TsType::from) + .map(Box::new); + } + + // switch (p.state.type) { + // } + + unexpected!( + p, + "an identifier, void, yield, null, await, break, a string literal, a numeric literal, \ + true, false, `, -, import, this, typeof, {, [, (" + ) +} + +/// `tsParseExpressionStatement` +pub fn parse_ts_expr_stmt<'a, P: Parser<'a>>( + p: &mut P, + decorators: Vec, + expr: Ident, +) -> PResult> { + if !cfg!(feature = "typescript") { + return Ok(Default::default()); + } + + let start = expr.span_lo(); + + match &*expr.sym { + "declare" => { + let decl = try_parse_ts_declare(p, start, decorators)?; + if let Some(decl) = decl { + Ok(Some(make_decl_declare(decl))) + } else { + Ok(None) + } + } + "global" => { + // `global { }` (with no `declare`) may appear inside an ambient module + // declaration. + // Would like to use tsParseAmbientExternalModuleDeclaration here, but already + // ran past "global". 
+ if p.input_mut().is(&P::Token::LBRACE) { + let global = true; + let id = TsModuleName::Ident(expr); + let body = parse_ts_module_block(p) + .map(TsNamespaceBody::from) + .map(Some)?; + Ok(Some( + TsModuleDecl { + span: p.span(start), + global, + declare: false, + namespace: false, + id, + body, + } + .into(), + )) + } else { + Ok(None) + } + } + _ => parse_ts_decl(p, start, decorators, expr.sym, /* next */ false), + } +} + +/// `tsTryParseDeclare` +pub fn try_parse_ts_declare<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, +) -> PResult> { + if !p.syntax().typescript() { + return Ok(None); + } + + if p.ctx().contains(Context::InDeclare) + && matches!(p.syntax(), Syntax::Typescript(TsSyntax { dts: false, .. })) + { + let span_of_declare = p.span(start); + p.emit_err(span_of_declare, SyntaxError::TS1038); + } + + let declare_start = start; + let ctx = p.ctx() | Context::InDeclare; + p.with_ctx(ctx).parse_with(|p| { + if p.input_mut().is(&P::Token::FUNCTION) { + return parse_fn_decl(p, decorators) + .map(|decl| match decl { + Decl::Fn(f) => FnDecl { + declare: true, + function: Box::new(Function { + span: Span { + lo: declare_start, + ..f.function.span + }, + ..*f.function + }), + ..f + } + .into(), + _ => decl, + }) + .map(Some); + } + + if p.input_mut().is(&P::Token::CLASS) { + return parse_class_decl(p, start, start, decorators, false) + .map(|decl| match decl { + Decl::Class(c) => ClassDecl { + declare: true, + class: Box::new(Class { + span: Span { + lo: declare_start, + ..c.class.span + }, + ..*c.class + }), + ..c + } + .into(), + _ => decl, + }) + .map(Some); + } + + if p.input_mut().is(&P::Token::CONST) && peek!(p).is_some_and(|peek| peek.is_enum()) { + p.assert_and_bump(&P::Token::CONST)?; + let _ = cur!(p, true); + p.assert_and_bump(&P::Token::ENUM)?; + + return parse_ts_enum_decl(p, start, /* is_const */ true) + .map(|decl| TsEnumDecl { + declare: true, + span: Span { + lo: declare_start, + ..decl.span + }, + ..*decl + }) + 
.map(Box::new) + .map(From::from) + .map(Some); + } + if p.input_mut() + .cur() + .is_some_and(|cur| cur.is_const() || cur.is_var() || cur.is_let()) + { + return parse_var_stmt(p, false) + .map(|decl| VarDecl { + declare: true, + span: Span { + lo: declare_start, + ..decl.span + }, + ..*decl + }) + .map(Box::new) + .map(From::from) + .map(Some); + } + + if p.input_mut().is(&P::Token::GLOBAL) { + return parse_ts_ambient_external_module_decl(p, start) + .map(Decl::from) + .map(make_decl_declare) + .map(Some); + } else if p.input_mut().cur().is_some_and(|cur| cur.is_word()) { + let cur = cur!(p, true); + let value = cur.clone().take_word(p.input_mut()).unwrap(); + return parse_ts_decl(p, start, decorators, value, /* next */ true) + .map(|v| v.map(make_decl_declare)); + } + + Ok(None) + }) +} + +/// `tsTryParseExportDeclaration` +/// +/// Note: this won't be called unless the keyword is allowed in +/// `shouldParseExportDeclaration`. +pub fn try_parse_ts_export_decl<'a, P: Parser<'a>>( + p: &mut P, + decorators: Vec, + value: Atom, +) -> Option { + if !cfg!(feature = "typescript") { + return None; + } + + try_parse_ts(p, |p| { + let start = p.cur_pos(); + let opt = parse_ts_decl(p, start, decorators, value, true)?; + Ok(opt) + }) +} + +/// Common to tsTryParseDeclare, tsTryParseExportDeclaration, and +/// tsParseExpressionStatement. 
+/// +/// `tsParseDeclaration` +fn parse_ts_decl<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, + decorators: Vec, + value: Atom, + next: bool, +) -> PResult> { + if !cfg!(feature = "typescript") { + return Ok(Default::default()); + } + + match &*value { + "abstract" => { + if next + || (p.input_mut().is(&P::Token::CLASS) + && !p.input_mut().had_line_break_before_cur()) + { + if next { + p.bump(); + } + return Ok(Some(parse_class_decl(p, start, start, decorators, true)?)); + } + } + + "enum" => { + if next || p.is_ident_ref() { + if next { + p.bump(); + } + return parse_ts_enum_decl(p, start, /* is_const */ false) + .map(From::from) + .map(Some); + } + } + + "interface" => { + if next || (p.is_ident_ref()) { + if next { + p.bump(); + } + + return parse_ts_interface_decl(p, start).map(From::from).map(Some); + } + } + + "module" if !p.input_mut().had_line_break_before_cur() => { + if next { + p.bump(); + } + + let cur = cur!(p, true); + if cur.is_str() { + return parse_ts_ambient_external_module_decl(p, start) + .map(From::from) + .map(Some); + } else if next || p.is_ident_ref() { + return parse_ts_module_or_ns_decl(p, start, false) + .map(From::from) + .map(Some); + } + } + + "namespace" => { + if next || p.is_ident_ref() { + if next { + p.bump(); + } + return parse_ts_module_or_ns_decl(p, start, true) + .map(From::from) + .map(Some); + } + } + + "type" => { + if next || (!p.input_mut().had_line_break_before_cur() && p.is_ident_ref()) { + if next { + p.bump(); + } + return parse_ts_type_alias_decl(p, start).map(From::from).map(Some); + } + } + + _ => {} + } + + Ok(None) +} + +/// `tsTryParseGenericAsyncArrowFunction` +pub fn try_parse_ts_generic_async_arrow_fn<'a, P: Parser<'a>>( + p: &mut P, + start: BytePos, +) -> PResult> { + if !cfg!(feature = "typescript") { + return Ok(Default::default()); + } + + let res = if p + .input_mut() + .cur() + .is_some_and(|cur| cur.is_less() || cur.is_jsx_tag_start()) + { + try_parse_ts(p, |p| { + let type_params = 
parse_ts_type_params(p, false, false)?; + // Don't use overloaded parseFunctionParams which would look for "<" again. + expect!(p, &P::Token::LPAREN); + let params: Vec = parse_formal_params(p)?.into_iter().map(|p| p.pat).collect(); + expect!(p, &P::Token::RPAREN); + let return_type = try_parse_ts_type_or_type_predicate_ann(p)?; + expect!(p, &P::Token::ARROW); + + Ok(Some((type_params, params, return_type))) + }) + } else { + None + }; + + let (type_params, params, return_type) = match res { + Some(v) => v, + None => return Ok(None), + }; + + let ctx = (p.ctx() | Context::InAsync) & !Context::InGenerator; + p.with_ctx(ctx).parse_with(|p| { + let is_generator = false; + let is_async = true; + let body = + parse_fn_block_or_expr_body(p, true, false, true, params.is_simple_parameter_list())?; + Ok(Some(ArrowExpr { + span: p.span(start), + body, + is_async, + is_generator, + type_params: Some(type_params), + params, + return_type, + ..Default::default() + })) + }) +} diff --git a/crates/swc_ecma_lexer/src/common/parser/util.rs b/crates/swc_ecma_lexer/src/common/parser/util.rs new file mode 100644 index 000000000000..66a98e871c72 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/util.rs @@ -0,0 +1,88 @@ +use swc_atoms::{atom, Atom}; +use swc_common::{Span, Spanned}; +use swc_ecma_ast::{ + BindingIdent, BlockStmt, Decl, Expr, Ident, IdentName, JSXElementName, JSXMemberExpr, + JSXNamespacedName, JSXObject, Key, Param, Pat, PropName, Str, +}; + +pub fn unwrap_ts_non_null(mut expr: &Expr) -> &Expr { + while let Expr::TsNonNull(ts_non_null) = expr { + expr = &ts_non_null.expr; + } + expr +} + +pub fn is_not_this(p: &Param) -> bool { + !matches!( + &p.pat, + Pat::Ident(BindingIdent { + id: Ident{ sym: this, .. }, + .. 
+ }) if atom!("this").eq(this) + ) +} + +pub fn has_use_strict(block: &BlockStmt) -> Option { + block + .stmts + .iter() + .take_while(|s| s.can_precede_directive()) + .find_map(|s| { + if s.is_use_strict() { + Some(s.span()) + } else { + None + } + }) +} + +pub fn is_constructor(key: &Key) -> bool { + matches!( + &key, + Key::Public(PropName::Ident(IdentName { + sym: constructor, + .. + })) | Key::Public(PropName::Str(Str { + value: constructor, + .. + })) if atom!("constructor").eq(constructor) + ) +} + +pub fn get_qualified_jsx_name(name: &JSXElementName) -> Atom { + fn get_qualified_obj_name(obj: &JSXObject) -> Atom { + match *obj { + JSXObject::Ident(ref i) => i.sym.clone(), + JSXObject::JSXMemberExpr(ref member) => format!( + "{}.{}", + get_qualified_obj_name(&member.obj), + member.prop.sym + ) + .into(), + } + } + match *name { + JSXElementName::Ident(ref i) => i.sym.clone(), + JSXElementName::JSXNamespacedName(JSXNamespacedName { + ref ns, ref name, .. + }) => format!("{}:{}", ns.sym, name.sym).into(), + JSXElementName::JSXMemberExpr(JSXMemberExpr { + ref obj, ref prop, .. + }) => format!("{}.{}", get_qualified_obj_name(obj), prop.sym).into(), + } +} + +/// Mark as declare +pub fn make_decl_declare(mut decl: Decl) -> Decl { + match decl { + Decl::Class(ref mut c) => c.declare = true, + Decl::Fn(ref mut f) => f.declare = true, + Decl::Var(ref mut v) => v.declare = true, + Decl::TsInterface(ref mut i) => i.declare = true, + Decl::TsTypeAlias(ref mut a) => a.declare = true, + Decl::TsEnum(ref mut e) => e.declare = true, + Decl::TsModule(ref mut m) => m.declare = true, + Decl::Using(..) 
=> unreachable!("Using is not a valid declaration for `declare` keyword"), + } + decl +} diff --git a/crates/swc_ecma_lexer/src/common/parser/verifier.rs b/crates/swc_ecma_lexer/src/common/parser/verifier.rs new file mode 100644 index 000000000000..a935bbce1a2e --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/parser/verifier.rs @@ -0,0 +1,24 @@ +use swc_common::{Span, Spanned}; +use swc_ecma_ast::{AssignProp, Expr}; +use swc_ecma_visit::{noop_visit_type, Visit, VisitWith}; + +use crate::error::SyntaxError; + +pub struct Verifier { + pub errors: Vec<(Span, SyntaxError)>, +} + +impl Visit for Verifier { + noop_visit_type!(); + + fn visit_assign_prop(&mut self, p: &AssignProp) { + self.errors.push((p.span(), SyntaxError::AssignProperty)); + } + + fn visit_expr(&mut self, e: &Expr) { + match *e { + Expr::Fn(..) | Expr::Arrow(..) => {} + _ => e.visit_children_with(self), + } + } +} diff --git a/crates/swc_ecma_lexer/src/common/syntax.rs b/crates/swc_ecma_lexer/src/common/syntax.rs new file mode 100644 index 000000000000..858cd278d094 --- /dev/null +++ b/crates/swc_ecma_lexer/src/common/syntax.rs @@ -0,0 +1,229 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] +#[serde(deny_unknown_fields, tag = "syntax")] +pub enum Syntax { + /// Standard + #[serde(rename = "ecmascript")] + Es(EsSyntax), + /// This variant requires the cargo feature `typescript` to be enabled. + #[cfg(feature = "typescript")] + #[cfg_attr(docsrs, doc(cfg(feature = "typescript")))] + #[serde(rename = "typescript")] + Typescript(TsSyntax), +} + +impl Default for Syntax { + fn default() -> Self { + Syntax::Es(Default::default()) + } +} + +impl Syntax { + pub fn auto_accessors(self) -> bool { + match self { + Syntax::Es(EsSyntax { + auto_accessors: true, + .. 
+ }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + _ => false, + } + } + + pub fn import_attributes(self) -> bool { + match self { + Syntax::Es(EsSyntax { + import_attributes, .. + }) => import_attributes, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } + + /// Should we parse jsx? + pub fn jsx(self) -> bool { + match self { + Syntax::Es(EsSyntax { jsx: true, .. }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(TsSyntax { tsx: true, .. }) => true, + _ => false, + } + } + + pub fn fn_bind(self) -> bool { + matches!(self, Syntax::Es(EsSyntax { fn_bind: true, .. })) + } + + pub fn decorators(self) -> bool { + match self { + Syntax::Es(EsSyntax { + decorators: true, .. + }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(TsSyntax { + decorators: true, .. + }) => true, + _ => false, + } + } + + pub fn decorators_before_export(self) -> bool { + match self { + Syntax::Es(EsSyntax { + decorators_before_export: true, + .. + }) => true, + #[cfg(feature = "typescript")] + Syntax::Typescript(..) => true, + _ => false, + } + } + + /// Should we parse typescript? + #[cfg(not(feature = "typescript"))] + pub const fn typescript(self) -> bool { + false + } + + /// Should we parse typescript? + #[cfg(feature = "typescript")] + pub const fn typescript(self) -> bool { + matches!(self, Syntax::Typescript(..)) + } + + pub fn export_default_from(self) -> bool { + matches!( + self, + Syntax::Es(EsSyntax { + export_default_from: true, + .. + }) + ) + } + + pub fn dts(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => t.dts, + _ => false, + } + } + + pub fn allow_super_outside_method(self) -> bool { + match self { + Syntax::Es(EsSyntax { + allow_super_outside_method, + .. 
+ }) => allow_super_outside_method, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } + + pub fn allow_return_outside_function(self) -> bool { + match self { + Syntax::Es(EsSyntax { + allow_return_outside_function, + .. + }) => allow_return_outside_function, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => false, + } + } + + pub fn early_errors(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => !t.no_early_errors, + Syntax::Es(..) => true, + } + } + + pub fn disallow_ambiguous_jsx_like(self) -> bool { + match self { + #[cfg(feature = "typescript")] + Syntax::Typescript(t) => t.disallow_ambiguous_jsx_like, + _ => false, + } + } + + pub fn explicit_resource_management(&self) -> bool { + match self { + Syntax::Es(EsSyntax { + explicit_resource_management: using_decl, + .. + }) => *using_decl, + #[cfg(feature = "typescript")] + Syntax::Typescript(_) => true, + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TsSyntax { + #[serde(default)] + pub tsx: bool, + + #[serde(default)] + pub decorators: bool, + + /// `.d.ts` + #[serde(skip, default)] + pub dts: bool, + + #[serde(skip, default)] + pub no_early_errors: bool, + + /// babel: `disallowAmbiguousJSXLike` + /// Even when JSX parsing is not enabled, this option disallows using syntax + /// that would be ambiguous with JSX (` y` type assertions and + /// `()=>{}` type arguments) + /// see: https://babeljs.io/docs/en/babel-plugin-transform-typescript#disallowambiguousjsxlike + #[serde(skip, default)] + pub disallow_ambiguous_jsx_like: bool, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct EsSyntax { + #[serde(default)] + pub jsx: bool, + + /// Support function bind expression. + #[serde(rename = "functionBind")] + #[serde(default)] + pub fn_bind: bool, + + /// Enable decorators. 
+ #[serde(default)] + pub decorators: bool, + + /// babel: `decorators.decoratorsBeforeExport` + /// + /// Effective only if `decorator` is true. + #[serde(rename = "decoratorsBeforeExport")] + #[serde(default)] + pub decorators_before_export: bool, + + #[serde(default)] + pub export_default_from: bool, + + /// Stage 3. + #[serde(default, alias = "importAssertions")] + pub import_attributes: bool, + + #[serde(default, rename = "allowSuperOutsideMethod")] + pub allow_super_outside_method: bool, + + #[serde(default, rename = "allowReturnOutsideFunction")] + pub allow_return_outside_function: bool, + + #[serde(default)] + pub auto_accessors: bool, + + #[serde(default)] + pub explicit_resource_management: bool, +} diff --git a/crates/swc_ecma_lexer/src/error.rs b/crates/swc_ecma_lexer/src/error.rs index ac2626360a40..6d75cf3311e9 100644 --- a/crates/swc_ecma_lexer/src/error.rs +++ b/crates/swc_ecma_lexer/src/error.rs @@ -8,8 +8,6 @@ use swc_common::{ Span, Spanned, }; -use crate::token::Token; - /// Note: this struct is 8 bytes. #[derive(Debug, Clone, PartialEq)] pub struct Error { @@ -126,7 +124,7 @@ pub enum SyntaxError { }, ReservedWordInImport, AssignProperty, - Expected(&'static Token, String), + Expected(String, String), ExpectedSemiForExprStmt { expr: Span, }, @@ -374,7 +372,7 @@ impl SyntaxError { SyntaxError::ReservedWordInImport => "cannot import as reserved word".into(), SyntaxError::AssignProperty => "assignment property is invalid syntax".into(), SyntaxError::Expected(token, ref got) => { - format!("Expected '{token:?}', got '{got}'").into() + format!("Expected '{token}', got '{got}'").into() } SyntaxError::ExpectedSemiForExprStmt { .. 
} => "Expected ';', '}' or ".into(), diff --git a/crates/swc_ecma_lexer/src/input.rs b/crates/swc_ecma_lexer/src/input.rs index ae9633803b6d..db69ba651adc 100644 --- a/crates/swc_ecma_lexer/src/input.rs +++ b/crates/swc_ecma_lexer/src/input.rs @@ -1,61 +1,17 @@ use std::{cell::RefCell, mem, mem::take, rc::Rc}; -use debug_unreachable::debug_unreachable; use lexer::TokenContexts; use swc_common::{BytePos, Span}; use swc_ecma_ast::EsVersion; use crate::{ + common::{input::Tokens, syntax::Syntax}, error::Error, lexer::{self}, - tok, token::*, - Context, Syntax, + Context, }; -/// Clone should be cheap if you are parsing typescript because typescript -/// syntax requires backtracking. -pub trait Tokens: Clone + Iterator { - fn set_ctx(&mut self, ctx: Context); - fn ctx(&self) -> Context; - fn syntax(&self) -> Syntax; - fn target(&self) -> EsVersion; - - fn start_pos(&self) -> BytePos { - BytePos(0) - } - - fn set_expr_allowed(&mut self, allow: bool); - fn set_next_regexp(&mut self, start: Option); - - fn token_context(&self) -> &lexer::TokenContexts; - fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; - fn set_token_context(&mut self, _c: lexer::TokenContexts); - - /// Implementors should use Rc>>. - /// - /// It is required because parser should backtrack while parsing typescript - /// code. - fn add_error(&self, error: Error); - - /// Add an error which is valid syntax in script mode. - /// - /// This errors should be dropped if it's not a module. - /// - /// Implementor should check for if [Context].module, and buffer errors if - /// module is false. Also, implementors should move errors to the error - /// buffer on set_ctx if the parser mode become module mode. - fn add_module_mode_error(&self, error: Error); - - fn end_pos(&self) -> BytePos; - - fn take_errors(&mut self) -> Vec; - - /// If the program was parsed as a script, this contains the module - /// errors should the program be identified as a module in the future. 
- fn take_script_module_errors(&mut self) -> Vec; -} - #[derive(Clone)] pub struct TokensInput { iter: as IntoIterator>::IntoIter, @@ -93,7 +49,7 @@ impl Iterator for TokensInput { } } -impl Tokens for TokensInput { +impl Tokens for TokensInput { fn set_ctx(&mut self, ctx: Context) { if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() { let mut module_errors = self.module_errors.borrow_mut(); @@ -102,42 +58,53 @@ impl Tokens for TokensInput { self.ctx = ctx; } + #[inline(always)] fn ctx(&self) -> Context { self.ctx } + #[inline(always)] fn syntax(&self) -> Syntax { self.syntax } + #[inline(always)] fn target(&self) -> EsVersion { self.target } + #[inline(always)] fn start_pos(&self) -> BytePos { self.start_pos } + #[inline(always)] fn set_expr_allowed(&mut self, _: bool) {} + #[inline(always)] fn set_next_regexp(&mut self, _: Option) {} + #[inline(always)] fn token_context(&self) -> &TokenContexts { &self.token_ctx } + #[inline(always)] fn token_context_mut(&mut self) -> &mut TokenContexts { &mut self.token_ctx } + #[inline(always)] fn set_token_context(&mut self, c: TokenContexts) { self.token_ctx = c; } + #[inline(always)] fn add_error(&self, error: Error) { self.errors.borrow_mut().push(error); } + #[inline(always)] fn add_module_mode_error(&self, error: Error) { if self.ctx.contains(Context::Module) { self.add_error(error); @@ -146,10 +113,12 @@ impl Tokens for TokensInput { self.module_errors.borrow_mut().push(error); } + #[inline(always)] fn take_errors(&mut self) -> Vec { take(&mut self.errors.borrow_mut()) } + #[inline(always)] fn take_script_module_errors(&mut self) -> Vec { take(&mut self.module_errors.borrow_mut()) } @@ -165,12 +134,12 @@ impl Tokens for TokensInput { /// Note: Lexer need access to parser's context to lex correctly. 
#[derive(Debug)] -pub struct Capturing { +pub struct Capturing> { inner: I, captured: Rc>>, } -impl Clone for Capturing { +impl> Clone for Capturing { fn clone(&self) -> Self { Capturing { inner: self.inner.clone(), @@ -179,7 +148,7 @@ impl Clone for Capturing { } } -impl Capturing { +impl> Capturing { pub fn new(input: I) -> Self { Capturing { inner: input, @@ -197,7 +166,7 @@ impl Capturing { } } -impl Iterator for Capturing { +impl> Iterator for Capturing { type Item = TokenAndSpan; fn next(&mut self) -> Option { @@ -225,55 +194,68 @@ impl Iterator for Capturing { } } -impl Tokens for Capturing { +impl> Tokens for Capturing { + #[inline(always)] fn set_ctx(&mut self, ctx: Context) { self.inner.set_ctx(ctx) } + #[inline(always)] fn ctx(&self) -> Context { self.inner.ctx() } + #[inline(always)] fn syntax(&self) -> Syntax { self.inner.syntax() } + #[inline(always)] fn target(&self) -> EsVersion { self.inner.target() } + #[inline(always)] fn start_pos(&self) -> BytePos { self.inner.start_pos() } + #[inline(always)] fn set_expr_allowed(&mut self, allow: bool) { self.inner.set_expr_allowed(allow) } + #[inline(always)] fn set_next_regexp(&mut self, start: Option) { self.inner.set_next_regexp(start); } + #[inline(always)] fn token_context(&self) -> &TokenContexts { self.inner.token_context() } + #[inline(always)] fn token_context_mut(&mut self) -> &mut TokenContexts { self.inner.token_context_mut() } + #[inline(always)] fn set_token_context(&mut self, c: TokenContexts) { self.inner.set_token_context(c) } + #[inline(always)] fn add_error(&self, error: Error) { self.inner.add_error(error); } + #[inline(always)] fn add_module_mode_error(&self, error: Error) { self.inner.add_module_mode_error(error) } + #[inline(always)] fn take_errors(&mut self) -> Vec { self.inner.take_errors() } @@ -289,7 +271,7 @@ impl Tokens for Capturing { /// This struct is responsible for managing current token and peeked token. 
#[derive(Clone)] -pub struct Buffer { +pub struct Buffer> { pub iter: I, /// Span of the previous token. pub prev_span: Span, @@ -298,8 +280,14 @@ pub struct Buffer { pub next: Option, } -impl Buffer { - pub fn new(lexer: I) -> Self { +impl<'a, I: Tokens> crate::common::parser::buffer::Buffer<'a> for Buffer { + type I = I; + type Lexer = super::lexer::Lexer<'a>; + type Next = TokenAndSpan; + type Token = Token; + type TokenAndSpan = TokenAndSpan; + + fn new(lexer: I) -> Self { let start_pos = lexer.start_pos(); Buffer { iter: lexer, @@ -309,90 +297,37 @@ impl Buffer { } } - pub fn store(&mut self, token: Token) { - debug_assert!(self.next.is_none()); - debug_assert!(self.cur.is_none()); - let span = self.prev_span; - - self.cur = Some(TokenAndSpan { - span, - token, - had_line_break: false, - }); + #[inline(always)] + fn set_cur(&mut self, token: TokenAndSpan) { + self.cur = Some(token); } - #[allow(dead_code)] - pub fn cur_debug(&self) -> Option<&Token> { - self.cur.as_ref().map(|it| &it.token) + #[inline(always)] + fn next(&self) -> Option<&TokenAndSpan> { + self.next.as_ref() } #[cold] #[inline(never)] - pub fn dump_cur(&mut self) -> String { + fn dump_cur(&mut self) -> String { match self.cur() { Some(v) => format!("{v:?}"), None => "".to_string(), } } - /// Returns current token. - pub fn bump(&mut self) -> Token { - let prev = match self.cur.take() { - Some(t) => t, - None => unsafe { - debug_unreachable!( - "Current token is `None`. 
Parser should not call bump() without knowing \ - current token" - ) - }, - }; - self.prev_span = prev.span; - - prev.token + #[inline(always)] + fn set_next(&mut self, token: Option) { + self.next = token; } - pub fn knows_cur(&self) -> bool { - self.cur.is_some() + #[inline(always)] + fn next_mut(&mut self) -> &mut Option { + &mut self.next } - pub fn peek(&mut self) -> Option<&Token> { - debug_assert!( - self.cur.is_some(), - "parser should not call peek() without knowing current token" - ); - - if self.next.is_none() { - self.next = self.iter.next(); - } - - self.next.as_ref().map(|ts| &ts.token) - } - - /// Returns true on eof. - pub fn had_line_break_before_cur(&mut self) -> bool { - self.cur(); - - self.cur - .as_ref() - .map(|it| it.had_line_break) - .unwrap_or_else(|| true) - } - - /// This returns true on eof. - pub fn has_linebreak_between_cur_and_peeked(&mut self) -> bool { - let _ = self.peek(); - self.next - .as_ref() - .map(|item| item.had_line_break) - .unwrap_or({ - // return true on eof. - true - }) - } - - /// Get current token. Returns `None` only on eof. 
#[inline] - pub fn cur(&mut self) -> Option<&Token> { + fn cur(&mut self) -> Option<&Token> { if self.cur.is_none() { // If we have peeked a token, take it instead of calling lexer.next() self.cur = self.next.take().or_else(|| self.iter.next()); @@ -404,165 +339,50 @@ impl Buffer { } } - #[inline] - pub fn cut_lshift(&mut self) { - debug_assert!( - self.is(&tok!("<<")), - "parser should only call cut_lshift when encountering LShift token" - ); - self.cur = Some(TokenAndSpan { - token: tok!('<'), - span: self.cur_span().with_lo(self.cur_span().lo + BytePos(1)), - had_line_break: false, - }); - } - - pub fn merge_lt_gt(&mut self) { + fn peek<'b>(&'b mut self) -> Option<&'b Token> + where + TokenAndSpan: 'b, + { debug_assert!( - self.is(&tok!('<')) || self.is(&tok!('>')), - "parser should only call merge_lt_gt when encountering '<' or '>' token" + self.cur().is_some(), + "parser should not call peek() without knowing current token" ); - let span = self.cur_span(); - - if self.peek().is_none() { - return; - } - - let next = self.next.as_ref().unwrap(); - - if span.hi != next.span.lo { - return; - } - - let cur = self.cur.take().unwrap(); - let next = self.next.take().unwrap(); - - let token = match (&cur.token, &next.token) { - (tok!('>'), tok!('>')) => tok!(">>"), - (tok!('>'), tok!('=')) => tok!(">="), - (tok!('>'), tok!(">>")) => tok!(">>>"), - (tok!('>'), tok!(">=")) => tok!(">>="), - (tok!('>'), tok!(">>=")) => tok!(">>>="), - (tok!('<'), tok!('<')) => tok!("<<"), - (tok!('<'), tok!('=')) => tok!("<="), - (tok!('<'), tok!("<=")) => tok!("<<="), - - _ => { - self.cur = Some(cur); - self.next = Some(next); - return; - } - }; - let span = span.with_hi(next.span.hi); - - self.cur = Some(TokenAndSpan { - token, - span, - had_line_break: cur.had_line_break, - }); - } - - #[inline] - pub fn is(&mut self, expected: &Token) -> bool { - match self.cur() { - Some(t) => *expected == *t, - _ => false, + if self.next().is_none() { + let next = self.iter_mut().next(); + 
self.set_next(next); } - } - #[inline] - pub fn eat(&mut self, expected: &Token) -> bool { - let v = self.is(expected); - if v { - self.bump(); - } - v + self.next().map(|ts| &ts.token) } - /// Returns start of current token. - #[inline] - pub fn cur_pos(&mut self) -> BytePos { - let _ = self.cur(); - self.cur - .as_ref() - .map(|item| item.span.lo) - .unwrap_or_else(|| { - // eof - self.last_pos() - }) + #[inline(always)] + fn get_cur(&self) -> Option<&TokenAndSpan> { + self.cur.as_ref() } - #[inline] - pub fn cur_span(&self) -> Span { - let data = self - .cur - .as_ref() - .map(|item| item.span) - .unwrap_or(self.prev_span); - - Span::new(data.lo, data.hi) - } - - /// Returns last byte position of previous token. - #[inline] - pub fn last_pos(&self) -> BytePos { - self.prev_span.hi + #[inline(always)] + fn get_cur_mut(&mut self) -> &mut Option { + &mut self.cur } - /// Returns span of the previous token. - #[inline] - pub fn prev_span(&self) -> Span { + #[inline(always)] + fn prev_span(&self) -> Span { self.prev_span } - #[inline] - pub fn get_ctx(&self) -> Context { - self.iter.ctx() + #[inline(always)] + fn set_prev_span(&mut self, span: Span) { + self.prev_span = span; } - #[inline] - pub fn set_ctx(&mut self, ctx: Context) { - self.iter.set_ctx(ctx); + #[inline(always)] + fn iter(&self) -> &I { + &self.iter } - #[inline] - pub fn syntax(&self) -> Syntax { - self.iter.syntax() - } - - #[inline] - pub fn target(&self) -> EsVersion { - self.iter.target() - } - - #[inline] - pub fn set_expr_allowed(&mut self, allow: bool) { - self.iter.set_expr_allowed(allow) - } - - #[inline] - pub fn set_next_regexp(&mut self, start: Option) { - self.iter.set_next_regexp(start); - } - - #[inline] - pub fn token_context(&self) -> &lexer::TokenContexts { - self.iter.token_context() - } - - #[inline] - pub fn token_context_mut(&mut self) -> &mut lexer::TokenContexts { - self.iter.token_context_mut() - } - - #[inline] - pub fn set_token_context(&mut self, c: lexer::TokenContexts) { 
- self.iter.set_token_context(c) - } - - #[inline] - pub fn end_pos(&self) -> BytePos { - self.iter.end_pos() + #[inline(always)] + fn iter_mut(&mut self) -> &mut I { + &mut self.iter } } diff --git a/crates/swc_ecma_lexer/src/lexer/input.rs b/crates/swc_ecma_lexer/src/lexer/input.rs deleted file mode 100644 index be5699338314..000000000000 --- a/crates/swc_ecma_lexer/src/lexer/input.rs +++ /dev/null @@ -1 +0,0 @@ -pub use swc_common::input::*; diff --git a/crates/swc_ecma_lexer/src/lexer/jsx.rs b/crates/swc_ecma_lexer/src/lexer/jsx.rs index 5bac72fa8f29..ec533ff5028e 100644 --- a/crates/swc_ecma_lexer/src/lexer/jsx.rs +++ b/crates/swc_ecma_lexer/src/lexer/jsx.rs @@ -1,5 +1,4 @@ use either::Either; -use smartstring::{LazyCompact, SmartString}; use super::*; @@ -125,529 +124,4 @@ impl Lexer<'_> { } } } - - pub(super) fn read_jsx_entity(&mut self) -> LexResult<(char, String)> { - debug_assert!(self.syntax.jsx()); - - fn from_code(s: &str, radix: u32) -> LexResult { - // TODO(kdy1): unwrap -> Err - let c = char::from_u32( - u32::from_str_radix(s, radix).expect("failed to parse string as number"), - ) - .expect("failed to parse number as char"); - - Ok(c) - } - - fn is_hex(s: &str) -> bool { - s.chars().all(|c| c.is_ascii_hexdigit()) - } - - fn is_dec(s: &str) -> bool { - s.chars().all(|c| c.is_ascii_digit()) - } - - let mut s = SmartString::::default(); - - let c = self.input.cur(); - debug_assert_eq!(c, Some('&')); - unsafe { - // Safety: cur() was Some('&') - self.input.bump(); - } - - let start_pos = self.input.cur_pos(); - - for _ in 0..10 { - let c = match self.input.cur() { - Some(c) => c, - None => break, - }; - unsafe { - // Safety: cur() was Some(c) - self.input.bump(); - } - - if c == ';' { - if let Some(stripped) = s.strip_prefix('#') { - if stripped.starts_with('x') { - if is_hex(&s[2..]) { - let value = from_code(&s[2..], 16)?; - - return Ok((value, format!("&{s};"))); - } - } else if is_dec(stripped) { - let value = from_code(stripped, 10)?; - - return 
Ok((value, format!("&{s};"))); - } - } else if let Some(entity) = xhtml(&s) { - return Ok((entity, format!("&{s};"))); - } - - break; - } - - s.push(c) - } - - unsafe { - // Safety: start_pos is a valid position because we got it from self.input - self.input.reset_to(start_pos); - } - - Ok(('&', "&".to_string())) - } - - pub(super) fn read_jsx_new_line( - &mut self, - normalize_crlf: bool, - ) -> LexResult> { - debug_assert!(self.syntax.jsx()); - - let ch = self.input.cur().unwrap(); - unsafe { - // Safety: cur() was Some(ch) - self.input.bump(); - } - - let out = if ch == '\r' && self.input.cur() == Some('\n') { - unsafe { - // Safety: cur() was Some('\n') - self.input.bump(); - } - Either::Left(if normalize_crlf { "\n" } else { "\r\n" }) - } else { - Either::Right(ch) - }; - let cur_pos = self.input.cur_pos(); - self.state.cur_line += 1; - self.state.line_start = cur_pos; - - Ok(out) - } - - pub(super) fn read_jsx_str(&mut self, quote: char) -> LexResult { - debug_assert!(self.syntax.jsx()); - - let start = self.input.cur_pos(); - - unsafe { - // Safety: cur() was Some(quote) - self.input.bump(); // `quote` - } - - let mut out = String::new(); - let mut chunk_start = self.input.cur_pos(); - - loop { - let ch = match self.input.cur() { - Some(c) => c, - None => { - let start = self.state.start; - self.emit_error(start, SyntaxError::UnterminatedStrLit); - break; - } - }; - - let cur_pos = self.input.cur_pos(); - - if ch == '\\' { - let value = unsafe { - // Safety: We already checked for the range - self.input.slice(chunk_start, cur_pos) - }; - - out.push_str(value); - out.push('\\'); - - self.bump(); - - chunk_start = self.input.cur_pos(); - - continue; - } - - if ch == quote { - break; - } - - if ch == '&' { - let value = unsafe { - // Safety: We already checked for the range - self.input.slice(chunk_start, cur_pos) - }; - - out.push_str(value); - - let jsx_entity = self.read_jsx_entity()?; - - out.push(jsx_entity.0); - - chunk_start = self.input.cur_pos(); - } 
else if ch.is_line_terminator() { - let value = unsafe { - // Safety: We already checked for the range - self.input.slice(chunk_start, cur_pos) - }; - - out.push_str(value); - - match self.read_jsx_new_line(false)? { - Either::Left(s) => { - out.push_str(s); - } - Either::Right(c) => { - out.push(c); - } - } - - chunk_start = cur_pos + BytePos(ch.len_utf8() as _); - } else { - unsafe { - // Safety: cur() was Some(ch) - self.input.bump(); - } - } - } - - let value = if out.is_empty() { - // Fast path: We don't need to allocate - - let cur_pos = self.input.cur_pos(); - let value = unsafe { - // Safety: We already checked for the range - self.input.slice(chunk_start, cur_pos) - }; - - self.atoms.atom(value) - } else { - let cur_pos = self.input.cur_pos(); - let value = unsafe { - // Safety: We already checked for the range - self.input.slice(chunk_start, cur_pos) - }; - - out.push_str(value); - - self.atoms.atom(out) - }; - - // it might be at the end of the file when - // the string literal is unterminated - if self.input.peek_ahead().is_some() { - unsafe { - // Safety: We called peek_ahead() which means cur() was Some - self.input.bump(); - } - } - - let end = self.input.cur_pos(); - let raw = unsafe { - // Safety: Both of `start` and `end` are generated from `cur_pos()` - self.input.slice(start, end) - }; - - Ok(Token::Str { - value, - raw: self.atoms.atom(raw), - }) - } - - /// Read a JSX identifier (valid tag or attribute name). - /// - /// Optimized version since JSX identifiers can"t contain - /// escape characters and so can be read as single slice. - /// Also assumes that first character was already checked - /// by isIdentifierStart in readToken. 
- pub(super) fn read_jsx_word(&mut self) -> LexResult { - debug_assert!(self.syntax.jsx()); - debug_assert!(self.input.cur().is_some()); - debug_assert!(self.input.cur().unwrap().is_ident_start()); - - let mut first = true; - let slice = self.input.uncons_while(|c| { - if first { - first = false; - c.is_ident_start() - } else { - c.is_ident_part() || c == '-' - } - }); - - Ok(Token::JSXName { - name: self.atoms.atom(slice), - }) - } -} - -macro_rules! xhtml { - ( - $( - $i:ident : $s:expr, - )* - ) => { - fn xhtml(s: &str) -> Option { - match s{ - $(stringify!($i) => Some($s),)* - _ => None, - } - } - }; } - -xhtml!( - quot: '\u{0022}', - amp: '&', - apos: '\u{0027}', - lt: '<', - gt: '>', - nbsp: '\u{00A0}', - iexcl: '\u{00A1}', - cent: '\u{00A2}', - pound: '\u{00A3}', - curren: '\u{00A4}', - yen: '\u{00A5}', - brvbar: '\u{00A6}', - sect: '\u{00A7}', - uml: '\u{00A8}', - copy: '\u{00A9}', - ordf: '\u{00AA}', - laquo: '\u{00AB}', - not: '\u{00AC}', - shy: '\u{00AD}', - reg: '\u{00AE}', - macr: '\u{00AF}', - deg: '\u{00B0}', - plusmn: '\u{00B1}', - sup2: '\u{00B2}', - sup3: '\u{00B3}', - acute: '\u{00B4}', - micro: '\u{00B5}', - para: '\u{00B6}', - middot: '\u{00B7}', - cedil: '\u{00B8}', - sup1: '\u{00B9}', - ordm: '\u{00BA}', - raquo: '\u{00BB}', - frac14: '\u{00BC}', - frac12: '\u{00BD}', - frac34: '\u{00BE}', - iquest: '\u{00BF}', - Agrave: '\u{00C0}', - Aacute: '\u{00C1}', - Acirc: '\u{00C2}', - Atilde: '\u{00C3}', - Auml: '\u{00C4}', - Aring: '\u{00C5}', - AElig: '\u{00C6}', - Ccedil: '\u{00C7}', - Egrave: '\u{00C8}', - Eacute: '\u{00C9}', - Ecirc: '\u{00CA}', - Euml: '\u{00CB}', - Igrave: '\u{00CC}', - Iacute: '\u{00CD}', - Icirc: '\u{00CE}', - Iuml: '\u{00CF}', - ETH: '\u{00D0}', - Ntilde: '\u{00D1}', - Ograve: '\u{00D2}', - Oacute: '\u{00D3}', - Ocirc: '\u{00D4}', - Otilde: '\u{00D5}', - Ouml: '\u{00D6}', - times: '\u{00D7}', - Oslash: '\u{00D8}', - Ugrave: '\u{00D9}', - Uacute: '\u{00DA}', - Ucirc: '\u{00DB}', - Uuml: '\u{00DC}', - Yacute: '\u{00DD}', - 
THORN: '\u{00DE}', - szlig: '\u{00DF}', - agrave: '\u{00E0}', - aacute: '\u{00E1}', - acirc: '\u{00E2}', - atilde: '\u{00E3}', - auml: '\u{00E4}', - aring: '\u{00E5}', - aelig: '\u{00E6}', - ccedil: '\u{00E7}', - egrave: '\u{00E8}', - eacute: '\u{00E9}', - ecirc: '\u{00EA}', - euml: '\u{00EB}', - igrave: '\u{00EC}', - iacute: '\u{00ED}', - icirc: '\u{00EE}', - iuml: '\u{00EF}', - eth: '\u{00F0}', - ntilde: '\u{00F1}', - ograve: '\u{00F2}', - oacute: '\u{00F3}', - ocirc: '\u{00F4}', - otilde: '\u{00F5}', - ouml: '\u{00F6}', - divide: '\u{00F7}', - oslash: '\u{00F8}', - ugrave: '\u{00F9}', - uacute: '\u{00FA}', - ucirc: '\u{00FB}', - uuml: '\u{00FC}', - yacute: '\u{00FD}', - thorn: '\u{00FE}', - yuml: '\u{00FF}', - OElig: '\u{0152}', - oelig: '\u{0153}', - Scaron: '\u{0160}', - scaron: '\u{0161}', - Yuml: '\u{0178}', - fnof: '\u{0192}', - circ: '\u{02C6}', - tilde: '\u{02DC}', - Alpha: '\u{0391}', - Beta: '\u{0392}', - Gamma: '\u{0393}', - Delta: '\u{0394}', - Epsilon: '\u{0395}', - Zeta: '\u{0396}', - Eta: '\u{0397}', - Theta: '\u{0398}', - Iota: '\u{0399}', - Kappa: '\u{039A}', - Lambda: '\u{039B}', - Mu: '\u{039C}', - Nu: '\u{039D}', - Xi: '\u{039E}', - Omicron: '\u{039F}', - Pi: '\u{03A0}', - Rho: '\u{03A1}', - Sigma: '\u{03A3}', - Tau: '\u{03A4}', - Upsilon: '\u{03A5}', - Phi: '\u{03A6}', - Chi: '\u{03A7}', - Psi: '\u{03A8}', - Omega: '\u{03A9}', - alpha: '\u{03B1}', - beta: '\u{03B2}', - gamma: '\u{03B3}', - delta: '\u{03B4}', - epsilon: '\u{03B5}', - zeta: '\u{03B6}', - eta: '\u{03B7}', - theta: '\u{03B8}', - iota: '\u{03B9}', - kappa: '\u{03BA}', - lambda: '\u{03BB}', - mu: '\u{03BC}', - nu: '\u{03BD}', - xi: '\u{03BE}', - omicron: '\u{03BF}', - pi: '\u{03C0}', - rho: '\u{03C1}', - sigmaf: '\u{03C2}', - sigma: '\u{03C3}', - tau: '\u{03C4}', - upsilon: '\u{03C5}', - phi: '\u{03C6}', - chi: '\u{03C7}', - psi: '\u{03C8}', - omega: '\u{03C9}', - thetasym: '\u{03D1}', - upsih: '\u{03D2}', - piv: '\u{03D6}', - ensp: '\u{2002}', - emsp: '\u{2003}', - thinsp: 
'\u{2009}', - zwnj: '\u{200C}', - zwj: '\u{200D}', - lrm: '\u{200E}', - rlm: '\u{200F}', - ndash: '\u{2013}', - mdash: '\u{2014}', - lsquo: '\u{2018}', - rsquo: '\u{2019}', - sbquo: '\u{201A}', - ldquo: '\u{201C}', - rdquo: '\u{201D}', - bdquo: '\u{201E}', - dagger: '\u{2020}', - Dagger: '\u{2021}', - bull: '\u{2022}', - hellip: '\u{2026}', - permil: '\u{2030}', - prime: '\u{2032}', - Prime: '\u{2033}', - lsaquo: '\u{2039}', - rsaquo: '\u{203A}', - oline: '\u{203E}', - frasl: '\u{2044}', - euro: '\u{20AC}', - image: '\u{2111}', - weierp: '\u{2118}', - real: '\u{211C}', - trade: '\u{2122}', - alefsym: '\u{2135}', - larr: '\u{2190}', - uarr: '\u{2191}', - rarr: '\u{2192}', - darr: '\u{2193}', - harr: '\u{2194}', - crarr: '\u{21B5}', - lArr: '\u{21D0}', - uArr: '\u{21D1}', - rArr: '\u{21D2}', - dArr: '\u{21D3}', - hArr: '\u{21D4}', - forall: '\u{2200}', - part: '\u{2202}', - exist: '\u{2203}', - empty: '\u{2205}', - nabla: '\u{2207}', - isin: '\u{2208}', - notin: '\u{2209}', - ni: '\u{220B}', - prod: '\u{220F}', - sum: '\u{2211}', - minus: '\u{2212}', - lowast: '\u{2217}', - radic: '\u{221A}', - prop: '\u{221D}', - infin: '\u{221E}', - ang: '\u{2220}', - and: '\u{2227}', - or: '\u{2228}', - cap: '\u{2229}', - cup: '\u{222A}', - int: '\u{222B}', - there4: '\u{2234}', - sim: '\u{223C}', - cong: '\u{2245}', - asymp: '\u{2248}', - ne: '\u{2260}', - equiv: '\u{2261}', - le: '\u{2264}', - ge: '\u{2265}', - sub: '\u{2282}', - sup: '\u{2283}', - nsub: '\u{2284}', - sube: '\u{2286}', - supe: '\u{2287}', - oplus: '\u{2295}', - otimes: '\u{2297}', - perp: '\u{22A5}', - sdot: '\u{22C5}', - lceil: '\u{2308}', - rceil: '\u{2309}', - lfloor: '\u{230A}', - rfloor: '\u{230B}', - lang: '\u{2329}', - rang: '\u{232A}', - loz: '\u{25CA}', - spades: '\u{2660}', - clubs: '\u{2663}', - hearts: '\u{2665}', - diams: '\u{2666}', -); diff --git a/crates/swc_ecma_lexer/src/lexer/mod.rs b/crates/swc_ecma_lexer/src/lexer/mod.rs index ee638ab9fd27..e25abb54b2e0 100644 --- 
a/crates/swc_ecma_lexer/src/lexer/mod.rs +++ b/crates/swc_ecma_lexer/src/lexer/mod.rs @@ -1,32 +1,31 @@ //! ECMAScript lexer. -use std::{cell::RefCell, char, iter::FusedIterator, mem::transmute, rc::Rc}; +use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc}; -use arrayvec::ArrayVec; -use either::Either::{Left, Right}; -use swc_atoms::{Atom, AtomStoreCell}; +use swc_atoms::AtomStoreCell; use swc_common::{ comments::Comments, input::{Input, StringInput}, BytePos, Span, }; -use swc_ecma_ast::{op, AssignOp, EsVersion, Ident}; +use swc_ecma_ast::{AssignOp, EsVersion}; -pub use self::state::{TokenContext, TokenContexts}; -use self::{ - comments_buffer::CommentsBuffer, - state::State, - table::{ByteHandler, BYTE_HANDLERS}, - util::*, -}; +pub use self::state::{TokenContext, TokenContexts, TokenType}; +use self::table::{ByteHandler, BYTE_HANDLERS}; use crate::{ + common::{ + lexer::{ + char::CharExt, comments_buffer::CommentsBuffer, fixed_len_span, pos_span, LexResult, + Lexer as LexerTrait, + }, + syntax::Syntax, + }, error::{Error, SyntaxError}, tok, - token::{BinOpToken, IdentLike, Token, Word}, - Context, Syntax, + token::{BinOpToken, Token, TokenAndSpan}, + Context, }; -mod comments_buffer; mod jsx; mod number; mod state; @@ -34,129 +33,103 @@ mod table; #[cfg(test)] mod tests; pub mod util; -mod whitespace; - -pub(crate) type LexResult = Result; -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct Char(u32); +#[derive(Clone)] +pub struct Lexer<'a> { + comments: Option<&'a dyn Comments>, + /// [Some] if comment comment parsing is enabled. 
Otherwise [None] + comments_buffer: Option, -impl From for Char { - fn from(c: char) -> Self { - Char(c as u32) - } -} + pub ctx: Context, + input: StringInput<'a>, + start_pos: BytePos, -impl From for Char { - fn from(c: u32) -> Self { - Char(c) - } -} + state: self::state::State, + pub(crate) syntax: Syntax, + pub(crate) target: EsVersion, -pub(crate) struct CharIter(ArrayVec); + errors: Rc>>, + module_errors: Rc>>, -/// Ported from https://github.com/web-infra-dev/oxc/blob/99a4816ce7b6132b2667257984f9d92ae3768f03/crates/oxc_parser/src/lexer/mod.rs#L1349-L1374 -impl IntoIterator for Char { - type IntoIter = CharIter; - type Item = char; + buf: Rc>, - #[allow(unsafe_code)] - fn into_iter(self) -> Self::IntoIter { - // // TODO: Check if this is correct - // fn to_char(v: u8) -> char { - // char::from_digit(v as _, 16).unwrap_or('0') - // } + atoms: Rc, +} - CharIter(match char::from_u32(self.0) { - Some(c) => { - let mut buf = ArrayVec::new(); - // Safety: we can make sure that `buf` has enough capacity - unsafe { - buf.push_unchecked(c); - } - buf - } - None => { - let mut buf = ArrayVec::new(); +impl FusedIterator for Lexer<'_> {} - let high = self.0 & 0xffff0000 >> 16; +impl<'a> crate::common::lexer::Lexer<'a, TokenAndSpan> for Lexer<'a> { + type State = self::state::State; + type Token = self::Token; - let low = self.0 & 0x0000ffff; + #[inline(always)] + fn input(&self) -> &StringInput<'a> { + &self.input + } - // The second code unit of a surrogate pair is always in the range from 0xDC00 - // to 0xDFFF, and is called a low surrogate or a trail surrogate. 
- if !(0xdc00..=0xdfff).contains(&low) { - // Safety: we can make sure that `buf` has enough capacity - unsafe { - buf.push_unchecked('\\'); - buf.push_unchecked('u'); - for c in format!("{high:x}").chars() { - buf.push_unchecked(c); - } - buf.push_unchecked('\\'); - buf.push_unchecked('u'); - for c in format!("{low:x}").chars() { - buf.push_unchecked(c); - } - } - } else { - // `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair` - let astral_code_point = (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000; + #[inline(always)] + fn input_mut(&mut self) -> &mut StringInput<'a> { + &mut self.input + } - // Safety: we can make sure that `buf` has enough capacity - unsafe { - buf.push_unchecked('\\'); - buf.push_unchecked('u'); - for c in format!("{astral_code_point:x}").chars() { - buf.push_unchecked(c); - } - } - } + #[inline(always)] + fn push_error(&self, error: crate::error::Error) { + self.errors.borrow_mut().push(error); + } - buf - } - }) + #[inline(always)] + fn state(&self) -> &Self::State { + &self.state } -} -impl Iterator for CharIter { - type Item = char; + #[inline(always)] + fn state_mut(&mut self) -> &mut Self::State { + &mut self.state + } - fn next(&mut self) -> Option { - if self.0.is_empty() { - None - } else { - Some(self.0.remove(0)) - } + #[inline(always)] + fn comments(&self) -> Option<&'a dyn swc_common::comments::Comments> { + self.comments } -} -impl FusedIterator for CharIter {} + #[inline(always)] + fn comments_buffer(&self) -> Option<&crate::common::lexer::comments_buffer::CommentsBuffer> { + self.comments_buffer.as_ref() + } -#[derive(Clone)] -pub struct Lexer<'a> { - comments: Option<&'a dyn Comments>, - /// [Some] if comment comment parsing is enabled. 
Otherwise [None] - comments_buffer: Option, + #[inline(always)] + fn comments_buffer_mut( + &mut self, + ) -> Option<&mut crate::common::lexer::comments_buffer::CommentsBuffer> { + self.comments_buffer.as_mut() + } - pub ctx: Context, - input: StringInput<'a>, - start_pos: BytePos, + #[inline(always)] + unsafe fn input_slice(&mut self, start: BytePos, end: BytePos) -> &'a str { + self.input.slice(start, end) + } - state: State, - pub(crate) syntax: Syntax, - pub(crate) target: EsVersion, + #[inline(always)] + fn input_uncons_while(&mut self, f: impl FnMut(char) -> bool) -> &'a str { + self.input_mut().uncons_while(f) + } - errors: Rc>>, - module_errors: Rc>>, + #[inline(always)] + fn atom<'b>(&self, s: impl Into>) -> swc_atoms::Atom { + self.atoms.atom(s) + } - buf: Rc>, + #[inline(always)] + fn skip_block_comment(&mut self) { + self.skip_block_comment(); + } - atoms: Rc, + #[inline(always)] + fn buf(&self) -> std::rc::Rc> { + self.buf.clone() + } } -impl FusedIterator for Lexer<'_> {} - impl<'a> Lexer<'a> { pub fn new( syntax: Syntax, @@ -172,7 +145,7 @@ impl<'a> Lexer<'a> { ctx: Default::default(), input, start_pos, - state: State::new(syntax, start_pos), + state: self::state::State::new(syntax, start_pos), syntax, target, errors: Default::default(), @@ -182,18 +155,6 @@ impl<'a> Lexer<'a> { } } - /// Utility method to reuse buffer. 
- fn with_buf(&mut self, op: F) -> LexResult - where - F: for<'any> FnOnce(&mut Lexer<'any>, &mut String) -> LexResult, - { - let b = self.buf.clone(); - let mut buf = b.borrow_mut(); - buf.clear(); - - op(self, &mut buf) - } - /// babel: `getTokenFromCode` fn read_token(&mut self) -> LexResult> { let byte = match self.input.as_str().as_bytes().first() { @@ -216,362 +177,6 @@ impl<'a> Lexer<'a> { } } - /// `#` - fn read_token_number_sign(&mut self) -> LexResult> { - debug_assert!(self.cur().is_some()); - - unsafe { - // Safety: cur() is Some('#') - self.input.bump(); // '#' - } - - // `#` can also be a part of shebangs, however they should have been - // handled by `read_shebang()` - debug_assert!( - !self.input.is_at_start() || self.cur() != Some('!'), - "#! should have already been handled by read_shebang()" - ); - Ok(Some(Token::Hash)) - } - - /// Read a token given `.`. - /// - /// This is extracted as a method to reduce size of `read_token`. - #[inline(never)] - fn read_token_dot(&mut self) -> LexResult { - // Check for eof - let next = match self.input.peek() { - Some(next) => next, - None => { - unsafe { - // Safety: cur() is Some(',') - self.input.bump(); - } - return Ok(tok!('.')); - } - }; - if next.is_ascii_digit() { - return self.read_number(true).map(|v| match v { - Left((value, raw)) => Token::Num { value, raw }, - Right((value, raw)) => Token::BigInt { value, raw }, - }); - } - - unsafe { - // Safety: cur() is Some - // 1st `.` - self.input.bump(); - } - - if next == '.' && self.input.peek() == Some('.') { - unsafe { - // Safety: peek() was Some - - self.input.bump(); // 2nd `.` - self.input.bump(); // 3rd `.` - } - - return Ok(tok!("...")); - } - - Ok(tok!('.')) - } - - /// Read a token given `?`. - /// - /// This is extracted as a method to reduce size of `read_token`. 
- #[inline(never)] - fn read_token_question_mark(&mut self) -> LexResult { - match self.input.peek() { - Some('?') => { - unsafe { - // Safety: peek() was some - self.input.bump(); - self.input.bump(); - } - if self.input.cur() == Some('=') { - unsafe { - // Safety: cur() was some - self.input.bump(); - } - - return Ok(tok!("??=")); - } - Ok(tok!("??")) - } - _ => { - unsafe { - // Safety: peek() is callable only if cur() is Some - self.input.bump(); - } - Ok(tok!('?')) - } - } - } - - /// Read a token given `:`. - /// - /// This is extracted as a method to reduce size of `read_token`. - #[inline(never)] - fn read_token_colon(&mut self) -> LexResult { - unsafe { - // Safety: cur() is Some(':') - self.input.bump(); - } - Ok(tok!(':')) - } - - /// Read a token given `0`. - /// - /// This is extracted as a method to reduce size of `read_token`. - #[inline(never)] - fn read_token_zero(&mut self) -> LexResult { - let next = self.input.peek(); - - let bigint = match next { - Some('x') | Some('X') => self.read_radix_number::<16>(), - Some('o') | Some('O') => self.read_radix_number::<8>(), - Some('b') | Some('B') => self.read_radix_number::<2>(), - _ => { - return self.read_number(false).map(|v| match v { - Left((value, raw)) => Token::Num { value, raw }, - Right((value, raw)) => Token::BigInt { value, raw }, - }); - } - }; - - bigint.map(|v| match v { - Left((value, raw)) => Token::Num { value, raw }, - Right((value, raw)) => Token::BigInt { value, raw }, - }) - } - - /// Read a token given `|` or `&`. - /// - /// This is extracted as a method to reduce size of `read_token`. 
- #[inline(never)] - fn read_token_logical(&mut self) -> LexResult { - let had_line_break_before_last = self.had_line_break_before_last(); - let start = self.cur_pos(); - - unsafe { - // Safety: cur() is Some(c as char) - self.input.bump(); - } - let token = if C == b'&' { - BinOpToken::BitAnd - } else { - BinOpToken::BitOr - }; - - // '|=', '&=' - if self.input.eat_byte(b'=') { - return Ok(Token::AssignOp(match token { - BinOpToken::BitAnd => AssignOp::BitAndAssign, - BinOpToken::BitOr => AssignOp::BitOrAssign, - _ => unreachable!(), - })); - } - - // '||', '&&' - if self.input.cur() == Some(C as char) { - unsafe { - // Safety: cur() is Some(c) - self.input.bump(); - } - - if self.input.cur() == Some('=') { - unsafe { - // Safety: cur() is Some('=') - self.input.bump(); - } - return Ok(Token::AssignOp(match token { - BinOpToken::BitAnd => op!("&&="), - BinOpToken::BitOr => op!("||="), - _ => unreachable!(), - })); - } - - // ||||||| - // ^ - if had_line_break_before_last && token == BinOpToken::BitOr && self.is_str("||||| ") { - let span = fixed_len_span(start, 7); - self.emit_error_span(span, SyntaxError::TS1185); - self.skip_line_comment(5); - self.skip_space::(); - return self.error_span(span, SyntaxError::TS1185); - } - - return Ok(Token::BinOp(match token { - BinOpToken::BitAnd => BinOpToken::LogicalAnd, - BinOpToken::BitOr => BinOpToken::LogicalOr, - _ => unreachable!(), - })); - } - - Ok(Token::BinOp(token)) - } - - /// Read a token given `*` or `%`. - /// - /// This is extracted as a method to reduce size of `read_token`. 
- #[inline(never)] - fn read_token_mul_mod(&mut self) -> LexResult { - let is_mul = C == b'*'; - unsafe { - // Safety: cur() is Some(c) - self.input.bump(); - } - let mut token = if is_mul { - Token::BinOp(BinOpToken::Mul) - } else { - Token::BinOp(BinOpToken::Mod) - }; - - // check for ** - if is_mul && self.input.eat_byte(b'*') { - token = Token::BinOp(BinOpToken::Exp) - } - - if self.input.eat_byte(b'=') { - token = match token { - Token::BinOp(BinOpToken::Mul) => Token::AssignOp(AssignOp::MulAssign), - Token::BinOp(BinOpToken::Mod) => Token::AssignOp(AssignOp::ModAssign), - Token::BinOp(BinOpToken::Exp) => Token::AssignOp(AssignOp::ExpAssign), - _ => unreachable!(), - } - } - - Ok(token) - } - - /// Read an escaped character for string literal. - /// - /// In template literal, we should preserve raw string. - fn read_escaped_char(&mut self, in_template: bool) -> LexResult>> { - debug_assert_eq!(self.cur(), Some('\\')); - - let start = self.cur_pos(); - - self.bump(); // '\' - - let c = match self.cur() { - Some(c) => c, - None => self.error_span(pos_span(start), SyntaxError::InvalidStrEscape)?, - }; - - macro_rules! push_c_and_ret { - ($c:expr) => {{ - $c - }}; - } - - let c = match c { - '\\' => push_c_and_ret!('\\'), - 'n' => push_c_and_ret!('\n'), - 'r' => push_c_and_ret!('\r'), - 't' => push_c_and_ret!('\t'), - 'b' => push_c_and_ret!('\u{0008}'), - 'v' => push_c_and_ret!('\u{000b}'), - 'f' => push_c_and_ret!('\u{000c}'), - '\r' => { - self.bump(); // remove '\r' - - self.eat(b'\n'); - - return Ok(None); - } - '\n' | '\u{2028}' | '\u{2029}' => { - self.bump(); - - return Ok(None); - } - - // read hexadecimal escape sequences - 'x' => { - self.bump(); // 'x' - - match self.read_int_u32::<16>(2)? 
{ - Some(val) => return Ok(Some(vec![Char::from(val)])), - None => self.error( - start, - SyntaxError::BadCharacterEscapeSequence { - expected: "2 hex characters", - }, - )?, - } - } - - // read unicode escape sequences - 'u' => match self.read_unicode_escape() { - Ok(chars) => return Ok(Some(chars)), - Err(err) => self.error(start, err.into_kind())?, - }, - - // octal escape sequences - '0'..='7' => { - self.bump(); - - let first_c = if c == '0' { - match self.cur() { - Some(next) if next.is_digit(8) => c, - // \0 is not an octal literal nor decimal literal. - _ => return Ok(Some(vec!['\u{0000}'.into()])), - } - } else { - c - }; - - // TODO: Show template instead of strict mode - if in_template { - self.error(start, SyntaxError::LegacyOctal)? - } - - self.emit_strict_mode_error(start, SyntaxError::LegacyOctal); - - let mut value: u8 = first_c.to_digit(8).unwrap() as u8; - - macro_rules! one { - ($check:expr) => {{ - let cur = self.cur(); - - match cur.and_then(|c| c.to_digit(8)) { - Some(v) => { - value = if $check { - let new_val = value - .checked_mul(8) - .and_then(|value| value.checked_add(v as u8)); - match new_val { - Some(val) => val, - None => return Ok(Some(vec![Char::from(value as char)])), - } - } else { - value * 8 + v as u8 - }; - - self.bump(); - } - _ => return Ok(Some(vec![Char::from(value as u32)])), - } - }}; - } - - one!(false); - one!(true); - - return Ok(Some(vec![Char::from(value as char)])); - } - _ => c, - }; - - unsafe { - // Safety: cur() is Some(c) if this method is called. 
- self.input.bump(); - } - - Ok(Some(vec![c.into()])) - } - fn read_token_plus_minus(&mut self) -> LexResult> { let start = self.cur_pos(); @@ -660,20 +265,6 @@ impl<'a> Lexer<'a> { } impl Lexer<'_> { - #[inline(never)] - fn read_slash(&mut self) -> LexResult> { - debug_assert_eq!(self.cur(), Some('/')); - - // Divide operator - self.bump(); - - Ok(Some(if self.eat(b'=') { - tok!("/=") - } else { - tok!('/') - })) - } - #[inline(never)] fn read_token_lt_gt(&mut self) -> LexResult> { let had_line_break_before_last = self.had_line_break_before_last(); @@ -757,625 +348,4 @@ impl Lexer<'_> { Ok(Some(token)) } - - /// This can be used if there's no keyword starting with the first - /// character. - fn read_ident_unknown(&mut self) -> LexResult { - debug_assert!(self.cur().is_some()); - - let (word, _) = self - .read_word_as_str_with(|l, s, _, _| Word::Ident(IdentLike::Other(l.atoms.atom(s))))?; - - Ok(Word(word)) - } - - /// This can be used if there's no keyword starting with the first - /// character. - fn read_word_with( - &mut self, - convert: &dyn Fn(&str) -> Option, - ) -> LexResult> { - debug_assert!(self.cur().is_some()); - - let start = self.cur_pos(); - let (word, has_escape) = self.read_word_as_str_with(|l, s, _, can_be_known| { - if can_be_known { - if let Some(word) = convert(s) { - return word; - } - } - - Word::Ident(IdentLike::Other(l.atoms.atom(s))) - })?; - - // Note: ctx is store in lexer because of this error. - // 'await' and 'yield' may have semantic of reserved word, which means lexer - // should know context or parser should handle this error. Our approach to this - // problem is former one. - - if has_escape && self.ctx.is_reserved(&word) { - self.error( - start, - SyntaxError::EscapeInReservedWord { word: word.into() }, - )? - } else { - Ok(Some(Token::Word(word))) - } - } - - /// This method is optimized for texts without escape sequences. 
- /// - /// `convert(text, has_escape, can_be_keyword)` - fn read_word_as_str_with(&mut self, convert: F) -> LexResult<(Ret, bool)> - where - F: for<'any> FnOnce(&'any mut Lexer<'_>, &str, bool, bool) -> Ret, - { - debug_assert!(self.cur().is_some()); - let mut first = true; - let mut can_be_keyword = true; - let mut slice_start = self.cur_pos(); - let mut has_escape = false; - - self.with_buf(|l, buf| { - loop { - if let Some(c) = l.input.cur_as_ascii() { - // Performance optimization - if can_be_keyword && (c.is_ascii_uppercase() || c.is_ascii_digit()) { - can_be_keyword = false; - } - - if Ident::is_valid_ascii_continue(c) { - l.bump(); - continue; - } else if first && Ident::is_valid_ascii_start(c) { - l.bump(); - first = false; - continue; - } - - // unicode escape - if c == b'\\' { - first = false; - has_escape = true; - let start = l.cur_pos(); - l.bump(); - - if !l.is(b'u') { - l.error_span(pos_span(start), SyntaxError::ExpectedUnicodeEscape)? - } - - { - let end = l.input.cur_pos(); - let s = unsafe { - // Safety: start and end are valid position because we got them from - // `self.input` - l.input.slice(slice_start, start) - }; - buf.push_str(s); - unsafe { - // Safety: We got end from `self.input` - l.input.reset_to(end); - } - } - - let chars = l.read_unicode_escape()?; - - if let Some(c) = chars.first() { - let valid = if first { - c.is_ident_start() - } else { - c.is_ident_part() - }; - - if !valid { - l.emit_error(start, SyntaxError::InvalidIdentChar); - } - } - - for c in chars { - buf.extend(c); - } - - slice_start = l.cur_pos(); - continue; - } - - // ASCII but not a valid identifier - - break; - } else if let Some(c) = l.input.cur() { - if Ident::is_valid_non_ascii_continue(c) { - l.bump(); - continue; - } else if first && Ident::is_valid_non_ascii_start(c) { - l.bump(); - first = false; - continue; - } - } - - break; - } - - let end = l.cur_pos(); - - let value = if !has_escape { - // Fast path: raw slice is enough if there's no escape. 
- - let s = unsafe { - // Safety: slice_start and end are valid position because we got them from - // `self.input` - l.input.slice(slice_start, end) - }; - let s = unsafe { - // Safety: We don't use 'static. We just bypass the lifetime check. - transmute::<&str, &'static str>(s) - }; - - convert(l, s, has_escape, can_be_keyword) - } else { - let s = unsafe { - // Safety: slice_start and end are valid position because we got them from - // `self.input` - l.input.slice(slice_start, end) - }; - buf.push_str(s); - - convert(l, buf, has_escape, can_be_keyword) - }; - - Ok((value, has_escape)) - }) - } - - fn read_unicode_escape(&mut self) -> LexResult> { - debug_assert_eq!(self.cur(), Some('u')); - - let mut chars = Vec::with_capacity(4); - let mut is_curly = false; - - self.bump(); // 'u' - - if self.eat(b'{') { - is_curly = true; - } - - let state = self.input.cur_pos(); - let c = match self.read_int_u32::<16>(if is_curly { 0 } else { 4 }) { - Ok(Some(val)) => { - if 0x0010_ffff >= val { - char::from_u32(val) - } else { - let start = self.cur_pos(); - - self.error( - start, - SyntaxError::BadCharacterEscapeSequence { - expected: if is_curly { - "1-6 hex characters in the range 0 to 10FFFF." - } else { - "4 hex characters" - }, - }, - )? - } - } - _ => { - let start = self.cur_pos(); - - self.error( - start, - SyntaxError::BadCharacterEscapeSequence { - expected: if is_curly { - "1-6 hex characters" - } else { - "4 hex characters" - }, - }, - )? 
- } - }; - - match c { - Some(c) => { - chars.push(c.into()); - } - _ => { - unsafe { - // Safety: state is valid position because we got it from cur_pos() - self.input.reset_to(state); - } - - chars.push(Char::from('\\')); - chars.push(Char::from('u')); - - if is_curly { - chars.push(Char::from('{')); - - for _ in 0..6 { - if let Some(c) = self.input.cur() { - if c == '}' { - break; - } - - self.bump(); - - chars.push(Char::from(c)); - } else { - break; - } - } - - chars.push(Char::from('}')); - } else { - for _ in 0..4 { - if let Some(c) = self.input.cur() { - self.bump(); - - chars.push(Char::from(c)); - } - } - } - } - } - - if is_curly && !self.eat(b'}') { - self.error(state, SyntaxError::InvalidUnicodeEscape)? - } - - Ok(chars) - } - - /// See https://tc39.github.io/ecma262/#sec-literals-string-literals - fn read_str_lit(&mut self) -> LexResult { - debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"')); - let start = self.cur_pos(); - let quote = self.cur().unwrap() as u8; - - self.bump(); // '"' - - let mut has_escape = false; - let mut slice_start = self.input.cur_pos(); - - self.with_buf(|l, buf| { - loop { - if let Some(c) = l.input.cur_as_ascii() { - if c == quote { - let value_end = l.cur_pos(); - - let value = if !has_escape { - let s = unsafe { - // Safety: slice_start and value_end are valid position because we - // got them from `self.input` - l.input.slice(slice_start, value_end) - }; - - l.atoms.atom(s) - } else { - let s = unsafe { - // Safety: slice_start and value_end are valid position because we - // got them from `self.input` - l.input.slice(slice_start, value_end) - }; - buf.push_str(s); - - l.atoms.atom(&**buf) - }; - - unsafe { - // Safety: cur is quote - l.input.bump(); - } - - let end = l.cur_pos(); - - let raw = unsafe { - // Safety: start and end are valid position because we got them from - // `self.input` - l.input.slice(start, end) - }; - let raw = l.atoms.atom(raw); - - return Ok(Token::Str { value, raw }); - } - - if 
c == b'\\' { - has_escape = true; - - { - let end = l.cur_pos(); - let s = unsafe { - // Safety: start and end are valid position because we got them from - // `self.input` - l.input.slice(slice_start, end) - }; - buf.push_str(s); - } - - if let Some(chars) = l.read_escaped_char(false)? { - for c in chars { - buf.extend(c); - } - } - - slice_start = l.cur_pos(); - continue; - } - - if (c as char).is_line_break() { - break; - } - - unsafe { - // Safety: cur is a ascii character - l.input.bump(); - } - continue; - } - - match l.input.cur() { - Some(c) => { - if c.is_line_break() { - break; - } - unsafe { - // Safety: cur is Some(c) - l.input.bump(); - } - } - None => break, - } - } - - { - let end = l.cur_pos(); - let s = unsafe { - // Safety: start and end are valid position because we got them from - // `self.input` - l.input.slice(slice_start, end) - }; - buf.push_str(s); - } - - l.emit_error(start, SyntaxError::UnterminatedStrLit); - - let end = l.cur_pos(); - - let raw = unsafe { - // Safety: start and end are valid position because we got them from - // `self.input` - l.input.slice(start, end) - }; - Ok(Token::Str { - value: l.atoms.atom(&*buf), - raw: l.atoms.atom(raw), - }) - }) - } - - /// Expects current char to be '/' - fn read_regexp(&mut self, start: BytePos) -> LexResult { - unsafe { - // Safety: start is valid position, and cur() is Some('/') - self.input.reset_to(start); - } - - debug_assert_eq!(self.cur(), Some('/')); - - let start = self.cur_pos(); - - self.bump(); - - let (mut escaped, mut in_class) = (false, false); - - let content = self.with_buf(|l, buf| { - while let Some(c) = l.cur() { - // This is ported from babel. - // Seems like regexp literal cannot contain linebreak. 
- if c.is_line_terminator() { - let span = l.span(start); - - return Err(Error::new(span, SyntaxError::UnterminatedRegExp)); - } - - if escaped { - escaped = false; - } else { - match c { - '[' => in_class = true, - ']' if in_class => in_class = false, - // Terminates content part of regex literal - '/' if !in_class => break, - _ => {} - } - - escaped = c == '\\'; - } - - l.bump(); - buf.push(c); - } - - Ok(l.atoms.atom(&**buf)) - })?; - - // input is terminated without following `/` - if !self.is(b'/') { - let span = self.span(start); - - return Err(Error::new(span, SyntaxError::UnterminatedRegExp)); - } - - self.bump(); // '/' - - // Spec says "It is a Syntax Error if IdentifierPart contains a Unicode escape - // sequence." TODO: check for escape - - // Need to use `read_word` because '\uXXXX' sequences are allowed - // here (don't ask). - // let flags_start = self.cur_pos(); - let flags = { - match self.cur() { - Some(c) if c.is_ident_start() => self - .read_word_as_str_with(|l, s, _, _| l.atoms.atom(s)) - .map(Some), - _ => Ok(None), - } - }? - .map(|(value, _)| value) - .unwrap_or_default(); - - Ok(Token::Regex(content, flags)) - } - - #[cold] - fn read_shebang(&mut self) -> LexResult> { - if self.input.cur() != Some('#') || self.input.peek() != Some('!') { - return Ok(None); - } - unsafe { - // Safety: cur() is Some('#') - self.input.bump(); - // Safety: cur() is Some('!') - self.input.bump(); - } - let s = self.input.uncons_while(|c| !c.is_line_terminator()); - Ok(Some(self.atoms.atom(s))) - } - - fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult { - let start = self.cur_pos(); - - let mut cooked = Ok(String::new()); - let mut cooked_slice_start = start; - let raw_slice_start = start; - - macro_rules! 
consume_cooked { - () => {{ - if let Ok(cooked) = &mut cooked { - let last_pos = self.cur_pos(); - cooked.push_str(unsafe { - // Safety: Both of start and last_pos are valid position because we got them - // from `self.input` - self.input.slice(cooked_slice_start, last_pos) - }); - } - }}; - } - - while let Some(c) = self.cur() { - if c == '`' || (c == '$' && self.peek() == Some('{')) { - if start == self.cur_pos() && self.state.last_was_tpl_element() { - if c == '$' { - self.bump(); - self.bump(); - return Ok(tok!("${")); - } else { - self.bump(); - return Ok(tok!('`')); - } - } - - // If we don't have any escape - let cooked = if cooked_slice_start == raw_slice_start { - let last_pos = self.cur_pos(); - let s = unsafe { - // Safety: Both of start and last_pos are valid position because we got them - // from `self.input` - self.input.slice(cooked_slice_start, last_pos) - }; - - Ok(self.atoms.atom(s)) - } else { - consume_cooked!(); - - cooked.map(|s| self.atoms.atom(s)) - }; - - // TODO: Handle error - let end = self.input.cur_pos(); - let raw = unsafe { - // Safety: Both of start and last_pos are valid position because we got them - // from `self.input` - self.input.slice(raw_slice_start, end) - }; - return Ok(Token::Template { - cooked, - raw: self.atoms.atom(raw), - }); - } - - if c == '\\' { - consume_cooked!(); - - match self.read_escaped_char(true) { - Ok(Some(chars)) => { - if let Ok(ref mut cooked) = cooked { - for c in chars { - cooked.extend(c); - } - } - } - Ok(None) => {} - Err(error) => { - cooked = Err(error); - } - } - - cooked_slice_start = self.cur_pos(); - } else if c.is_line_terminator() { - self.state.had_line_break = true; - - consume_cooked!(); - - let c = if c == '\r' && self.peek() == Some('\n') { - self.bump(); // '\r' - '\n' - } else { - match c { - '\n' => '\n', - '\r' => '\n', - '\u{2028}' => '\u{2028}', - '\u{2029}' => '\u{2029}', - _ => unreachable!(), - } - }; - - self.bump(); - - if let Ok(ref mut cooked) = cooked { - 
cooked.push(c); - } - cooked_slice_start = self.cur_pos(); - } else { - self.bump(); - } - } - - self.error(start_of_tpl, SyntaxError::UnterminatedTpl)? - } - - #[inline] - #[allow(clippy::misnamed_getters)] - pub fn had_line_break_before_last(&self) -> bool { - self.state.had_line_break - } - - #[inline] - pub fn set_expr_allowed(&mut self, allow: bool) { - self.state.is_expr_allowed = allow; - } - - #[inline] - pub fn set_next_regexp(&mut self, start: Option) { - self.state.next_regexp = start; - } -} - -fn pos_span(p: BytePos) -> Span { - Span::new(p, p) -} - -fn fixed_len_span(p: BytePos, len: u32) -> Span { - Span::new(p, p + BytePos(len)) } diff --git a/crates/swc_ecma_lexer/src/lexer/number.rs b/crates/swc_ecma_lexer/src/lexer/number.rs index def0fd5d5934..26f5b1bdef7c 100644 --- a/crates/swc_ecma_lexer/src/lexer/number.rs +++ b/crates/swc_ecma_lexer/src/lexer/number.rs @@ -1,524 +1,11 @@ -//! Lexer methods related to reading numbers. -//! -//! -//! See https://tc39.github.io/ecma262/#sec-literals-numeric-literals -use std::borrow::Cow; - -use either::Either; -use num_bigint::BigInt as BigIntValue; -use num_traits::{Num as NumTrait, ToPrimitive}; -use tracing::trace; - -use super::*; - -struct LazyBigInt { - value: String, -} - -impl LazyBigInt { - fn new(value: String) -> Self { - Self { value } - } - - #[inline] - fn into_value(self) -> BigIntValue { - BigIntValue::parse_bytes(self.value.as_bytes(), RADIX as _) - .expect("failed to parse string as a bigint") - } -} - -impl Lexer<'_> { - /// Reads an integer, octal integer, or floating-point number - pub(super) fn read_number( - &mut self, - starts_with_dot: bool, - ) -> LexResult, Atom)>> { - debug_assert!(self.cur().is_some()); - - if starts_with_dot { - debug_assert_eq!( - self.cur(), - Some('.'), - "read_number(starts_with_dot = true) expects current char to be '.'" - ); - } - - let start = self.cur_pos(); - - let val = if starts_with_dot { - // first char is '.' 
- 0f64 - } else { - let starts_with_zero = self.cur().unwrap() == '0'; - - // Use read_number_no_dot to support long numbers. - let (val, s, not_octal) = self.read_number_no_dot_as_str::<10>()?; - - if self.eat(b'n') { - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - - return Ok(Either::Right(( - Box::new(s.into_value()), - self.atoms.atom(raw), - ))); - } - - if starts_with_zero { - // TODO: I guess it would be okay if I don't use -ffast-math - // (or something like that), but needs review. - if val == 0.0f64 { - // If only one zero is used, it's decimal. - // And if multiple zero is used, it's octal. - // - // e.g. `0` is decimal (so it can be part of float) - // - // e.g. `000` is octal - if start.0 != self.last_pos().0 - 1 { - // `-1` is utf 8 length of `0` - - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - let raw = self.atoms.atom(raw); - return self - .make_legacy_octal(start, 0f64) - .map(|value| Either::Left((value, raw))); - } - } else { - // strict mode hates non-zero decimals starting with zero. - // e.g. 08.1 is strict mode violation but 0.1 is valid float. - - if val.fract() == 0.0 { - let val_str = &s.value; - - // if it contains '8' or '9', it's decimal. - if not_octal { - // Continue parsing - self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal); - } else { - // It's Legacy octal, and we should reinterpret value. 
- let val = BigIntValue::from_str_radix(val_str, 8) - .unwrap_or_else(|err| { - panic!( - "failed to parse {val_str} using `from_str_radix`: {err:?}" - ) - }) - .to_f64() - .unwrap_or_else(|| { - panic!("failed to parse {val_str} into float using BigInt") - }); - - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - let raw = self.atoms.atom(raw); - - return self - .make_legacy_octal(start, val) - .map(|value| Either::Left((value, raw))); - } - } - } - } - - val - }; - - // At this point, number cannot be an octal literal. - - let mut val: f64 = val; - - // `0.a`, `08.a`, `102.a` are invalid. - // - // `.1.a`, `.1e-4.a` are valid, - if self.cur() == Some('.') { - self.bump(); - - if starts_with_dot { - debug_assert!(self.cur().is_some()); - debug_assert!(self.cur().unwrap().is_ascii_digit()); - } - - // Read numbers after dot - self.read_int::<10>(0)?; - - val = { - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - - // Remove number separator from number - if raw.contains('_') { - Cow::Owned(raw.replace('_', "")) - } else { - Cow::Borrowed(raw) - } - .parse() - .expect("failed to parse float using rust's impl") - }; - } - - // Handle 'e' and 'E' - // - // .5e1 = 5 - // 1e2 = 100 - // 1e+2 = 100 - // 1e-2 = 0.01 - match self.cur() { - Some('e') | Some('E') => { - self.bump(); - - let next = match self.cur() { - Some(next) => next, - None => { - let pos = self.cur_pos(); - self.error(pos, SyntaxError::NumLitTerminatedWithExp)? 
- } - }; - - let positive = if next == '+' || next == '-' { - self.bump(); // remove '+', '-' - - next == '+' - } else { - true - }; - - let exp = self.read_number_no_dot::<10>()?; - - val = if exp == f64::INFINITY { - if positive && val != 0.0 { - f64::INFINITY - } else { - 0.0 - } - } else { - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - - if raw.contains('_') { - Cow::Owned(raw.replace('_', "")) - } else { - Cow::Borrowed(raw) - } - .parse() - .expect("failed to parse float literal") - } - } - _ => {} - } - - self.ensure_not_ident()?; - - let end = self.cur_pos(); - let raw_str = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - Ok(Either::Left((val, raw_str.into()))) - } - - /// Returns `Left(value)` or `Right(BigInt)` - pub(super) fn read_radix_number( - &mut self, - ) -> LexResult, Atom)>> { - debug_assert!( - RADIX == 2 || RADIX == 8 || RADIX == 16, - "radix should be one of 2, 8, 16, but got {RADIX}" - ); - debug_assert_eq!(self.cur(), Some('0')); - - let start = self.cur_pos(); - - self.bump(); - - match self.input.cur() { - Some(..) => { - self.bump(); - } - _ => { - unreachable!(); - } - } - - let (val, s, _) = self.read_number_no_dot_as_str::()?; - - if self.eat(b'n') { - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - - return Ok(Either::Right(( - Box::new(s.into_value()), - self.atoms.atom(raw), - ))); - } - - self.ensure_not_ident()?; - - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - - Ok(Either::Left((val, self.atoms.atom(raw)))) - } - - /// This can read long integers like - /// "13612536612375123612312312312312312312312". 
- fn read_number_no_dot(&mut self) -> LexResult { - debug_assert!( - RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, - "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}" - ); - let start = self.cur_pos(); - - let mut read_any = false; - - let res = self.read_digits::<_, f64, RADIX>( - |total, radix, v| { - read_any = true; - - Ok((f64::mul_add(total, radix as f64, v as f64), true)) - }, - true, - ); - - if !read_any { - self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?; - } - res - } - - /// This can read long integers like - /// "13612536612375123612312312312312312312312". - /// - /// - Returned `bool` is `true` is there was `8` or `9`. - fn read_number_no_dot_as_str( - &mut self, - ) -> LexResult<(f64, LazyBigInt, bool)> { - debug_assert!( - RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, - "radix for read_number_no_dot should be one of 2, 8, 10, 16, but got {RADIX}" - ); - let start = self.cur_pos(); - - let mut non_octal = false; - let mut read_any = false; - - self.read_digits::<_, f64, RADIX>( - |total, radix, v| { - read_any = true; - - if v == 8 || v == 9 { - non_octal = true; - } - - Ok((f64::mul_add(total, radix as f64, v as f64), true)) - }, - true, - )?; - - if !read_any { - self.error(start, SyntaxError::ExpectedDigit { radix: RADIX })?; - } - - let end = self.cur_pos(); - let raw = unsafe { - // Safety: We got both start and end position from `self.input` - self.input.slice(start, end) - }; - // Remove number separator from number - let raw_number_str = raw.replace('_', ""); - let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32) - .expect("failed to parse float using BigInt") - .to_f64() - .expect("failed to parse float using BigInt"); - Ok((parsed_float, LazyBigInt::new(raw_number_str), non_octal)) - } - - /// Ensure that ident cannot directly follow numbers. 
- fn ensure_not_ident(&mut self) -> LexResult<()> { - match self.cur() { - Some(c) if c.is_ident_start() => { - let span = pos_span(self.cur_pos()); - self.error_span(span, SyntaxError::IdentAfterNum)? - } - _ => Ok(()), - } - } - - /// Read an integer in the given radix. Return `None` if zero digits - /// were read, the integer value otherwise. - /// When `len` is not zero, this - /// will return `None` unless the integer has exactly `len` digits. - pub(super) fn read_int(&mut self, len: u8) -> LexResult> { - let mut count = 0u16; - let v = self.read_digits::<_, Option, RADIX>( - |opt: Option, radix, val| { - count += 1; - let total = opt.unwrap_or_default() * radix as f64 + val as f64; - - Ok((Some(total), count != len as u16)) - }, - true, - )?; - if len != 0 && count != len as u16 { - Ok(None) - } else { - Ok(v) - } - } - - pub(super) fn read_int_u32(&mut self, len: u8) -> LexResult> { - let start = self.state.start; - - let mut count = 0; - let v = self.read_digits::<_, Option, RADIX>( - |opt: Option, radix, val| { - count += 1; - - let total = opt - .unwrap_or_default() - .checked_mul(radix as u32) - .and_then(|v| v.checked_add(val)) - .ok_or_else(|| { - let span = Span::new(start, start); - Error::new(span, SyntaxError::InvalidUnicodeEscape) - })?; - - Ok((Some(total), count != len)) - }, - true, - )?; - if len != 0 && count != len { - Ok(None) - } else { - Ok(v) - } - } - - /// `op`- |total, radix, value| -> (total * radix + value, continue) - fn read_digits( - &mut self, - mut op: F, - allow_num_separator: bool, - ) -> LexResult - where - F: FnMut(Ret, u8, u32) -> LexResult<(Ret, bool)>, - Ret: Copy + Default, - { - debug_assert!( - RADIX == 2 || RADIX == 8 || RADIX == 10 || RADIX == 16, - "radix for read_int should be one of 2, 8, 10, 16, but got {RADIX}" - ); - - if cfg!(feature = "debug") { - trace!("read_digits(radix = {}), cur = {:?}", RADIX, self.cur()); - } - - let start = self.cur_pos(); - let mut total: Ret = Default::default(); - let mut prev = 
None; - - while let Some(c) = self.cur() { - if allow_num_separator && c == '_' { - let is_allowed = |c: Option| { - if c.is_none() { - return false; - } - - let c = c.unwrap(); - - c.is_digit(RADIX as _) - }; - let is_forbidden = |c: Option| { - if c.is_none() { - return true; - } - - if RADIX == 16 { - matches!(c.unwrap(), '.' | 'X' | '_' | 'x') - } else { - matches!(c.unwrap(), '.' | 'B' | 'E' | 'O' | '_' | 'b' | 'e' | 'o') - } - }; - - let next = self.input.peek(); - - if !is_allowed(next) || is_forbidden(prev) || is_forbidden(next) { - self.emit_error( - start, - SyntaxError::NumericSeparatorIsAllowedOnlyBetweenTwoDigits, - ); - } - - // Ignore this _ character - unsafe { - // Safety: cur() returns Some(c) where c is a valid char - self.input.bump(); - } - - continue; - } - - // e.g. (val for a) = 10 where radix = 16 - let val = if let Some(val) = c.to_digit(RADIX as _) { - val - } else { - return Ok(total); - }; - - self.bump(); - - let (t, cont) = op(total, RADIX, val)?; - - total = t; - - if !cont { - return Ok(total); - } - - prev = Some(c); - } - - Ok(total) - } - - fn make_legacy_octal(&mut self, start: BytePos, val: f64) -> LexResult { - self.ensure_not_ident()?; - - if self.syntax.typescript() && self.target >= EsVersion::Es5 { - self.emit_error(start, SyntaxError::TS1085); - } - - self.emit_strict_mode_error(start, SyntaxError::LegacyOctal); - - Ok(val) - } -} - #[cfg(test)] mod tests { use std::panic; - use super::*; + use num_bigint::BigInt as BigIntValue; + use swc_atoms::Atom; + + use super::super::*; fn lex(s: &'static str, f: F) -> Ret where diff --git a/crates/swc_ecma_lexer/src/lexer/state.rs b/crates/swc_ecma_lexer/src/lexer/state.rs index d7288fdfaa6d..c3fc4e38fbc0 100644 --- a/crates/swc_ecma_lexer/src/lexer/state.rs +++ b/crates/swc_ecma_lexer/src/lexer/state.rs @@ -5,23 +5,26 @@ use swc_common::{BytePos, Span}; use swc_ecma_ast::EsVersion; use tracing::trace; -use super::{ - comments_buffer::{BufferedComment, BufferedCommentKind}, - 
Context, Input, Lexer, -}; +use super::{Context, Input, Lexer, LexerTrait}; use crate::{ + common::{ + input::Tokens, + lexer::{ + char::CharExt, + comments_buffer::{BufferedComment, BufferedCommentKind}, + state::State as StateTrait, + }, + }, error::{Error, SyntaxError}, - input::Tokens, - lexer::util::CharExt, token::{BinOpToken, Keyword, Token, TokenAndSpan, TokenKind, WordKind}, - Syntax, *, + *, }; /// State of lexer. /// /// Ported from babylon. #[derive(Clone)] -pub(super) struct State { +pub struct State { pub is_expr_allowed: bool, pub next_regexp: Option, /// if line break exists between previous token and new token? @@ -42,8 +45,93 @@ pub(super) struct State { token_type: Option, } +impl common::lexer::state::State for State { + type TokenKind = crate::token::TokenKind; + type TokenType = self::TokenType; + + #[inline(always)] + fn is_expr_allowed(&self) -> bool { + self.is_expr_allowed + } + + #[inline(always)] + fn set_is_expr_allowed(&mut self, is_expr_allowed: bool) { + self.is_expr_allowed = is_expr_allowed; + } + + #[inline(always)] + fn set_next_regexp(&mut self, start: Option) { + self.next_regexp = start; + } + + #[inline(always)] + fn had_line_break(&self) -> bool { + self.had_line_break + } + + #[inline(always)] + fn set_had_line_break(&mut self, had_line_break: bool) { + self.had_line_break = had_line_break; + } + + #[inline(always)] + fn had_line_break_before_last(&self) -> bool { + self.had_line_break_before_last + } + + #[inline(always)] + fn token_contexts(&self) -> &crate::TokenContexts { + &self.context + } + + #[inline(always)] + fn mut_token_contexts(&mut self) -> &mut crate::TokenContexts { + &mut self.context + } + + #[inline(always)] + fn set_token_type(&mut self, token_type: Self::TokenType) { + self.token_type = Some(token_type); + } + + #[inline(always)] + fn token_type(&self) -> Option { + self.token_type + } + + #[inline(always)] + fn set_tpl_start(&mut self, start: BytePos) { + self.tpl_start = start; + } + + 
#[inline(always)] + fn syntax(&self) -> crate::Syntax { + self.syntax + } + + #[inline(always)] + fn prev_hi(&self) -> BytePos { + self.prev_hi + } + + #[inline(always)] + fn start(&self) -> BytePos { + self.start + } + + #[inline(always)] + fn add_current_line(&mut self, offset: usize) { + self.cur_line += offset; + } + + #[inline(always)] + fn set_line_start(&mut self, line_start: BytePos) { + self.line_start = line_start; + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum TokenType { +pub enum TokenType { Template, Dot, Colon, @@ -64,7 +152,7 @@ enum TokenType { } impl TokenType { #[inline] - const fn before_expr(self) -> bool { + pub const fn before_expr(self) -> bool { match self { TokenType::JSXName | TokenType::JSXTagStart @@ -122,7 +210,205 @@ impl From for TokenType { } } -impl Tokens for Lexer<'_> { +impl crate::common::lexer::state::TokenKind for TokenType { + #[inline(always)] + fn is_dot(self) -> bool { + self == Self::Dot + } + + #[inline(always)] + fn is_bin_op(self) -> bool { + matches!(self, Self::BinOp(_)) + } + + #[inline(always)] + fn is_semi(self) -> bool { + self == Self::Semi + } + + #[inline(always)] + fn is_template(self) -> bool { + self == Self::Template + } + + #[inline(always)] + fn is_keyword(self) -> bool { + matches!(self, Self::Keyword(_)) + } + + #[inline(always)] + fn is_colon(self) -> bool { + self == Self::Colon + } + + #[inline(always)] + fn is_lbrace(self) -> bool { + self == Self::LBrace + } + + #[inline(always)] + fn is_rbrace(self) -> bool { + unreachable!("RBrace is not a token type") + } + + #[inline(always)] + fn is_lparen(self) -> bool { + unreachable!("LParen is not a token type") + } + + #[inline(always)] + fn is_rparen(self) -> bool { + self == Self::RParen + } + + #[inline(always)] + fn is_keyword_fn(self) -> bool { + self == Self::Keyword(Keyword::Function) + } + + #[inline(always)] + fn is_keyword_return(self) -> bool { + self == Self::Keyword(Keyword::Return) + } + + #[inline(always)] + fn 
is_keyword_yield(self) -> bool { + self == Self::Keyword(Keyword::Yield) + } + + #[inline(always)] + fn is_keyword_else(self) -> bool { + self == Self::Keyword(Keyword::Else) + } + + #[inline(always)] + fn is_keyword_class(self) -> bool { + self == Self::Keyword(Keyword::Class) + } + + #[inline(always)] + fn is_keyword_let(self) -> bool { + self == Self::Keyword(Keyword::Let) + } + + #[inline(always)] + fn is_keyword_var(self) -> bool { + self == Self::Keyword(Keyword::Var) + } + + #[inline(always)] + fn is_keyword_const(self) -> bool { + self == Self::Keyword(Keyword::Const) + } + + #[inline(always)] + fn is_keyword_if(self) -> bool { + self == Self::Keyword(Keyword::If) + } + + #[inline(always)] + fn is_keyword_while(self) -> bool { + self == Self::Keyword(Keyword::While) + } + + #[inline(always)] + fn is_keyword_for(self) -> bool { + self == Self::Keyword(Keyword::For) + } + + #[inline(always)] + fn is_keyword_with(self) -> bool { + self == Self::Keyword(Keyword::With) + } + + #[inline(always)] + fn is_lt(self) -> bool { + self == Self::BinOp(BinOpToken::Lt) + } + + #[inline(always)] + fn is_gt(self) -> bool { + self == Self::BinOp(BinOpToken::Gt) + } + + #[inline(always)] + fn is_arrow(self) -> bool { + self == Self::Arrow + } + + #[inline(always)] + fn is_ident(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn is_known_ident_of(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn is_slash(self) -> bool { + self == Self::BinOp(BinOpToken::Div) + } + + #[inline(always)] + fn is_dollar_lbrace(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn is_plus_plus(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn is_minus_minus(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn is_back_quote(self) -> bool { + unreachable!() + } + + #[inline(always)] + fn before_expr(self) -> bool { + self.before_expr() + } + + #[inline(always)] + fn is_jsx_tag_start(self) -> bool { + self == Self::JSXTagStart + } + + 
#[inline(always)] + fn is_jsx_tag_end(self) -> bool { + self == Self::JSXTagEnd + } +} + +impl crate::common::lexer::state::TokenType for TokenType { + #[inline(always)] + fn is_other_and_before_expr_is_false(self) -> bool { + match self { + TokenType::Other { before_expr, .. } => !before_expr, + _ => false, + } + } + + #[inline(always)] + fn is_other_and_can_have_trailing_comment(self) -> bool { + match self { + TokenType::Other { + can_have_trailing_comment, + .. + } => can_have_trailing_comment, + _ => false, + } + } +} + +impl Tokens for Lexer<'_> { #[inline] fn set_ctx(&mut self, ctx: Context) { if ctx.contains(Context::Module) && !self.module_errors.borrow().is_empty() { @@ -154,7 +440,7 @@ impl Tokens for Lexer<'_> { #[inline] fn set_expr_allowed(&mut self, allow: bool) { - self.set_expr_allowed(allow) + self.state.is_expr_allowed = allow; } #[inline] @@ -177,10 +463,12 @@ impl Tokens for Lexer<'_> { self.state.context = c; } + #[inline] fn add_error(&self, error: Error) { self.errors.borrow_mut().push(error); } + #[inline] fn add_module_mode_error(&self, error: Error) { if self.ctx.contains(Context::Module) { self.add_error(error); @@ -189,64 +477,23 @@ impl Tokens for Lexer<'_> { self.module_errors.borrow_mut().push(error); } + #[inline] fn take_errors(&mut self) -> Vec { take(&mut self.errors.borrow_mut()) } + #[inline] fn take_script_module_errors(&mut self) -> Vec { take(&mut self.module_errors.borrow_mut()) } + #[inline] fn end_pos(&self) -> BytePos { self.input.end_pos() } } impl Lexer<'_> { - /// Consume pending comments. - /// - /// This is called when the input is exhausted. 
- #[cold] - #[inline(never)] - fn consume_pending_comments(&mut self) { - if let Some(comments) = self.comments.as_mut() { - let comments_buffer = self.comments_buffer.as_mut().unwrap(); - let last = self.state.prev_hi; - - // move the pending to the leading or trailing - for c in comments_buffer.take_pending_leading() { - // if the file had no tokens and no shebang, then treat any - // comments in the leading comments buffer as leading. - // Otherwise treat them as trailing. - if last == self.start_pos { - comments_buffer.push(BufferedComment { - kind: BufferedCommentKind::Leading, - pos: last, - comment: c, - }); - } else { - comments_buffer.push(BufferedComment { - kind: BufferedCommentKind::Trailing, - pos: last, - comment: c, - }); - } - } - - // now fill the user's passed in comments - for comment in comments_buffer.take_comments() { - match comment.kind { - BufferedCommentKind::Leading => { - comments.add_leading(comment.pos, comment.comment); - } - BufferedCommentKind::Trailing => { - comments.add_trailing(comment.pos, comment.comment); - } - } - } - } - } - fn next_token(&mut self, start: &mut BytePos) -> Result, Error> { if let Some(start) = self.state.next_regexp { return Ok(Some(self.read_regexp(start)?)); @@ -413,340 +660,10 @@ impl State { } } -impl State { - pub fn can_skip_space(&self) -> bool { - !self - .context - .current() - .map(|t| t.preserve_space()) - .unwrap_or(false) - } - - pub fn can_have_trailing_line_comment(&self) -> bool { - match self.token_type { - Some(TokenType::BinOp(..)) => false, - _ => true, - } - } - - pub fn can_have_trailing_comment(&self) -> bool { - match self.token_type { - Some(TokenType::Keyword(..)) => false, - Some(TokenType::Semi) | Some(TokenType::LBrace) => true, - Some(TokenType::Other { - can_have_trailing_comment, - .. 
- }) => can_have_trailing_comment, - _ => false, - } - } - - pub fn last_was_tpl_element(&self) -> bool { - matches!(self.token_type, Some(TokenType::Template)) - } - - fn update(&mut self, start: BytePos, next: TokenKind) { - if cfg!(feature = "debug") { - trace!( - "updating state: next={:?}, had_line_break={} ", - next, - self.had_line_break - ); - } - - let prev = self.token_type.take(); - self.token_type = Some(TokenType::from(next)); - - self.is_expr_allowed = self.is_expr_allowed_on_next(prev, start, next); - } - - /// `is_expr_allowed`: previous value. - /// `start`: start of newly produced token. - fn is_expr_allowed_on_next( - &mut self, - prev: Option, - start: BytePos, - next: TokenKind, - ) -> bool { - let State { - ref mut context, - had_line_break, - had_line_break_before_last, - is_expr_allowed, - syntax, - .. - } = *self; - - let is_next_keyword = matches!(next, TokenKind::Word(WordKind::Keyword(..))); - - if is_next_keyword && prev == Some(TokenType::Dot) { - false - } else { - // ported updateContext - match next { - TokenKind::RParen | TokenKind::RBrace => { - // TODO: Verify - if context.len() == 1 { - return true; - } - - let out = context.pop().unwrap(); - - // let a = function(){} - if out == TokenContext::BraceStmt - && matches!( - context.current(), - Some(TokenContext::FnExpr | TokenContext::ClassExpr) - ) - { - context.pop(); - return false; - } - - // ${} in template - if out == TokenContext::TplQuasi { - match context.current() { - Some(TokenContext::Tpl) => return false, - _ => return true, - } - } - - // expression cannot follow expression - !out.is_expr() - } - - TokenKind::Word(WordKind::Keyword(Keyword::Function)) => { - // This is required to lex - // `x = function(){}/42/i` - if is_expr_allowed - && !context.is_brace_block(prev, had_line_break, is_expr_allowed) - { - context.push(TokenContext::FnExpr); - } - false - } - - TokenKind::Word(WordKind::Keyword(Keyword::Class)) => { - if is_expr_allowed - && 
!context.is_brace_block(prev, had_line_break, is_expr_allowed) - { - context.push(TokenContext::ClassExpr); - } - false - } - - TokenKind::Colon - if matches!( - context.current(), - Some(TokenContext::FnExpr | TokenContext::ClassExpr) - ) => - { - // `function`/`class` keyword is object prop - // - // ```JavaScript - // { function: expr, class: expr } - // ``` - context.pop(); // Remove FnExpr or ClassExpr - true - } - - // for (a of b) {} - known_ident_token!("of") - if Some(TokenContext::ParenStmt { is_for_loop: true }) == context.current() => - { - // e.g. for (a of _) => true - !prev - .expect("context.current() if ParenStmt, so prev token cannot be None") - .before_expr() - } - - TokenKind::Word(WordKind::Ident(..)) => { - // variable declaration - match prev { - Some(prev) => match prev { - // handle automatic semicolon insertion. - TokenType::Keyword(Keyword::Let) - | TokenType::Keyword(Keyword::Const) - | TokenType::Keyword(Keyword::Var) - if had_line_break_before_last => - { - true - } - _ => false, - }, - _ => false, - } - } - - TokenKind::LBrace => { - let cur = context.current(); - if syntax.jsx() && cur == Some(TokenContext::JSXOpeningTag) { - context.push(TokenContext::BraceExpr) - } else if syntax.jsx() && cur == Some(TokenContext::JSXExpr) { - context.push(TokenContext::TplQuasi); - } else { - let next_ctxt = - if context.is_brace_block(prev, had_line_break, is_expr_allowed) { - TokenContext::BraceStmt - } else { - TokenContext::BraceExpr - }; - context.push(next_ctxt); - } - true - } - - TokenKind::BinOp(BinOpToken::Div) - if syntax.jsx() && prev == Some(TokenType::JSXTagStart) => - { - context.pop(); - context.pop(); // do not consider JSX expr -> JSX open tag -> ... 
anymore - context.push(TokenContext::JSXClosingTag); // reconsider as closing tag context - false - } - - TokenKind::DollarLBrace => { - context.push(TokenContext::TplQuasi); - true - } - - TokenKind::LParen => { - // if, for, with, while is statement - - context.push(match prev { - Some(TokenType::Keyword(k)) => match k { - Keyword::If | Keyword::With | Keyword::While => { - TokenContext::ParenStmt { is_for_loop: false } - } - Keyword::For => TokenContext::ParenStmt { is_for_loop: true }, - _ => TokenContext::ParenExpr, - }, - _ => TokenContext::ParenExpr, - }); - true - } - - // remains unchanged. - TokenKind::PlusPlus | TokenKind::MinusMinus => is_expr_allowed, - - TokenKind::BackQuote => { - // If we are in template, ` terminates template. - if let Some(TokenContext::Tpl) = context.current() { - context.pop(); - } else { - self.tpl_start = start; - context.push(TokenContext::Tpl); - } - false - } - - // tt.jsxTagStart.updateContext - TokenKind::JSXTagStart => { - context.push(TokenContext::JSXExpr); // treat as beginning of JSX expression - context.push(TokenContext::JSXOpeningTag); // start opening tag context - false - } - - // tt.jsxTagEnd.updateContext - TokenKind::JSXTagEnd => { - let out = context.pop(); - if (out == Some(TokenContext::JSXOpeningTag) - && prev == Some(TokenType::BinOp(BinOpToken::Div))) - || out == Some(TokenContext::JSXClosingTag) - { - context.pop(); - context.current() == Some(TokenContext::JSXExpr) - } else { - true - } - } - - _ => next.before_expr(), - } - } - } -} - #[derive(Clone, Default)] pub struct TokenContexts(pub SmallVec<[TokenContext; 128]>); impl TokenContexts { - /// Returns true if following `LBrace` token is `block statement` according - /// to `ctx`, `prev`, `is_expr_allowed`. 
- fn is_brace_block( - &self, - prev: Option, - had_line_break: bool, - is_expr_allowed: bool, - ) -> bool { - if let Some(TokenType::Colon) = prev { - match self.current() { - Some(TokenContext::BraceStmt) => return true, - // `{ a: {} }` - // ^ ^ - Some(TokenContext::BraceExpr) => return false, - _ => {} - }; - } - - match prev { - // function a() { - // return { a: "" }; - // } - // function a() { - // return - // { - // function b(){} - // }; - // } - Some(TokenType::Keyword(Keyword::Return)) - | Some(TokenType::Keyword(Keyword::Yield)) => { - return had_line_break; - } - - Some(TokenType::Keyword(Keyword::Else)) - | Some(TokenType::Semi) - | None - | Some(TokenType::RParen) => { - return true; - } - - // If previous token was `{` - Some(TokenType::LBrace) => { - // https://github.com/swc-project/swc/issues/3241#issuecomment-1029584460 - // - if self.current() == Some(TokenContext::BraceExpr) { - let len = self.len(); - if let Some(TokenContext::JSXOpeningTag) = self.0.get(len - 2) { - return true; - } - } - - return self.current() == Some(TokenContext::BraceStmt); - } - - // `class C { ... }` - Some(TokenType::BinOp(BinOpToken::Lt)) | Some(TokenType::BinOp(BinOpToken::Gt)) => { - return true - } - - // () => {} - Some(TokenType::Arrow) => return true, - _ => {} - } - - if had_line_break { - if let Some(TokenType::Other { - before_expr: false, .. 
- }) = prev - { - return true; - } - } - - !is_expr_allowed - } - #[inline] pub fn len(&self) -> usize { self.0.len() @@ -803,7 +720,7 @@ pub enum TokenContext { } impl TokenContext { - pub(crate) const fn is_expr(&self) -> bool { + pub const fn is_expr(&self) -> bool { matches!( self, Self::BraceExpr @@ -816,7 +733,7 @@ impl TokenContext { ) } - pub(crate) const fn preserve_space(&self) -> bool { + pub const fn preserve_space(&self) -> bool { match self { Self::Tpl | Self::JSXExpr => true, _ => false, diff --git a/crates/swc_ecma_lexer/src/lexer/table.rs b/crates/swc_ecma_lexer/src/lexer/table.rs index 20377c4b8bcd..0e4f793229e0 100644 --- a/crates/swc_ecma_lexer/src/lexer/table.rs +++ b/crates/swc_ecma_lexer/src/lexer/table.rs @@ -8,8 +8,9 @@ use either::Either; use swc_common::input::Input; use swc_ecma_ast::AssignOp; -use super::{pos_span, util::CharExt, LexResult, Lexer}; +use super::{LexResult, Lexer, LexerTrait}; use crate::{ + common::lexer::{char::CharExt, pos_span}, error::SyntaxError, token::{BinOpToken, IdentLike, Keyword, KnownIdent, Token, Word}, }; @@ -64,74 +65,92 @@ const IDN: ByteHandler = Some(|lexer| lexer.read_ident_unknown().map(Some)); const L_A: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "abstract" => Some(Word::Ident(IdentLike::Known(KnownIdent::Abstract))), - "as" => Some(Word::Ident(IdentLike::Known(KnownIdent::As))), - "await" => Some(Word::Keyword(Keyword::Await)), - "async" => Some(Word::Ident(IdentLike::Known(KnownIdent::Async))), - "assert" => Some(Word::Ident(IdentLike::Known(KnownIdent::Assert))), - "asserts" => Some(Word::Ident(IdentLike::Known(KnownIdent::Asserts))), - "any" => Some(Word::Ident(IdentLike::Known(KnownIdent::Any))), - "accessor" => Some(Word::Ident(IdentLike::Known(KnownIdent::Accessor))), + "abstract" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Abstract, + )))), + "as" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::As)))), + "await" => 
Some(Token::Word(Word::Keyword(Keyword::Await))), + "async" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Async, + )))), + "assert" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Assert, + )))), + "asserts" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Asserts, + )))), + "any" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Any)))), + "accessor" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Accessor, + )))), _ => None, }) }); const L_B: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "break" => Some(Word::Keyword(Keyword::Break)), - "boolean" => Some(Word::Ident(IdentLike::Known(KnownIdent::Boolean))), - "bigint" => Some(Word::Ident(IdentLike::Known(KnownIdent::Bigint))), + "break" => Some(Token::Word(Word::Keyword(Keyword::Break))), + "boolean" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Boolean, + )))), + "bigint" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Bigint, + )))), _ => None, }) }); const L_C: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "case" => Some(Word::Keyword(Keyword::Case)), - "catch" => Some(Word::Keyword(Keyword::Catch)), - "class" => Some(Word::Keyword(Keyword::Class)), - "const" => Some(Word::Keyword(Keyword::Const)), - "continue" => Some(Word::Keyword(Keyword::Continue)), + "case" => Some(Token::Word(Word::Keyword(Keyword::Case))), + "catch" => Some(Token::Word(Word::Keyword(Keyword::Catch))), + "class" => Some(Token::Word(Word::Keyword(Keyword::Class))), + "const" => Some(Token::Word(Word::Keyword(Keyword::Const))), + "continue" => Some(Token::Word(Word::Keyword(Keyword::Continue))), _ => None, }) }); const L_D: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "debugger" => Some(Word::Keyword(Keyword::Debugger)), - "default" => Some(Word::Keyword(Keyword::Default_)), - "delete" => Some(Word::Keyword(Keyword::Delete)), - "do" => Some(Word::Keyword(Keyword::Do)), 
- "declare" => Some(Word::Ident(IdentLike::Known(KnownIdent::Declare))), + "debugger" => Some(Token::Word(Word::Keyword(Keyword::Debugger))), + "default" => Some(Token::Word(Word::Keyword(Keyword::Default_))), + "delete" => Some(Token::Word(Word::Keyword(Keyword::Delete))), + "do" => Some(Token::Word(Word::Keyword(Keyword::Do))), + "declare" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Declare, + )))), _ => None, }) }); const L_E: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "else" => Some(Word::Keyword(Keyword::Else)), - "enum" => Some(Word::Ident(IdentLike::Known(KnownIdent::Enum))), - "export" => Some(Word::Keyword(Keyword::Export)), - "extends" => Some(Word::Keyword(Keyword::Extends)), + "else" => Some(Token::Word(Word::Keyword(Keyword::Else))), + "enum" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Enum)))), + "export" => Some(Token::Word(Word::Keyword(Keyword::Export))), + "extends" => Some(Token::Word(Word::Keyword(Keyword::Extends))), _ => None, }) }); const L_F: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "false" => Some(Word::False), - "finally" => Some(Word::Keyword(Keyword::Finally)), - "for" => Some(Word::Keyword(Keyword::For)), - "function" => Some(Word::Keyword(Keyword::Function)), - "from" => Some(Word::Ident(IdentLike::Known(KnownIdent::From))), + "false" => Some(Token::Word(Word::False)), + "finally" => Some(Token::Word(Word::Keyword(Keyword::Finally))), + "for" => Some(Token::Word(Word::Keyword(Keyword::For))), + "function" => Some(Token::Word(Word::Keyword(Keyword::Function))), + "from" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::From)))), _ => None, }) }); const L_G: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "global" => Some(Word::Ident(IdentLike::Known(KnownIdent::Global))), - "get" => Some(Word::Ident(IdentLike::Known(KnownIdent::Get))), + "global" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Global, + 
)))), + "get" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Get)))), _ => None, }) }); @@ -140,15 +159,23 @@ const L_H: ByteHandler = IDN; const L_I: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "if" => Some(Word::Keyword(Keyword::If)), - "import" => Some(Word::Keyword(Keyword::Import)), - "in" => Some(Word::Keyword(Keyword::In)), - "instanceof" => Some(Word::Keyword(Keyword::InstanceOf)), - "is" => Some(Word::Ident(IdentLike::Known(KnownIdent::Is))), - "infer" => Some(Word::Ident(IdentLike::Known(KnownIdent::Infer))), - "interface" => Some(Word::Ident(IdentLike::Known(KnownIdent::Interface))), - "implements" => Some(Word::Ident(IdentLike::Known(KnownIdent::Implements))), - "intrinsic" => Some(Word::Ident(IdentLike::Known(KnownIdent::Intrinsic))), + "if" => Some(Token::Word(Word::Keyword(Keyword::If))), + "import" => Some(Token::Word(Word::Keyword(Keyword::Import))), + "in" => Some(Token::Word(Word::Keyword(Keyword::In))), + "instanceof" => Some(Token::Word(Word::Keyword(Keyword::InstanceOf))), + "is" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Is)))), + "infer" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Infer, + )))), + "interface" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Interface, + )))), + "implements" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Implements, + )))), + "intrinsic" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Intrinsic, + )))), _ => None, }) }); @@ -157,50 +184,68 @@ const L_J: ByteHandler = IDN; const L_K: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "keyof" => Some(Word::Ident(IdentLike::Known(KnownIdent::Keyof))), + "keyof" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Keyof, + )))), _ => None, }) }); const L_L: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "let" => Some(Word::Keyword(Keyword::Let)), + "let" => 
Some(Token::Word(Word::Keyword(Keyword::Let))), _ => None, }) }); const L_M: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "meta" => Some(Word::Ident(IdentLike::Known(KnownIdent::Meta))), + "meta" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Meta)))), _ => None, }) }); const L_N: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "new" => Some(Word::Keyword(Keyword::New)), - "null" => Some(Word::Null), - "number" => Some(Word::Ident(IdentLike::Known(KnownIdent::Number))), - "never" => Some(Word::Ident(IdentLike::Known(KnownIdent::Never))), - "namespace" => Some(Word::Ident(IdentLike::Known(KnownIdent::Namespace))), + "new" => Some(Token::Word(Word::Keyword(Keyword::New))), + "null" => Some(Token::Word(Word::Null)), + "number" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Number, + )))), + "never" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Never, + )))), + "namespace" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Namespace, + )))), _ => None, }) }); const L_O: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "of" => Some(Word::Ident(IdentLike::Known(KnownIdent::Of))), - "object" => Some(Word::Ident(IdentLike::Known(KnownIdent::Object))), + "of" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Of)))), + "object" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Object, + )))), _ => None, }) }); const L_P: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "public" => Some(Word::Ident(IdentLike::Known(KnownIdent::Public))), - "package" => Some(Word::Ident(IdentLike::Known(KnownIdent::Package))), - "protected" => Some(Word::Ident(IdentLike::Known(KnownIdent::Protected))), - "private" => Some(Word::Ident(IdentLike::Known(KnownIdent::Private))), + "public" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Public, + )))), + "package" => Some(Token::Word(Word::Ident(IdentLike::Known( + 
KnownIdent::Package, + )))), + "protected" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Protected, + )))), + "private" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Private, + )))), _ => None, }) }); @@ -209,61 +254,83 @@ const L_Q: ByteHandler = IDN; const L_R: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "return" => Some(Word::Keyword(Keyword::Return)), - "readonly" => Some(Word::Ident(IdentLike::Known(KnownIdent::Readonly))), - "require" => Some(Word::Ident(IdentLike::Known(KnownIdent::Require))), + "return" => Some(Token::Word(Word::Keyword(Keyword::Return))), + "readonly" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Readonly, + )))), + "require" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Require, + )))), _ => None, }) }); const L_S: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "super" => Some(Word::Keyword(Keyword::Super)), - "static" => Some(Word::Ident(IdentLike::Known(KnownIdent::Static))), - "switch" => Some(Word::Keyword(Keyword::Switch)), - "symbol" => Some(Word::Ident(IdentLike::Known(KnownIdent::Symbol))), - "set" => Some(Word::Ident(IdentLike::Known(KnownIdent::Set))), - "string" => Some(Word::Ident(IdentLike::Known(KnownIdent::String))), - "satisfies" => Some(Word::Ident(IdentLike::Known(KnownIdent::Satisfies))), + "super" => Some(Token::Word(Word::Keyword(Keyword::Super))), + "static" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Static, + )))), + "switch" => Some(Token::Word(Word::Keyword(Keyword::Switch))), + "symbol" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Symbol, + )))), + "set" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Set)))), + "string" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::String, + )))), + "satisfies" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Satisfies, + )))), _ => None, }) }); const L_T: ByteHandler = 
Some(|lexer| { lexer.read_word_with(&|s| match s { - "this" => Some(Word::Keyword(Keyword::This)), - "throw" => Some(Word::Keyword(Keyword::Throw)), - "true" => Some(Word::True), - "typeof" => Some(Word::Keyword(Keyword::TypeOf)), - "try" => Some(Word::Keyword(Keyword::Try)), - "type" => Some(Word::Ident(IdentLike::Known(KnownIdent::Type))), - "target" => Some(Word::Ident(IdentLike::Known(KnownIdent::Target))), + "this" => Some(Token::Word(Word::Keyword(Keyword::This))), + "throw" => Some(Token::Word(Word::Keyword(Keyword::Throw))), + "true" => Some(Token::Word(Word::True)), + "typeof" => Some(Token::Word(Word::Keyword(Keyword::TypeOf))), + "try" => Some(Token::Word(Word::Keyword(Keyword::Try))), + "type" => Some(Token::Word(Word::Ident(IdentLike::Known(KnownIdent::Type)))), + "target" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Target, + )))), _ => None, }) }); const L_U: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "using" => Some(Word::Ident(IdentLike::Known(KnownIdent::Using))), - "unique" => Some(Word::Ident(IdentLike::Known(KnownIdent::Unique))), - "undefined" => Some(Word::Ident(IdentLike::Known(KnownIdent::Undefined))), - "unknown" => Some(Word::Ident(IdentLike::Known(KnownIdent::Unknown))), + "using" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Using, + )))), + "unique" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Unique, + )))), + "undefined" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Undefined, + )))), + "unknown" => Some(Token::Word(Word::Ident(IdentLike::Known( + KnownIdent::Unknown, + )))), _ => None, }) }); const L_V: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "var" => Some(Word::Keyword(Keyword::Var)), - "void" => Some(Word::Keyword(Keyword::Void)), + "var" => Some(Token::Word(Word::Keyword(Keyword::Var))), + "void" => Some(Token::Word(Word::Keyword(Keyword::Void))), _ => None, }) }); const L_W: ByteHandler = Some(|lexer| { 
lexer.read_word_with(&|s| match s { - "while" => Some(Word::Keyword(Keyword::While)), - "with" => Some(Word::Keyword(Keyword::With)), + "while" => Some(Token::Word(Word::Keyword(Keyword::While))), + "with" => Some(Token::Word(Word::Keyword(Keyword::With))), _ => None, }) }); @@ -272,7 +339,7 @@ const L_X: ByteHandler = IDN; const L_Y: ByteHandler = Some(|lexer| { lexer.read_word_with(&|s| match s { - "yield" => Some(Word::Keyword(Keyword::Yield)), + "yield" => Some(Token::Word(Word::Keyword(Keyword::Yield))), _ => None, }) }); diff --git a/crates/swc_ecma_lexer/src/lexer/tests.rs b/crates/swc_ecma_lexer/src/lexer/tests.rs index cb7c96cd6370..c1c6c0b2ed5f 100644 --- a/crates/swc_ecma_lexer/src/lexer/tests.rs +++ b/crates/swc_ecma_lexer/src/lexer/tests.rs @@ -9,6 +9,7 @@ use test::{black_box, Bencher}; use super::state::{lex, lex_module_errors, lex_tokens, with_lexer}; use crate::{ + common::syntax::EsSyntax, error::{Error, SyntaxError}, lexer::state::lex_errors, token::{ @@ -983,7 +984,7 @@ a" fn jsx_01() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1002,7 +1003,7 @@ fn jsx_01() { fn jsx_02() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1028,7 +1029,7 @@ fn jsx_02() { fn jsx_03() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1056,7 +1057,7 @@ fn jsx_03() { fn jsx_04() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1098,7 +1099,7 @@ fn empty() { fn issue_191() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1155,7 +1156,7 @@ fn issue_5722() { fn jsx_05() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + 
crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1181,7 +1182,7 @@ fn jsx_05() { fn issue_299_01() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1218,7 +1219,7 @@ fn issue_299_01() { fn issue_299_02() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1255,7 +1256,7 @@ fn issue_299_02() { fn jsx_string_1() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1292,7 +1293,7 @@ fn jsx_string_1() { fn jsx_string_2() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1329,7 +1330,7 @@ fn jsx_string_2() { fn jsx_string_3() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1366,7 +1367,7 @@ fn jsx_string_3() { fn jsx_string_4() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1403,7 +1404,7 @@ fn jsx_string_4() { fn jsx_string_5() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1440,7 +1441,7 @@ fn jsx_string_5() { fn jsx_string_6() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1477,7 +1478,7 @@ fn jsx_string_6() { fn jsx_string_7() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1514,7 +1515,7 @@ fn jsx_string_7() { fn jsx_string_8() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1551,7 +1552,7 @@ fn 
jsx_string_8() { fn jsx_string_9() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1614,7 +1615,7 @@ fn issue_401() { fn issue_481() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -1960,7 +1961,7 @@ fn issue_2853_4_ts() { #[test] fn issue_2853_5_jsx() { let (tokens, errors) = lex_errors( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -2010,7 +2011,7 @@ fn issue_2853_6_tsx() { #[test] fn issue_2853_7_jsx() { let (tokens, errors) = lex_errors( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), @@ -2196,7 +2197,7 @@ class C { fn issue_9106() { assert_eq!( lex_tokens( - crate::Syntax::Es(crate::EsSyntax { + crate::Syntax::Es(EsSyntax { jsx: true, ..Default::default() }), diff --git a/crates/swc_ecma_lexer/src/lexer/util.rs b/crates/swc_ecma_lexer/src/lexer/util.rs index eaaf1772bf30..acbb196bf174 100644 --- a/crates/swc_ecma_lexer/src/lexer/util.rs +++ b/crates/swc_ecma_lexer/src/lexer/util.rs @@ -2,270 +2,23 @@ //! //! //! 
[babylon/util/identifier.js]:https://github.com/babel/babel/blob/master/packages/babylon/src/util/identifier.js -use std::char; use swc_common::{ comments::{Comment, CommentKind}, input::Input, - BytePos, Span, + Span, }; -use swc_ecma_ast::Ident; -use tracing::warn; -use super::{comments_buffer::BufferedComment, whitespace::SkipWhitespace, Char, LexResult, Lexer}; +use super::{Lexer, LexerTrait}; use crate::{ - error::{Error, SyntaxError}, - lexer::comments_buffer::BufferedCommentKind, - Context, Tokens, + common::lexer::{ + comments_buffer::{BufferedComment, BufferedCommentKind}, + state::State, + }, + error::SyntaxError, }; impl Lexer<'_> { - pub(super) fn span(&self, start: BytePos) -> Span { - let end = self.last_pos(); - if cfg!(debug_assertions) && start > end { - unreachable!( - "assertion failed: (span.start <= span.end). - start = {}, end = {}", - start.0, end.0 - ) - } - Span { lo: start, hi: end } - } - - #[inline(always)] - pub(super) fn bump(&mut self) { - unsafe { - // Safety: Actually this is not safe but this is an internal method. 
- self.input.bump() - } - } - - #[inline(always)] - pub(super) fn is(&mut self, c: u8) -> bool { - self.input.is_byte(c) - } - - #[inline(always)] - pub(super) fn is_str(&self, s: &str) -> bool { - self.input.is_str(s) - } - - #[inline(always)] - pub(super) fn eat(&mut self, c: u8) -> bool { - self.input.eat_byte(c) - } - - #[inline(always)] - pub(super) fn cur(&mut self) -> Option { - self.input.cur() - } - - #[inline(always)] - pub(super) fn peek(&mut self) -> Option { - self.input.peek() - } - - #[inline(always)] - pub(super) fn peek_ahead(&mut self) -> Option { - self.input.peek_ahead() - } - - #[inline(always)] - pub(super) fn cur_pos(&mut self) -> BytePos { - self.input.cur_pos() - } - - #[inline(always)] - pub(super) fn last_pos(&self) -> BytePos { - self.input.last_pos() - } - - /// Shorthand for `let span = self.span(start); self.error_span(span)` - #[cold] - #[inline(never)] - pub(super) fn error(&mut self, start: BytePos, kind: SyntaxError) -> LexResult { - let span = self.span(start); - self.error_span(span, kind) - } - - #[cold] - #[inline(never)] - pub(super) fn error_span(&mut self, span: Span, kind: SyntaxError) -> LexResult { - Err(Error::new(span, kind)) - } - - #[cold] - #[inline(never)] - pub(super) fn emit_error(&mut self, start: BytePos, kind: SyntaxError) { - let span = self.span(start); - self.emit_error_span(span, kind) - } - - #[cold] - #[inline(never)] - pub(super) fn emit_error_span(&mut self, span: Span, kind: SyntaxError) { - if self.ctx.contains(Context::IgnoreError) { - return; - } - - warn!("Lexer error at {:?}", span); - let err = Error::new(span, kind); - self.errors.borrow_mut().push(err); - } - - #[cold] - #[inline(never)] - pub(super) fn emit_strict_mode_error(&mut self, start: BytePos, kind: SyntaxError) { - let span = self.span(start); - self.emit_strict_mode_error_span(span, kind) - } - - #[cold] - #[inline(never)] - pub(super) fn emit_strict_mode_error_span(&mut self, span: Span, kind: SyntaxError) { - if 
self.ctx.contains(Context::Strict) { - self.emit_error_span(span, kind); - return; - } - - let err = Error::new(span, kind); - - self.add_module_mode_error(err); - } - - #[cold] - #[inline(never)] - pub(super) fn emit_module_mode_error(&mut self, start: BytePos, kind: SyntaxError) { - let span = self.span(start); - self.emit_module_mode_error_span(span, kind) - } - - /// Some codes are valid in a strict mode script but invalid in module - /// code. - #[cold] - #[inline(never)] - pub(super) fn emit_module_mode_error_span(&mut self, span: Span, kind: SyntaxError) { - let err = Error::new(span, kind); - - self.add_module_mode_error(err); - } - - /// Skip comments or whitespaces. - /// - /// See https://tc39.github.io/ecma262/#sec-white-space - #[inline(never)] - pub(super) fn skip_space(&mut self) { - loop { - let (offset, newline) = { - let mut skip = SkipWhitespace { - input: self.input.as_str(), - newline: false, - offset: 0, - }; - - skip.scan(); - - (skip.offset, skip.newline) - }; - - self.input.bump_bytes(offset as usize); - if newline { - self.state.had_line_break = true; - } - - if LEX_COMMENTS && self.input.is_byte(b'/') { - if self.peek() == Some('/') { - self.skip_line_comment(2); - continue; - } else if self.peek() == Some('*') { - self.skip_block_comment(); - continue; - } - } - - break; - } - } - - #[inline(never)] - pub(super) fn skip_line_comment(&mut self, start_skip: usize) { - let start = self.cur_pos(); - self.input.bump_bytes(start_skip); - let slice_start = self.cur_pos(); - - // foo // comment for foo - // bar - // - // foo - // // comment for bar - // bar - // - let is_for_next = self.state.had_line_break || !self.state.can_have_trailing_line_comment(); - - // Optimization: Performance improvement with byte-based termination character - // search - let input_str = self.input.as_str(); - let bytes = input_str.as_bytes(); - let mut idx = 0; - let len = bytes.len(); - - // Direct search for line termination characters (ASCII case optimization) - 
while idx < len { - let b = bytes[idx]; - if b == b'\r' || b == b'\n' { - self.state.had_line_break = true; - break; - } else if b > 127 { - // non-ASCII case: Check for Unicode line termination characters - let s = &input_str[idx..]; - if let Some(first_char) = s.chars().next() { - if first_char == '\u{2028}' || first_char == '\u{2029}' { - self.state.had_line_break = true; - break; - } - idx += first_char.len_utf8() - 1; // -1은 아래 증가분 고려 - } - } - idx += 1; - } - - // Process until the end of string if no line termination character is found - if idx == len { - idx = len; - } - - self.input.bump_bytes(idx); - let end = self.cur_pos(); - - // Create and process slice only if comments need to be stored - if let Some(comments) = self.comments_buffer.as_mut() { - let s = unsafe { - // Safety: We know that the start and the end are valid - self.input.slice(slice_start, end) - }; - let cmt = Comment { - kind: CommentKind::Line, - span: Span::new(start, end), - text: self.atoms.atom(s), - }; - - if is_for_next { - comments.push_pending_leading(cmt); - } else { - comments.push(BufferedComment { - kind: BufferedCommentKind::Trailing, - pos: self.state.prev_hi, - comment: cmt, - }); - } - } - - unsafe { - // Safety: We got end from self.input - self.input.reset_to(end); - } - } - /// Expects current char to be '/' and next char to be '*'. #[inline(never)] pub(super) fn skip_block_comment(&mut self) { @@ -301,7 +54,7 @@ impl Lexer<'_> { if was_star && b == b'/' { // Found comment end: "*/" - self.input.bump_bytes(pos + 1); // 종료 '/' 포함해서 이동 + self.input.bump_bytes(pos + 1); let end = self.cur_pos(); @@ -312,7 +65,29 @@ impl Lexer<'_> { is_for_next = false; } - self.store_comment(is_for_next, start, end, slice_start); + if let Some(comments) = self.comments_buffer.as_mut() { + let src = unsafe { + // Safety: We got slice_start and end from self.input so those are valid. 
+ self.input.slice(slice_start, end) + }; + let s = &src[..src.len() - 2]; + let cmt = Comment { + kind: CommentKind::Block, + span: Span::new(start, end), + text: self.atoms.atom(s), + }; + + let _ = self.input.peek(); + if is_for_next { + comments.push_pending_leading(cmt); + } else { + comments.push(BufferedComment { + kind: BufferedCommentKind::Trailing, + pos: self.state.prev_hi, + comment: cmt, + }); + } + } return; } @@ -329,7 +104,7 @@ impl Lexer<'_> { self.state.had_line_break = true; } // Skip multibyte characters - pos += c.len_utf8() - 1; // -1은 아래 증가분 고려 + pos += c.len_utf8() - 1; // `-1` will incrumented below } } @@ -338,123 +113,9 @@ impl Lexer<'_> { } // If we reached here, it's an unterminated block comment - self.input.bump_bytes(len); // 남은 입력 건너뛰기 + self.input.bump_bytes(len); // skip remaining let end = self.input.end_pos(); let span = Span::new(end, end); self.emit_error_span(span, SyntaxError::UnterminatedBlockComment) } - - #[inline(never)] - fn store_comment( - &mut self, - is_for_next: bool, - start: BytePos, - end: BytePos, - slice_start: BytePos, - ) { - if let Some(comments) = self.comments_buffer.as_mut() { - let src = unsafe { - // Safety: We got slice_start and end from self.input so those are valid. - self.input.slice(slice_start, end) - }; - let s = &src[..src.len() - 2]; - let cmt = Comment { - kind: CommentKind::Block, - span: Span::new(start, end), - text: self.atoms.atom(s), - }; - - let _ = self.input.peek(); - if is_for_next { - comments.push_pending_leading(cmt); - } else { - comments.push(BufferedComment { - kind: BufferedCommentKind::Trailing, - pos: self.state.prev_hi, - comment: cmt, - }); - } - } - } -} - -/// Implemented for `char`. -pub trait CharExt: Copy { - fn to_char(self) -> Option; - - /// Test whether a given character code starts an identifier. 
- /// - /// https://tc39.github.io/ecma262/#prod-IdentifierStart - #[inline] - fn is_ident_start(self) -> bool { - let c = match self.to_char() { - Some(c) => c, - None => return false, - }; - Ident::is_valid_start(c) - } - - /// Test whether a given character is part of an identifier. - #[inline] - fn is_ident_part(self) -> bool { - let c = match self.to_char() { - Some(c) => c, - None => return false, - }; - Ident::is_valid_continue(c) - } - - /// See https://tc39.github.io/ecma262/#sec-line-terminators - #[inline] - fn is_line_terminator(self) -> bool { - let c = match self.to_char() { - Some(c) => c, - None => return false, - }; - matches!(c, '\r' | '\n' | '\u{2028}' | '\u{2029}') - } - - /// See https://tc39.github.io/ecma262/#sec-literals-string-literals - #[inline] - fn is_line_break(self) -> bool { - let c = match self.to_char() { - Some(c) => c, - None => return false, - }; - matches!(c, '\r' | '\n') - } - - /// See https://tc39.github.io/ecma262/#sec-white-space - #[inline] - fn is_ws(self) -> bool { - let c = match self.to_char() { - Some(c) => c, - None => return false, - }; - match c { - '\u{0009}' | '\u{000b}' | '\u{000c}' | '\u{0020}' | '\u{00a0}' | '\u{feff}' => true, - _ => { - if self.is_line_terminator() { - // NOTE: Line terminator is not whitespace. - false - } else { - c.is_whitespace() - } - } - } - } -} - -impl CharExt for Char { - #[inline(always)] - fn to_char(self) -> Option { - char::from_u32(self.0) - } -} - -impl CharExt for char { - #[inline(always)] - fn to_char(self) -> Option { - Some(self) - } } diff --git a/crates/swc_ecma_lexer/src/lib.rs b/crates/swc_ecma_lexer/src/lib.rs index 74d216b0beed..f168eea39e0b 100644 --- a/crates/swc_ecma_lexer/src/lib.rs +++ b/crates/swc_ecma_lexer/src/lib.rs @@ -1,3 +1,8 @@ +//! # swc_ecma_lexer +//! +//! This crate provides a lexer for ECMAScript and TypeScript. It can ensure +//! these tokens are correctly parsed. 
+ #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(test, feature(test))] #![deny(clippy::all)] @@ -8,317 +13,27 @@ #![allow(clippy::vec_box)] #![allow(clippy::wrong_self_convention)] #![allow(clippy::match_like_matches_macro)] +#![allow(unexpected_cfgs)] -use serde::{Deserialize, Serialize}; - +pub mod common; pub mod lexer; - -use input::Tokens; -pub use lexer::*; - +mod parser; #[macro_use] pub mod token; pub mod error; pub mod input; mod utils; -#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] -#[serde(deny_unknown_fields, tag = "syntax")] -pub enum Syntax { - /// Standard - #[serde(rename = "ecmascript")] - Es(EsSyntax), - /// This variant requires the cargo feature `typescript` to be enabled. - #[cfg(feature = "typescript")] - #[cfg_attr(docsrs, doc(cfg(feature = "typescript")))] - #[serde(rename = "typescript")] - Typescript(TsSyntax), -} - -impl Default for Syntax { - fn default() -> Self { - Syntax::Es(Default::default()) - } -} - -impl Syntax { - pub fn auto_accessors(self) -> bool { - match self { - Syntax::Es(EsSyntax { - auto_accessors: true, - .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - _ => false, - } - } - - pub fn import_attributes(self) -> bool { - match self { - Syntax::Es(EsSyntax { - import_attributes, .. - }) => import_attributes, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } - - /// Should we parse jsx? - pub fn jsx(self) -> bool { - match self { - Syntax::Es(EsSyntax { jsx: true, .. }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(TsSyntax { tsx: true, .. }) => true, - _ => false, - } - } - - pub fn fn_bind(self) -> bool { - matches!(self, Syntax::Es(EsSyntax { fn_bind: true, .. })) - } - - pub fn decorators(self) -> bool { - match self { - Syntax::Es(EsSyntax { - decorators: true, .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(TsSyntax { - decorators: true, .. 
- }) => true, - _ => false, - } - } - - pub fn decorators_before_export(self) -> bool { - match self { - Syntax::Es(EsSyntax { - decorators_before_export: true, - .. - }) => true, - #[cfg(feature = "typescript")] - Syntax::Typescript(..) => true, - _ => false, - } - } - - /// Should we parse typescript? - #[cfg(not(feature = "typescript"))] - pub const fn typescript(self) -> bool { - false - } - - /// Should we parse typescript? - #[cfg(feature = "typescript")] - pub const fn typescript(self) -> bool { - matches!(self, Syntax::Typescript(..)) - } - - pub fn export_default_from(self) -> bool { - matches!( - self, - Syntax::Es(EsSyntax { - export_default_from: true, - .. - }) - ) - } - - pub fn dts(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => t.dts, - _ => false, - } - } - - pub fn allow_super_outside_method(self) -> bool { - match self { - Syntax::Es(EsSyntax { - allow_super_outside_method, - .. - }) => allow_super_outside_method, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } - - pub fn allow_return_outside_function(self) -> bool { - match self { - Syntax::Es(EsSyntax { - allow_return_outside_function, - .. - }) => allow_return_outside_function, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => false, - } - } - - pub fn early_errors(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => !t.no_early_errors, - Syntax::Es(..) => true, - } - } - - pub fn disallow_ambiguous_jsx_like(self) -> bool { - match self { - #[cfg(feature = "typescript")] - Syntax::Typescript(t) => t.disallow_ambiguous_jsx_like, - _ => false, - } - } - - pub fn explicit_resource_management(&self) -> bool { - match self { - Syntax::Es(EsSyntax { - explicit_resource_management: using_decl, - .. 
- }) => *using_decl, - #[cfg(feature = "typescript")] - Syntax::Typescript(_) => true, - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TsSyntax { - #[serde(default)] - pub tsx: bool, - - #[serde(default)] - pub decorators: bool, - - /// `.d.ts` - #[serde(skip, default)] - pub dts: bool, - - #[serde(skip, default)] - pub no_early_errors: bool, - - /// babel: `disallowAmbiguousJSXLike` - /// Even when JSX parsing is not enabled, this option disallows using syntax - /// that would be ambiguous with JSX (` y` type assertions and - /// `()=>{}` type arguments) - /// see: https://babeljs.io/docs/en/babel-plugin-transform-typescript#disallowambiguousjsxlike - #[serde(skip, default)] - pub disallow_ambiguous_jsx_like: bool, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct EsSyntax { - #[serde(default)] - pub jsx: bool, - - /// Support function bind expression. - #[serde(rename = "functionBind")] - #[serde(default)] - pub fn_bind: bool, - - /// Enable decorators. - #[serde(default)] - pub decorators: bool, - - /// babel: `decorators.decoratorsBeforeExport` - /// - /// Effective only if `decorator` is true. - #[serde(rename = "decoratorsBeforeExport")] - #[serde(default)] - pub decorators_before_export: bool, - - #[serde(default)] - pub export_default_from: bool, - - /// Stage 3. - #[serde(default, alias = "importAssertions")] - pub import_attributes: bool, - - #[serde(default, rename = "allowSuperOutsideMethod")] - pub allow_super_outside_method: bool, - - #[serde(default, rename = "allowReturnOutsideFunction")] - pub allow_return_outside_function: bool, +use common::parser::{buffer::Buffer, Parser as ParserTrait}; +pub use swc_common::input::StringInput; - #[serde(default)] - pub auto_accessors: bool, - - #[serde(default)] - pub explicit_resource_management: bool, -} - -bitflags::bitflags! 
{ - #[derive(Debug, Clone, Copy, Default)] - pub struct Context: u32 { - - /// `true` while backtracking - const IgnoreError = 1 << 0; - - /// Is in module code? - const Module = 1 << 1; - const CanBeModule = 1 << 2; - const Strict = 1 << 3; - - const ForLoopInit = 1 << 4; - const ForAwaitLoopInit = 1 << 5; - - const IncludeInExpr = 1 << 6; - /// If true, await expression is parsed, and "await" is treated as a - /// keyword. - const InAsync = 1 << 7; - /// If true, yield expression is parsed, and "yield" is treated as a - /// keyword. - const InGenerator = 1 << 8; - - /// If true, await is treated as a keyword. - const InStaticBlock = 1 << 9; - - const IsContinueAllowed = 1 << 10; - const IsBreakAllowed = 1 << 11; - - const InType = 1 << 12; - /// Typescript extension. - const ShouldNotLexLtOrGtAsType = 1 << 13; - /// Typescript extension. - const InDeclare = 1 << 14; - - /// If true, `:` should not be treated as a type annotation. - const InCondExpr = 1 << 15; - const WillExpectColonForCond = 1 << 16; - - const InClass = 1 << 17; - - const InClassField = 1 << 18; - - const InFunction = 1 << 19; - - /// This indicates current scope or the scope out of arrow function is - /// function declaration or function expression or not. - const InsideNonArrowFunctionScope = 1 << 20; - - const InParameters = 1 << 21; - - const HasSuperClass = 1 << 22; - - const InPropertyName = 1 << 23; - - const InForcedJsxContext = 1 << 24; - - // If true, allow super.x and super[x] - const AllowDirectSuper = 1 << 25; - - const IgnoreElseClause = 1 << 26; - - const DisallowConditionalTypes = 1 << 27; - - const AllowUsingDecl = 1 << 28; - - const TopLevel = 1 << 29; - } -} +use self::common::{context::Context, parser::PResult}; +pub use self::{ + common::syntax::{EsSyntax, Syntax, TsSyntax}, + input::Capturing, + lexer::{Lexer, TokenContext, TokenContexts, TokenType}, + parser::Parser, +}; #[cfg(test)] fn with_test_sess(src: &str, f: F) -> Result @@ -651,3 +366,41 @@ macro_rules! 
tok { ))) }; } + +#[inline(always)] +#[cfg(any( + target_arch = "wasm32", + target_arch = "arm", + not(feature = "stacker"), + // miri does not work with stacker + miri +))] +fn maybe_grow R>(_red_zone: usize, _stack_size: usize, callback: F) -> R { + callback() +} + +#[inline(always)] +#[cfg(all( + not(any(target_arch = "wasm32", target_arch = "arm", miri)), + feature = "stacker" +))] +fn maybe_grow R>(red_zone: usize, stack_size: usize, callback: F) -> R { + stacker::maybe_grow(red_zone, stack_size, callback) +} + +#[macro_export] +macro_rules! token_including_semi { + (';') => { + Token::Semi + }; + ($t:tt) => { + $crate::tok!($t) + }; +} + +pub fn lexer(input: Lexer) -> PResult> { + let capturing = input::Capturing::new(input); + let mut parser = parser::Parser::new_from(capturing); + let _ = parser.parse_module()?; + Ok(parser.input_mut().iter_mut().take()) +} diff --git a/crates/swc_ecma_lexer/src/parser/macros.rs b/crates/swc_ecma_lexer/src/parser/macros.rs new file mode 100644 index 000000000000..8ad072b7ccfa --- /dev/null +++ b/crates/swc_ecma_lexer/src/parser/macros.rs @@ -0,0 +1,114 @@ +#[allow(unused)] +macro_rules! peeked_is { + ($p:expr, BindingIdent) => {{ + let ctx = $p.ctx(); + match peek!($p) { + Some(&Token::Word(ref w)) => !ctx.is_reserved(w), + _ => false, + } + }}; + + ($p:expr, IdentRef) => {{ + let ctx = $p.ctx(); + match peek!($p) { + Some(&Token::Word(ref w)) => !ctx.is_reserved(w), + _ => false, + } + }}; + + ($p:expr,IdentName) => {{ + match peek!($p) { + Some(&Token::Word(..)) => true, + _ => false, + } + }}; + + ($p:expr, JSXName) => {{ + match peek!($p) { + Some(&Token::JSXName { .. }) => true, + _ => false, + } + }}; + + ($p:expr, Str) => {{ + match peek!($p) { + Some(&Token::Str { .. 
}) => true, + _ => false, + } + }}; + + ($p:expr, ';') => {{ + compile_error!("peeked_is!(self, ';') is invalid"); + }}; + + ($p:expr, $t:tt) => { + match peek!($p) { + Some(&crate::token_including_semi!($t)) => true, + _ => false, + } + }; +} + +/// cur!($parser, required:bool) +macro_rules! cur { + ($p:expr, false) => {{ + match $p.input.cur() { + Some(c) => Ok(c), + None => { + let pos = $p.input.end_pos(); + let last = Span::new(pos, pos); + + Err(crate::error::Error::new( + last, + crate::error::SyntaxError::Eof, + )) + } + } + }}; +} + +macro_rules! bump { + ($p:expr) => {{ + debug_assert!( + $p.input.knows_cur(), + "parser should not call bump() without knowing current token" + ); + $p.input.bump() + }}; +} + +macro_rules! cur_pos { + ($p:expr) => {{ + $p.input.cur_pos() + }}; +} + +macro_rules! last_pos { + ($p:expr) => { + $p.input.prev_span().hi + }; +} + +macro_rules! trace_cur { + ($p:expr, $name:ident) => {{ + if cfg!(feature = "debug") { + tracing::debug!("{}: {:?}", stringify!($name), $p.input.cur()); + } + }}; +} + +/// This macro requires macro named 'last_pos' to be in scope. +macro_rules! span { + ($p:expr, $start:expr) => {{ + let start: ::swc_common::BytePos = $start; + let end: ::swc_common::BytePos = last_pos!($p); + if cfg!(debug_assertions) && start > end { + unreachable!( + "assertion failed: (span.start <= span.end). 
+ start = {}, end = {}", + start.0, end.0 + ) + } + ::swc_common::Span::new(start, end) + }}; +} diff --git a/crates/swc_ecma_lexer/src/parser/mod.rs b/crates/swc_ecma_lexer/src/parser/mod.rs new file mode 100644 index 000000000000..edc503f4c177 --- /dev/null +++ b/crates/swc_ecma_lexer/src/parser/mod.rs @@ -0,0 +1,215 @@ +#![allow(clippy::let_unit_value)] +#![deny(non_snake_case)] + +use std::ops::DerefMut; + +use swc_atoms::Atom; +use swc_common::Span; +use swc_ecma_ast::*; + +use crate::{ + common::{ + input::Tokens, + parser::{ + buffer::Buffer as BufferTrait, module_item::parse_module_item_block_body, + stmt::parse_stmt_block_body, Parser as ParserTrait, + }, + }, + error::Error, + input::Buffer, + token::{Token, TokenAndSpan}, + Context, Syntax, TsSyntax, *, +}; + +#[macro_use] +mod macros; +#[cfg(feature = "typescript")] +mod typescript; + +/// EcmaScript parser. +#[derive(Clone)] +pub struct Parser> { + state: crate::common::parser::state::State, + input: Buffer, + found_module_item: bool, +} + +impl<'a, I: Tokens> crate::common::parser::Parser<'a> for Parser { + type Buffer = Buffer; + type I = I; + type Lexer = crate::lexer::Lexer<'a>; + type Next = TokenAndSpan; + type Token = Token; + type TokenAndSpan = TokenAndSpan; + + #[inline(always)] + fn input(&self) -> &Self::Buffer { + &self.input + } + + #[inline(always)] + fn input_mut(&mut self) -> &mut Self::Buffer { + &mut self.input + } + + #[inline(always)] + fn state(&self) -> &common::parser::state::State { + &self.state + } + + #[inline(always)] + fn state_mut(&mut self) -> &mut common::parser::state::State { + &mut self.state + } + + #[inline(always)] + fn mark_found_module_item(&mut self) { + self.found_module_item = true; + } +} + +impl> Parser { + pub fn new_from(mut input: I) -> Self { + #[cfg(feature = "typescript")] + let in_declare = matches!( + input.syntax(), + Syntax::Typescript(TsSyntax { dts: true, .. 
}) + ); + #[cfg(not(feature = "typescript"))] + let in_declare = false; + let mut ctx = input.ctx() | Context::TopLevel; + ctx.set(Context::InDeclare, in_declare); + input.set_ctx(ctx); + + Parser { + state: Default::default(), + input: Buffer::new(input), + found_module_item: false, + } + } + + pub fn take_errors(&mut self) -> Vec { + self.input.iter.take_errors() + } + + pub fn take_script_module_errors(&mut self) -> Vec { + self.input.iter.take_script_module_errors() + } + + pub fn parse_script(&mut self) -> PResult