Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
84 commits
Select commit Hold shift + click to select a range
0593005
chore(es/parser): custom lexer
bvanjoi Apr 17, 2025
1af17c5
chore: use parser/Lexer rather than lexer/Lexer
bvanjoi Apr 21, 2025
05ecdc3
chore(es/parser): split token value and token kind
bvanjoi Apr 21, 2025
50a14cb
chore(es/parser): dont clone token value
bvanjoi Apr 21, 2025
0b13a36
faster bin/assign op check
bvanjoi Apr 22, 2025
2ab7acc
faster is_keyword/is_known_ident
bvanjoi Apr 22, 2025
68ea259
rm swc_ecma_lexer/TokenKind in lexer state update
bvanjoi Apr 22, 2025
b12edce
rm swc_ecma_lexer/TokenType in lexer state update
bvanjoi Apr 22, 2025
f1494c6
fix ci
bvanjoi Apr 22, 2025
e408467
chore(es/parser): common lexer::comment_buffer
bvanjoi Apr 23, 2025
b4ddd63
chore(es/parser): common lexer::whitespace
bvanjoi Apr 23, 2025
ffef036
chore(es/parser): common lexer::LexResult
bvanjoi Apr 23, 2025
1dcf727
chore: common lexer::{Char, CharIter, CharExt}
bvanjoi Apr 23, 2025
2bd112a
chore: common syntax
bvanjoi Apr 23, 2025
2d3a038
chore: common Context
bvanjoi Apr 23, 2025
b5f02b6
chore: common input::Tokens
bvanjoi Apr 23, 2025
e242bbf
chore: rm useless input
bvanjoi Apr 23, 2025
005782d
chore(es/parser): common lexer:State
bvanjoi Apr 23, 2025
3c4f255
inline methods in TokenKind
bvanjoi Apr 24, 2025
97f06dd
chore(es/lexer): common utils of lexer, part1
bvanjoi Apr 25, 2025
1990be2
chore(es/lexer): common emit_error of lexer
bvanjoi Apr 25, 2025
61e1c80
chore(es/lexer): common `skip_line_comment`
bvanjoi Apr 25, 2025
9e494f9
chore(es/lexer): common `skip_space`
bvanjoi Apr 26, 2025
f1c3d80
chore(es/lexer): rm `store_comment`
bvanjoi Apr 26, 2025
5f0a376
chore(es/lexer): common number, part1
bvanjoi Apr 26, 2025
2200899
chore(es/lexer): common number, part2
bvanjoi Apr 26, 2025
49c309a
chore(es/lexer): common `consume_pending_comments`
bvanjoi Apr 26, 2025
ad2f1b5
chore(es/lexer): common `read_jsx_word`
bvanjoi Apr 26, 2025
f731312
chore(es/lexer): common `read_jsx_str`
bvanjoi Apr 26, 2025
7639d5b
chore(es/lexer): common read_token, part1
bvanjoi Apr 27, 2025
11dd6df
chore(es/lexer): common read_token, part2
bvanjoi Apr 27, 2025
0c1e6a6
chore(es/lexer): common read_token, part3
bvanjoi Apr 27, 2025
7ddce90
chore(es/parser): common `ExprExt`
bvanjoi Apr 27, 2025
38fac47
chore(es/parser): delete test in lexer/parser
bvanjoi Apr 27, 2025
19483f3
chore(es/parser): common `with_state`
bvanjoi Apr 27, 2025
8f387a4
chore(es/parser): common buffer
bvanjoi Apr 28, 2025
cba7792
chore(es/parser): common `WithCtx`
bvanjoi Apr 28, 2025
14e4f04
chore(es/lexer): some traits
bvanjoi Apr 28, 2025
8a62879
chore(es/parser): common `emit_error`
bvanjoi Apr 29, 2025
38dd0d0
chore(es/parser): common helpers
bvanjoi Apr 29, 2025
6d40db2
chore(es/parser): common `Verifier`
bvanjoi Apr 29, 2025
6088581
chore(es/parser): common `verify_expr`
bvanjoi Apr 29, 2025
5ddc49a
chore(es/parser): common `parse_lit`
bvanjoi Apr 29, 2025
96913aa
chore(es/parser): common `parse_ident_name`
bvanjoi Apr 29, 2025
d54c0df
chore(es/parser): common `make_decl_declare`
bvanjoi Apr 29, 2025
5ebf51a
chore(es/parser): common parse private name
bvanjoi Apr 30, 2025
b43056d
chore(es/parser): common parse_ident
bvanjoi May 1, 2025
6d2ef49
chore(es/parser): common `is_start_of_expr`
bvanjoi May 1, 2025
5e96c40
chore(es/parser): binding ident and ts modifier
bvanjoi May 1, 2025
ab747cf
chore(es/parser): common `parse_ts_this_type_node`
bvanjoi May 2, 2025
72feea9
chore(es/parser): common `parse_tpl_element`
bvanjoi May 2, 2025
0ce0b21
chore(es/parser): parse jsx name
bvanjoi May 4, 2025
5f03110
chore(es/parser): parse jsx empty expr
bvanjoi May 4, 2025
f907c25
chore(es/parser): common `eat_any_ts_modifier`
bvanjoi May 4, 2025
a188f05
chore(es/parser): common `parse_jsx_text`
bvanjoi May 4, 2025
fe5f85f
chore(es/parser): common reparse_expr_as_pat
bvanjoi May 6, 2025
491d9b7
chore(es/parser): common parse_paren_items_as_params
bvanjoi May 6, 2025
9559649
chore(es/parser): common parse_assignment_expr
bvanjoi May 6, 2025
20ac777
chore(es/parser): common parse_ts_entity_name
bvanjoi May 6, 2025
7663175
chore(es/parser): common parse_ty, part1
bvanjoi May 6, 2025
2147d59
chore(es/parser): common parse_ty, part2
bvanjoi May 7, 2025
a69c928
chore(es/parser): common parse_ty, part3
bvanjoi May 7, 2025
512dd41
chore(es/parser): use assoc const item for token
bvanjoi May 7, 2025
eba0ca8
chore(es/parser): common parse_ty, part4
bvanjoi May 7, 2025
a5f15de
fix rebase issue
bvanjoi May 8, 2025
dcaeceb
chore(es/parser): common parse_ty, part5
bvanjoi May 8, 2025
a66f294
chore(es/parser): common parse jsx
bvanjoi May 8, 2025
db91d88
chore(es/parser): common parse expr, part1
bvanjoi May 8, 2025
a507880
chore(es/parser): common parse stmt, part1
bvanjoi May 8, 2025
bebb6bc
chore(es/parser): common parse_ident
bvanjoi May 8, 2025
aa37759
chore(es/parser): common parse pattern, part2
bvanjoi May 8, 2025
5de4922
chore(es/parser): common parse_ty, part6
bvanjoi May 8, 2025
014f754
chore(es/parser): common parse pattern, part3
bvanjoi May 9, 2025
96f69ab
chore(es/parser): common parse_ty, part7
bvanjoi May 9, 2025
dca14f0
chore(es/parser): common parse_ty, part8
bvanjoi May 9, 2025
4388351
chore(es/parser): common parse stmt, part2
bvanjoi May 9, 2025
9fd1182
chore(es/parser): common parse expr
bvanjoi May 10, 2025
08f540e
chore(es/parser): common parse stmt
bvanjoi May 10, 2025
32c48fa
chore(es/parser): common parse class and fn
bvanjoi May 11, 2025
97c19b3
Create twenty-rocks-deny.md
kdy1 May 12, 2025
a712751
refactor(es/lexer): delete Korean comment
bvanjoi May 13, 2025
a0b476e
refactor(ecma/lexer): self by value
bvanjoi May 14, 2025
0c6e879
refactor(es/lexer): add lifetime for Input trait
bvanjoi May 17, 2025
3db38aa
Create kind-weeks-study.md
kdy1 May 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/kind-weeks-study.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
swc_common: major
---

refactor(es/parser): Split parser into also-lex/parse-only
6 changes: 6 additions & 0 deletions .changeset/twenty-rocks-deny.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
swc_ecma_lexer: major
swc_ecma_parser: major
---

split token value and token kind
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions crates/swc_common/src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ impl<'a> From<&'a SourceFile> for StringInput<'a> {
}
}

impl Input for StringInput<'_> {
impl<'a> Input<'a> for StringInput<'a> {
#[inline]
fn cur(&self) -> Option<char> {
self.iter.clone().next()
Expand Down Expand Up @@ -133,7 +133,7 @@ impl Input for StringInput<'_> {
}

#[inline]
unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str {
unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
let s = self.orig;

Expand All @@ -151,7 +151,7 @@ impl Input for StringInput<'_> {
}

#[inline]
fn uncons_while<F>(&mut self, mut pred: F) -> &str
fn uncons_while<F>(&mut self, mut pred: F) -> &'a str
where
F: FnMut(char) -> bool,
{
Expand Down Expand Up @@ -237,7 +237,7 @@ impl Input for StringInput<'_> {
}
}

pub trait Input: Clone {
pub trait Input<'a>: Clone {
fn cur(&self) -> Option<char>;
fn peek(&self) -> Option<char>;
fn peek_ahead(&self) -> Option<char>;
Expand Down Expand Up @@ -270,11 +270,11 @@ pub trait Input: Clone {
///
/// - start should be less than or equal to end.
/// - start and end should be in the valid range of input.
unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str;
unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str;

/// Takes items from stream, testing each one with predicate. returns the
/// range of items which passed predicate.
fn uncons_while<F>(&mut self, f: F) -> &str
fn uncons_while<F>(&mut self, f: F) -> &'a str
where
F: FnMut(char) -> bool;

Expand Down
20 changes: 10 additions & 10 deletions crates/swc_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub(crate) type LexResult<T> = Result<T, ErrorKind>;
#[derive(Clone)]
pub struct Lexer<'a, I>
where
I: Input,
I: Input<'a>,
{
comments: Option<&'a dyn Comments>,
pending_leading_comments: Vec<Comment>,
Expand All @@ -41,7 +41,7 @@ where

impl<'a, I> Lexer<'a, I>
where
I: Input,
I: Input<'a>,
{
pub fn new(input: I, comments: Option<&'a dyn Comments>, config: ParserConfig) -> Self {
let start_pos = input.last_pos();
Expand All @@ -65,7 +65,7 @@ where

fn with_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
where
F: for<'any> FnOnce(&mut Lexer<I>, &mut String) -> LexResult<Ret>,
F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String) -> LexResult<Ret>,
{
let b = self.buf.clone();
let mut buf = b.borrow_mut();
Expand All @@ -77,7 +77,7 @@ where

fn with_sub_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
where
F: for<'any> FnOnce(&mut Lexer<I>, &mut String) -> LexResult<Ret>,
F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String) -> LexResult<Ret>,
{
let b = self.sub_buf.clone();
let mut sub_buf = b.borrow_mut();
Expand All @@ -89,7 +89,7 @@ where

fn with_buf_and_raw_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
where
F: for<'any> FnOnce(&mut Lexer<I>, &mut String, &mut String) -> LexResult<Ret>,
F: for<'any> FnOnce(&mut Lexer<'a, I>, &mut String, &mut String) -> LexResult<Ret>,
{
let b = self.buf.clone();
let r = self.raw_buf.clone();
Expand All @@ -103,7 +103,7 @@ where
}
}

impl<I: Input> Iterator for Lexer<'_, I> {
impl<'a, I: Input<'a>> Iterator for Lexer<'a, I> {
type Item = TokenAndSpan;

fn next(&mut self) -> Option<Self::Item> {
Expand Down Expand Up @@ -133,9 +133,9 @@ pub struct LexerState {
pos: BytePos,
}

impl<I> ParserInput for Lexer<'_, I>
impl<'a, I> ParserInput for Lexer<'a, I>
where
I: Input,
I: Input<'a>,
{
type State = LexerState;

Expand Down Expand Up @@ -185,9 +185,9 @@ where
}
}

impl<I> Lexer<'_, I>
impl<'a, I> Lexer<'a, I>
where
I: Input,
I: Input<'a>,
{
#[inline(always)]
fn cur(&mut self) -> Option<char> {
Expand Down
12 changes: 6 additions & 6 deletions crates/swc_css_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ where
///
/// If there are syntax errors but if it was recoverable, it will be appended
/// to `errors`.
pub fn parse_file<'a, 'b, T>(
pub fn parse_file<'a, T>(
fm: &'a SourceFile,
comments: Option<&'b dyn Comments>,
comments: Option<&'a dyn Comments>,
config: ParserConfig,
errors: &mut Vec<Error>,
) -> PResult<T>
where
Parser<Lexer<'b, StringInput<'a>>>: Parse<T>,
Parser<Lexer<'a, StringInput<'a>>>: Parse<T>,
{
parse_string_input(StringInput::from(fm), comments, config, errors)
}
Expand All @@ -57,14 +57,14 @@ where
///
/// If there are syntax errors but if it was recoverable, it will be appended
/// to `errors`.
pub fn parse_string_input<'a, 'b, T>(
pub fn parse_string_input<'a, T>(
input: StringInput<'a>,
comments: Option<&'b dyn Comments>,
comments: Option<&'a dyn Comments>,
config: ParserConfig,
errors: &mut Vec<Error>,
) -> PResult<T>
where
Parser<Lexer<'b, StringInput<'a>>>: Parse<T>,
Parser<Lexer<'a, StringInput<'a>>>: Parse<T>,
{
let lexer = Lexer::new(input, comments, config);
let mut parser = Parser::new(lexer, config);
Expand Down
3 changes: 3 additions & 0 deletions crates/swc_ecma_lexer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ swc_ecma_visit = { version = "10.0.0", path = "../swc_ecma_visit" }
swc_malloc = { version = "1.2.2", path = "../swc_malloc" }
testing = { version = "11.0.0", path = "../testing" }

[[example]]
name = "lexer"

[[bench]]
harness = false
name = "lexer"
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use swc_common::{
errors::{ColorConfig, Handler},
input::StringInput,
sync::Lrc,
FileName, SourceMap,
};
use swc_ecma_parser::{lexer::Lexer, Capturing, Parser, StringInput, Syntax};
use swc_ecma_lexer::{lexer, lexer::Lexer, Syntax};

fn main() {
let cm: Lrc<SourceMap> = Default::default();
Expand All @@ -19,25 +20,15 @@ fn main() {
"function foo() {}".into(),
);

let lexer = Lexer::new(
let l = Lexer::new(
Syntax::Es(Default::default()),
Default::default(),
StringInput::from(&*fm),
None,
);

let capturing = Capturing::new(lexer);

let mut parser = Parser::new_from(capturing);

for e in parser.take_errors() {
e.into_diagnostic(&handler).emit();
}

let _module = parser
.parse_module()
let tokens = lexer(l)
.map_err(|e| e.into_diagnostic(&handler).emit())
.expect("Failed to parse module.");

println!("Tokens: {:?}", parser.input().take());
.expect("Failed to lex.");
println!("Tokens: {tokens:?}",);
}
69 changes: 69 additions & 0 deletions crates/swc_ecma_lexer/src/common/context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
bitflags::bitflags! {
/// Set of context flags, packed into a single `u32`.
///
/// Each flag below occupies exactly one bit (bits 0 through 29); bits 30
/// and 31 are currently unused. Flags documented with "If true" describe
/// the behavior when the corresponding bit is set.
#[derive(Debug, Clone, Copy, Default)]
pub struct Context: u32 {

/// `true` while backtracking
const IgnoreError = 1 << 0;

/// Is in module code?
const Module = 1 << 1;
// NOTE(review): presumably "the input may still turn out to be a module";
// confirm against the parser before relying on this.
const CanBeModule = 1 << 2;
// NOTE(review): presumably strict-mode code; confirm.
const Strict = 1 << 3;

const ForLoopInit = 1 << 4;
const ForAwaitLoopInit = 1 << 5;

const IncludeInExpr = 1 << 6;
/// If true, await expression is parsed, and "await" is treated as a
/// keyword.
const InAsync = 1 << 7;
/// If true, yield expression is parsed, and "yield" is treated as a
/// keyword.
const InGenerator = 1 << 8;

/// If true, await is treated as a keyword.
const InStaticBlock = 1 << 9;

const IsContinueAllowed = 1 << 10;
const IsBreakAllowed = 1 << 11;

const InType = 1 << 12;
/// Typescript extension.
const ShouldNotLexLtOrGtAsType = 1 << 13;
/// Typescript extension.
const InDeclare = 1 << 14;

/// If true, `:` should not be treated as a type annotation.
const InCondExpr = 1 << 15;
const WillExpectColonForCond = 1 << 16;

const InClass = 1 << 17;

const InClassField = 1 << 18;

const InFunction = 1 << 19;

/// This indicates current scope or the scope out of arrow function is
/// function declaration or function expression or not.
const InsideNonArrowFunctionScope = 1 << 20;

const InParameters = 1 << 21;

const HasSuperClass = 1 << 22;

const InPropertyName = 1 << 23;

const InForcedJsxContext = 1 << 24;

/// If true, allow `super.x` and `super[x]`.
const AllowDirectSuper = 1 << 25;

const IgnoreElseClause = 1 << 26;

const DisallowConditionalTypes = 1 << 27;

const AllowUsingDecl = 1 << 28;

const TopLevel = 1 << 29;
}
}
48 changes: 48 additions & 0 deletions crates/swc_ecma_lexer/src/common/input.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use swc_common::BytePos;
use swc_ecma_ast::EsVersion;

use super::{context::Context, syntax::Syntax};
use crate::{error::Error, lexer};

/// A cloneable stream of tokens that also carries the state the parser reads
/// and updates while consuming it (context flags, syntax, target version,
/// token contexts and collected errors).
///
/// The `TokenAndSpan` type parameter is the item type yielded by the
/// underlying [Iterator].
///
/// Clone should be cheap if you are parsing typescript because typescript
/// syntax requires backtracking.
pub trait Tokens<TokenAndSpan>: Clone + Iterator<Item = TokenAndSpan> {
/// Replaces the current [Context] flags.
fn set_ctx(&mut self, ctx: Context);
/// Returns the current [Context] flags.
fn ctx(&self) -> Context;
/// Returns the [Syntax] associated with this token stream.
fn syntax(&self) -> Syntax;
/// Returns the [EsVersion] associated with this token stream.
fn target(&self) -> EsVersion;

/// Returns the start position of the input.
///
/// The default implementation returns `BytePos(0)`.
fn start_pos(&self) -> BytePos {
BytePos(0)
}

// NOTE(review): presumably tells the lexer whether an expression may start
// at the next token; confirm with the implementors.
fn set_expr_allowed(&mut self, allow: bool);
// NOTE(review): presumably asks the lexer to read the next token as a
// regular expression starting at `start`; confirm with the implementors.
fn set_next_regexp(&mut self, start: Option<BytePos>);

/// Returns a shared reference to the token context stack.
fn token_context(&self) -> &lexer::TokenContexts;
/// Returns a mutable reference to the token context stack.
fn token_context_mut(&mut self) -> &mut lexer::TokenContexts;
/// Replaces the token context stack with `_c`.
fn set_token_context(&mut self, _c: lexer::TokenContexts);

/// Records an error.
///
/// Implementors should use `Rc<RefCell<Vec<Error>>>`.
///
/// It is required because the parser should be able to backtrack while
/// parsing typescript code.
fn add_error(&self, error: Error);

/// Adds an error for code that is valid syntax in script mode.
///
/// These errors should be dropped if the input is not a module.
///
/// Implementors should check whether the [Context] indicates module code
/// and buffer the error if it does not. Implementors should also move the
/// buffered errors to the error buffer in `set_ctx` if the parser mode
/// becomes module mode.
fn add_module_mode_error(&self, error: Error);

/// Returns the end position.
// NOTE(review): whether this is the end of the whole input or of the last
// token is not visible from this trait; confirm with the implementors.
fn end_pos(&self) -> BytePos;

/// Takes the errors collected so far, returning them as an owned `Vec`.
fn take_errors(&mut self) -> Vec<Error>;

/// If the program was parsed as a script, this contains the module
/// errors should the program be identified as a module in the future.
fn take_script_module_errors(&mut self) -> Vec<Error>;
}
Loading
Loading