Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 14 additions & 17 deletions apps/oxlint/src/js_plugins/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ unsafe fn parse_raw_impl(
const BOM: &str = "\u{feff}";
const BOM_LEN: usize = BOM.len();

let mut source_text = program.source_text;
let original_source_text = program.source_text;
let mut source_text = original_source_text;
let has_bom = source_text.starts_with(BOM);
if has_bom {
source_text = &source_text[BOM_LEN..];
Expand All @@ -216,22 +217,18 @@ unsafe fn parse_raw_impl(
span_converter.convert_program(program);
span_converter.convert_comments(&mut program.comments);

let (tokens_offset, tokens_len) = if has_bom {
// Fall back to TypeScript token parsing in JS for BOM files.
(0, 0)
} else {
let tokens_json = to_estree_tokens_json(
&tokens,
program,
EstreeTokenOptions::linter(),
&allocator,
);
let tokens_json = allocator.alloc_str(&tokens_json);
let tokens_offset = tokens_json.as_ptr() as u32;
#[expect(clippy::cast_possible_truncation)]
let tokens_len = tokens_json.len() as u32;
(tokens_offset, tokens_len)
};
let tokens_json = to_estree_tokens_json(
&tokens,
program,
original_source_text,
&span_converter,
EstreeTokenOptions::linter(),
&allocator,
);
let tokens_json = allocator.alloc_str(&tokens_json);
let tokens_offset = tokens_json.as_ptr() as u32;
#[expect(clippy::cast_possible_truncation)]
let tokens_len = tokens_json.len() as u32;

// Return offset of `Program` within buffer (bottom 32 bits of pointer)
let program_offset = ptr::from_ref(program) as u32;
Expand Down
1 change: 1 addition & 0 deletions apps/oxlint/test/fixtures/tokens/files/bom.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
a = b;
3 changes: 3 additions & 0 deletions apps/oxlint/test/fixtures/tokens/files/unicode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
a;
// 😀🤪😆😎🤮
b;
107 changes: 105 additions & 2 deletions apps/oxlint/test/fixtures/tokens/output.snap.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,50 @@

# stdout
```
x tokens-plugin(tokens): Identifier ("a")
,-[files/bom.js:1:4]
1 | a = b;
: ^
`----

x tokens-plugin(tokens): Tokens and comments:
| Identifier loc= 1:0 - 1:1 range= 0-1 "a"
| Punctuator loc= 1:2 - 1:3 range= 2-3 "="
| Identifier loc= 1:4 - 1:5 range= 4-5 "b"
| Punctuator loc= 1:5 - 1:6 range= 5-6 ";"
,-[files/bom.js:1:4]
1 | a = b;
: ^^^^^^^
`----

x tokens-plugin(tokens): Tokens:
| Identifier loc= 1:0 - 1:1 range= 0-1 "a"
| Punctuator loc= 1:2 - 1:3 range= 2-3 "="
| Identifier loc= 1:4 - 1:5 range= 4-5 "b"
| Punctuator loc= 1:5 - 1:6 range= 5-6 ";"
,-[files/bom.js:1:4]
1 | a = b;
: ^^^^^^^
`----

x tokens-plugin(tokens): Punctuator ("=")
,-[files/bom.js:1:6]
1 | a = b;
: ^
`----

x tokens-plugin(tokens): Identifier ("b")
,-[files/bom.js:1:8]
1 | a = b;
: ^
`----

x tokens-plugin(tokens): Punctuator (";")
,-[files/bom.js:1:9]
1 | a = b;
: ^
`----

x tokens-plugin(tokens): Keyword ("const")
,-[files/generic_arrow.ts:1:1]
1 | const obj = {
Expand Down Expand Up @@ -1071,8 +1115,67 @@
: ^
`----

Found 0 warnings and 109 errors.
Finished in Xms on 4 files with 1 rules using X threads.
x tokens-plugin(tokens): Identifier ("a")
,-[files/unicode.js:1:1]
1 | a;
: ^
2 | // 😀🤪😆😎🤮
`----

x tokens-plugin(tokens): Tokens and comments:
| Identifier loc= 1:0 - 1:1 range= 0-1 "a"
| Punctuator loc= 1:1 - 1:2 range= 1-2 ";"
| Line loc= 2:0 - 2:13 range= 3-16 " 😀🤪😆😎🤮"
| Identifier loc= 3:0 - 3:1 range= 17-18 "b"
| Punctuator loc= 3:1 - 3:2 range= 18-19 ";"
,-[files/unicode.js:1:1]
1 | ,-> a;
2 | | // 😀🤪😆😎🤮
3 | `-> b;
`----

x tokens-plugin(tokens): Tokens:
| Identifier loc= 1:0 - 1:1 range= 0-1 "a"
| Punctuator loc= 1:1 - 1:2 range= 1-2 ";"
| Identifier loc= 3:0 - 3:1 range= 17-18 "b"
| Punctuator loc= 3:1 - 3:2 range= 18-19 ";"
,-[files/unicode.js:1:1]
1 | ,-> a;
2 | | // 😀🤪😆😎🤮
3 | `-> b;
`----

x tokens-plugin(tokens): Punctuator (";")
,-[files/unicode.js:1:2]
1 | a;
: ^
2 | // 😀🤪😆😎🤮
`----

x tokens-plugin(tokens): Line (" 😀🤪😆😎🤮")
,-[files/unicode.js:2:1]
1 | a;
2 | // 😀🤪😆😎🤮
: ^^^^^^^^^^^^^
3 | b;
`----

x tokens-plugin(tokens): Identifier ("b")
,-[files/unicode.js:3:1]
2 | // 😀🤪😆😎🤮
3 | b;
: ^
`----

x tokens-plugin(tokens): Punctuator (";")
,-[files/unicode.js:3:2]
2 | // 😀🤪😆😎🤮
3 | b;
: ^
`----

Found 0 warnings and 122 errors.
Finished in Xms on 6 files with 1 rules using X threads.
```

# stderr
Expand Down
3 changes: 3 additions & 0 deletions apps/oxlint/test/fixtures/tokens/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ const rule: Rule = {

const { ast } = sourceCode;

// Ensure that `bom.js` does have a BOM (guarding against it being accidentally removed by e.g. formatting)
if (context.filename.endsWith("bom.js")) assert(sourceCode.hasBOM);

for (const tokenOrComment of tokensAndComments) {
// Check getting `range` / `loc` properties twice results in same objects
const { range, loc } = tokenOrComment;
Expand Down
23 changes: 14 additions & 9 deletions crates/oxc_estree_tokens/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,28 @@ impl EstreeTokenOptions {
}

/// Serialize tokens to JSON.
///
/// `source_text` must be the original source text, prior to BOM removal,
/// i.e. the BOM must be present at the start of `source_text` if the file has a BOM.
pub fn to_estree_tokens_json(
tokens: &[Token],
program: &Program<'_>,
source_text: &str,
span_converter: &Utf8ToUtf16,
options: EstreeTokenOptions,
allocator: &Allocator,
) -> String {
let estree_tokens = to_estree_tokens(tokens, program, options, allocator);
let estree_tokens =
to_estree_tokens(tokens, program, source_text, span_converter, options, allocator);
serde_json::to_string_pretty(&estree_tokens).unwrap_or_default()
}

/// Convert `Token`s to `EstreeToken`s.
fn to_estree_tokens<'a>(
tokens: &[Token],
program: &Program<'a>,
source_text: &'a str,
span_converter: &Utf8ToUtf16,
options: EstreeTokenOptions,
allocator: &'a Allocator,
) -> ArenaVec<'a, EstreeToken<'a>> {
Expand All @@ -95,22 +103,19 @@ fn to_estree_tokens<'a>(
};
context.visit_program(program);

// Create UTF-8 to UTF-16 conversion table
let source_text = program.source_text;
let utf8_to_utf16 = Utf8ToUtf16::new(source_text);
let mut converter = utf8_to_utf16.converter();

// Convert tokens to `EstreeToken`s
let mut span_converter = span_converter.converter();

let mut estree_tokens = ArenaVec::with_capacity_in(tokens.len(), allocator);
for token in tokens {
let kind = token.kind();
let source_value = &source_text[token.start() as usize..token.end() as usize];

let mut start = token.start();
let mut end = token.end();
if let Some(converter) = converter.as_mut() {
converter.convert_offset(&mut start);
converter.convert_offset(&mut end);
if let Some(span_converter) = span_converter.as_mut() {
span_converter.convert_offset(&mut start);
span_converter.convert_offset(&mut end);
}
let span_utf16 = Span::new(start, end);

Expand Down
30 changes: 13 additions & 17 deletions crates/oxc_linter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,28 +572,24 @@ impl Linter {
span_converter.convert_program(program);
span_converter.convert_comments(&mut program.comments);

let (tokens_offset, tokens_len) = if has_bom {
// Keep JS fallback path for BOM sources.
(0, 0)
} else if let Some(parser_tokens) = ctx_host.current_sub_host().parser_tokens() {
let tokens_json = to_estree_tokens_json(
parser_tokens,
program,
EstreeTokenOptions::linter(),
allocator,
);
if tokens_json.is_empty() {
(0, 0)
} else {
let (tokens_offset, tokens_len) =
if let Some(tokens) = ctx_host.current_sub_host().parser_tokens() {
let tokens_json = to_estree_tokens_json(
tokens,
program,
original_source_text,
&span_converter,
EstreeTokenOptions::linter(),
allocator,
);
let tokens_json = allocator.alloc_str(&tokens_json);
let tokens_offset = tokens_json.as_ptr() as u32;
#[expect(clippy::cast_possible_truncation)]
let tokens_len = tokens_json.len() as u32;
(tokens_offset, tokens_len)
}
} else {
(0, 0)
};
} else {
(0, 0)
};

// Get offset of `Program` within buffer (bottom 32 bits of pointer)
let program_offset = ptr::from_ref(program) as u32;
Expand Down
2 changes: 2 additions & 0 deletions tasks/benchmark/benches/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ fn bench_estree_tokens(criterion: &mut Criterion) {
let tokens_json = to_estree_tokens_json(
&tokens,
&program,
program.source_text,
&span_converter,
EstreeTokenOptions::test262(),
&allocator,
);
Expand Down
22 changes: 18 additions & 4 deletions tasks/coverage/src/tools.rs
Original file line number Diff line number Diff line change
Expand Up @@ -854,8 +854,14 @@ pub fn run_estree_test262_tokens(files: &[Test262File]) -> Vec<CoverageResult> {
let span_converter = Utf8ToUtf16::new(source_text);
span_converter.convert_program_with_ascending_order_checks(&mut program);

let oxc_tokens_json =
to_estree_tokens_json(&tokens, &program, EstreeTokenOptions::test262(), &allocator);
let oxc_tokens_json = to_estree_tokens_json(
&tokens,
&program,
source_text,
&span_converter,
EstreeTokenOptions::test262(),
&allocator,
);

let token_path = workspace_root()
.join("estree-conformance/tests/test262-tokens")
Expand Down Expand Up @@ -898,8 +904,14 @@ pub fn run_estree_acorn_jsx_tokens(files: &[AcornJsxFile]) -> Vec<CoverageResult
let span_converter = Utf8ToUtf16::new(source_text);
span_converter.convert_program_with_ascending_order_checks(&mut program);

let oxc_tokens_json =
to_estree_tokens_json(&tokens, &program, EstreeTokenOptions::test262(), &allocator);
let oxc_tokens_json = to_estree_tokens_json(
&tokens,
&program,
source_text,
&span_converter,
EstreeTokenOptions::test262(),
&allocator,
);

let token_path = workspace_root().join(f.path.with_extension("tokens.json"));
let expected_tokens_json = fs::read_to_string(&token_path).unwrap_or_default();
Expand Down Expand Up @@ -1078,6 +1090,8 @@ pub fn run_estree_typescript_tokens(files: &[TypeScriptFile]) -> Vec<CoverageRes
let oxc_tokens_json = to_estree_tokens_json(
&tokens,
&program,
source_text,
&span_converter,
EstreeTokenOptions::typescript(),
&allocator,
);
Expand Down
Loading