Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ jobs:
name: Run tests in workspace
env:
RUN_RAW_RANGE_TESTS: "true"
RUN_RAW_TOKENS_TESTS: "true"
run: |
rustup target add wasm32-wasip1-threads
pnpm run build-test
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions napi/parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ doctest = false
oxc = { workspace = true, features = ["ast_visit", "regular_expression", "semantic", "serialize"] }
oxc_ast_macros = { workspace = true }
oxc_estree = { workspace = true }
oxc_estree_tokens = { workspace = true }
oxc_napi = { workspace = true }

rustc-hash = { workspace = true }
Expand Down
130 changes: 130 additions & 0 deletions napi/parser/src-js/raw-transfer/eager.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { createRequire } from "node:module";
import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.js";
import { isJsAst, parseAsyncRawImpl, parseSyncRawImpl, returnBufferToCache } from "./common.js";

const require = createRequire(import.meta.url);
Expand Down Expand Up @@ -97,11 +98,34 @@ function deserialize(buffer, sourceText, sourceByteLen, options) {
}
}

// Deserialize tokens
const tokens = options.experimentalTokens ? deserializeTokens(buffer, sourceText, isJs) : null;

// Return buffer to cache, to be reused
returnBufferToCache(buffer);

// We cannot lazily deserialize in the getters, because the buffer might be re-used to parse
// another file before the getter is called
if (tokens !== null) {
return {
get program() {
return data.program;
},
get module() {
return data.module;
},
get comments() {
return data.comments;
},
get tokens() {
return tokens;
},
get errors() {
return data.errors;
},
};
}

return {
get program() {
return data.program;
Expand All @@ -117,3 +141,109 @@ function deserialize(buffer, sourceText, sourceByteLen, options) {
},
};
}

// `ESTreeKind` discriminants (set by Rust side).
// Only the discriminants that need special handling in `deserializeToken` are named here:
// private identifiers get their leading `#` stripped, and regexps get a `regex` property.
const PRIVATE_IDENTIFIER_KIND = 2;
const REGEXP_KIND = 8;

// ESTree `type` string for each token, indexed by `ESTreeKind` discriminant
// (matches `ESTreeKind` enum in `estree_kind.rs`).
// Note: index 2 is "PrivateIdentifier" (`PRIVATE_IDENTIFIER_KIND`) and
// index 8 is "RegularExpression" (`REGEXP_KIND`) — keep in sync with the constants above.
const TOKEN_TYPES = [
  "Identifier",
  "Keyword",
  "PrivateIdentifier",
  "Punctuator",
  "Numeric",
  "String",
  "Boolean",
  "Null",
  "RegularExpression",
  "Template",
  "JSXText",
  "JSXIdentifier",
];

// Byte layout of Rust `Token` type as serialized into the buffer:
// 16 bytes per token; `start`/`end` are `u32`s at offsets 0 and 4,
// `kind` is the byte at offset 8, and the "is escaped" flag is the byte at offset 10.
const TOKEN_SIZE = 16;
const KIND_FIELD_OFFSET = 8;
const IS_ESCAPED_FIELD_OFFSET = 10;

/**
 * Deserialize all tokens from the raw-transfer buffer.
 *
 * The buffer's metadata words (at `TOKENS_OFFSET_POS_32` / `TOKENS_LEN_POS_32`)
 * give the byte offset of the first token and the number of tokens;
 * tokens are laid out contiguously, `TOKEN_SIZE` bytes each.
 *
 * @param {Uint8Array} buffer - Buffer containing AST in raw form
 * @param {string} sourceText - Source for the file
 * @param {boolean} isJs - `true` if parsing in JS mode
 * @returns {Object[]} - Array of token objects
 */
function deserializeTokens(buffer, sourceText, isJs) {
  const { uint32 } = buffer;

  const firstTokenPos = uint32[TOKENS_OFFSET_POS_32];
  const tokenCount = uint32[TOKENS_LEN_POS_32];

  const tokens = [];
  for (let i = 0; i < tokenCount; i++) {
    const tokenPos = firstTokenPos + i * TOKEN_SIZE;
    tokens.push(deserializeToken(tokenPos, buffer, sourceText, isJs));
  }
  return tokens;
}

/**
 * Deserialize a single token from the buffer at byte position `pos`.
 *
 * The token's `value` is the source text between its `start` and `end` offsets,
 * adjusted per kind: regexps gain a `regex: { pattern, flags }` property,
 * private identifiers lose their leading `#`, and escaped identifiers /
 * keywords / private identifiers are unescaped in JS mode.
 *
 * @param {number} pos - Position in buffer containing Rust `Token` type
 * @param {Uint8Array} buffer - Buffer containing AST in raw form
 * @param {string} sourceText - Source for the file
 * @param {boolean} isJs - `true` if parsing in JS mode
 * @returns {Object} - Token object
 */
function deserializeToken(pos, buffer, sourceText, isJs) {
  // `start` and `end` are `u32`s at the head of the token struct
  const { uint32 } = buffer;
  const wordIndex = pos >> 2;
  const start = uint32[wordIndex];
  const end = uint32[wordIndex + 1];

  const kind = buffer[pos + KIND_FIELD_OFFSET];
  let value = sourceText.slice(start, end);

  if (kind === REGEXP_KIND) {
    // Source is `/pattern/flags`; flags follow the final `/`
    const lastSlash = value.lastIndexOf("/");
    const regex = {
      pattern: value.slice(1, lastSlash),
      flags: value.slice(lastSlash + 1),
    };
    return { type: "RegularExpression", value, regex, start, end };
  }

  // Strip leading `#` from private identifiers
  if (kind === PRIVATE_IDENTIFIER_KIND) {
    value = value.slice(1);
  }

  // Unescape identifiers, keywords, and private identifiers in JS mode.
  // Kinds 0..=2 are Identifier, Keyword, and PrivateIdentifier.
  const isEscaped = buffer[pos + IS_ESCAPED_FIELD_OFFSET] === 1;
  if (isJs && isEscaped && kind <= PRIVATE_IDENTIFIER_KIND) {
    value = unescapeIdentifier(value);
  }

  return { type: TOKEN_TYPES[kind], value, start, end };
}

/**
 * Unescape an identifier containing `\uXXXX` or `\u{...}` escape sequences.
 *
 * We do this on JS side, because escaped identifiers are so extremely rare that this function
 * is never called in practice anyway.
 *
 * @param {string} name - Identifier name to unescape
 * @returns {string} - Unescaped identifier name
 */
function unescapeIdentifier(name) {
  return name.replace(
    // Group 1 captures the braced form `\u{HEX}`, group 2 the fixed-width form `\uHHHH`;
    // exactly one of the two groups matches per escape.
    /\\u(?:\{([0-9a-fA-F]+)\}|([0-9a-fA-F]{4}))/g,
    (_match, braced, fixed) => {
      const hex = braced !== undefined ? braced : fixed;
      return String.fromCodePoint(Number.parseInt(hex, 16));
    },
  );
}
3 changes: 2 additions & 1 deletion napi/parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use napi_derive::napi;

use oxc::{
allocator::Allocator,
parser::{ParseOptions, Parser, ParserReturn},
parser::{ParseOptions, Parser, ParserReturn, config::RuntimeParserConfig},
semantic::SemanticBuilder,
span::SourceType,
};
Expand Down Expand Up @@ -85,6 +85,7 @@ fn parse_impl<'a>(
preserve_parens: options.preserve_parens.unwrap_or(true),
..ParseOptions::default()
})
.with_config(RuntimeParserConfig::new(options.tokens.unwrap_or(false)))
.parse()
}

Expand Down
28 changes: 22 additions & 6 deletions napi/parser/src/raw_transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use oxc::{
ast_visit::utf8_to_utf16::Utf8ToUtf16,
semantic::SemanticBuilder,
};
use oxc_estree_tokens::{ESTreeTokenOptions, update_tokens};
use oxc_napi::get_source_type;

use crate::{
Expand Down Expand Up @@ -214,9 +215,9 @@ unsafe fn parse_raw_impl(
let options = options.unwrap_or_default();
let source_type =
get_source_type(filename, options.lang.as_deref(), options.source_type.as_deref());
let ast_type = get_ast_type(source_type, &options);
let is_ts = get_ast_type(source_type, &options) == AstType::TypeScript;

let data_ptr = {
let (data_offset, tokens_offset, tokens_len) = {
// SAFETY: We checked above that `source_len` does not exceed length of buffer
let source_text = unsafe { buffer.get_unchecked(..source_len) };
// SAFETY: Caller guarantees source occupies this region of the buffer and is valid UTF-8
Expand Down Expand Up @@ -251,8 +252,22 @@ unsafe fn parse_raw_impl(
ArenaVec::new_in(&allocator)
};

// Convert spans to UTF-16
// Convert tokens
let span_converter = Utf8ToUtf16::new(source_text);

let (tokens_offset, tokens_len) = if options.tokens == Some(true) {
let mut tokens = ret.tokens;
update_tokens(&mut tokens, &program, &span_converter, ESTreeTokenOptions::new(is_ts));

let tokens_offset = tokens.as_ptr() as u32;
#[expect(clippy::cast_possible_truncation)]
let tokens_len = tokens.len() as u32;
(tokens_offset, tokens_len)
} else {
(0, 0)
};

// Convert spans to UTF-16
span_converter.convert_program(&mut program);
span_converter.convert_comments(&mut comments);
span_converter.convert_module_record(&mut module_record);
Expand All @@ -270,12 +285,13 @@ unsafe fn parse_raw_impl(
// Write `RawTransferData` to arena, and return pointer to it
let data = RawTransferData { program, comments, module, errors };
let data = allocator.alloc(data);
ptr::from_ref(data).cast::<u8>()
let data_offset = ptr::from_ref(data).cast::<u8>() as u32;

(data_offset, tokens_offset, tokens_len)
};

// Write metadata into end of buffer
#[allow(clippy::cast_possible_truncation)]
let metadata = RawTransferMetadata::new(data_ptr as u32, ast_type == AstType::TypeScript);
let metadata = RawTransferMetadata::new(data_offset, is_ts, tokens_offset, tokens_len);
const RAW_METADATA_OFFSET: usize = BUFFER_SIZE - RAW_METADATA_SIZE;
const _: () = assert!(RAW_METADATA_OFFSET.is_multiple_of(BUMP_ALIGN));
// SAFETY: `RAW_METADATA_OFFSET` is less than length of `buffer`.
Expand Down
4 changes: 2 additions & 2 deletions napi/parser/src/raw_transfer_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@ pub struct RawTransferMetadata {
}

impl RawTransferMetadata {
pub fn new(data_offset: u32, is_ts: bool) -> Self {
Self { data_offset, is_ts, is_jsx: false, has_bom: false, tokens_offset: 0, tokens_len: 0 }
pub fn new(data_offset: u32, is_ts: bool, tokens_offset: u32, tokens_len: u32) -> Self {
Self { data_offset, is_ts, is_jsx: false, has_bom: false, tokens_offset, tokens_len }
}
}

Expand Down
8 changes: 8 additions & 0 deletions napi/parser/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ pub struct ParserOptions {
#[napi(ts_type = "boolean")]
pub range: Option<bool>,

/// Controls whether parser should return tokens.
///
/// This option is not stable yet, and only available with experimental raw transfer.
///
/// @default false
#[napi(skip_typescript, js_name = "experimentalTokens")]
pub tokens: Option<bool>,

/// Emit `ParenthesizedExpression` and `TSParenthesizedType` in AST.
///
/// If this option is true, parenthesized expressions are represented by
Expand Down
9 changes: 7 additions & 2 deletions napi/parser/test/parse-raw-common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ export const TEST_TYPE_INLINE_FIXTURE = 4;

// Bits 0-2 (masked by `TEST_TYPE_MAIN_MASK`) select the main test type;
// the remaining constants are independent single-bit flags that can be OR-ed in.
export const TEST_TYPE_MAIN_MASK = 7;
export const TEST_TYPE_RANGE_PARENT = 8;
export const TEST_TYPE_TOKENS = 16;
export const TEST_TYPE_LAZY = 32;
export const TEST_TYPE_PRETTY = 64;

export const ROOT_DIR_PATH = pathJoin(import.meta.dirname, "../../..");
export const TARGET_DIR_PATH = pathJoin(ROOT_DIR_PATH, "target");
Expand All @@ -23,6 +24,10 @@ export const ACORN_TEST262_DIR_PATH = pathJoin(
ROOT_DIR_PATH,
"tasks/coverage/estree-conformance/tests/test262/test",
);
export const ACORN_TEST262_TOKENS_DIR_PATH = pathJoin(
ROOT_DIR_PATH,
"tasks/coverage/estree-conformance/tests/test262-tokens/test",
);
export const JSX_SHORT_DIR_PATH = "tasks/coverage/estree-conformance/tests/acorn-jsx/pass";
export const JSX_DIR_PATH = pathJoin(ROOT_DIR_PATH, JSX_SHORT_DIR_PATH);
const TS_ESTREE_SHORT_DIR_PATH = "tasks/coverage/estree-conformance/tests/typescript";
Expand Down
Loading
Loading