From 5acc6ec3e9b51b3c6649409759e5039b6bdce8eb Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Thu, 20 Feb 2025 14:37:19 +0000 Subject: [PATCH] fix(wasm): transfer AST to JS as JSON string (#9259) In the [@oxc-parser/wasm](https://www.npmjs.com/package/@oxc-parser/wasm) NPM package, transfer the AST to JS as a JSON string, and parse it to a `Program` object on the JS side. This fixes a couple of bugs - previously `RegExp`s and `BigInt`s were deserialized as `null`. The mechanism for doing this is pretty hacky. After `wasm-pack` generates the JS binding files, a script modifies these files to add a getter for the `program` property on `ParseResult`. That getter deserializes the JSON to a `Program`. The hackiness is not ideal, but I'm not familiar enough with WASM build systems to see if there's a better way. This does at least seem to work! --- napi/parser/index.js | 2 ++ npm/parser-wasm/package.json | 5 +-- wasm/parser/package.json | 3 +- wasm/parser/src/lib.rs | 14 ++++++--- wasm/parser/test-node.mjs | 16 ++++++++-- wasm/parser/update-bindings.mjs | 56 +++++++++++++++++++++++++++++++++ 6 files changed, 87 insertions(+), 9 deletions(-) create mode 100644 wasm/parser/update-bindings.mjs diff --git a/napi/parser/index.js b/napi/parser/index.js index ea32cc6f886a4..5a0f6658d4378 100644 --- a/napi/parser/index.js +++ b/napi/parser/index.js @@ -13,6 +13,8 @@ function wrap(result) { return { get program() { if (!program) { + // Note: This code is repeated in `wasm/parser/update-bindings.mjs`. + // Any changes should be applied in both places. program = JSON.parse(result.program, function(key, value) { // Set `value` field of `Literal`s for `BigInt`s and `RegExp`s. // This is not possible to do on Rust side, as neither can be represented correctly in JSON. 
diff --git a/npm/parser-wasm/package.json b/npm/parser-wasm/package.json index 95c32b676e817..93fdb146bba78 100644 --- a/npm/parser-wasm/package.json +++ b/npm/parser-wasm/package.json @@ -1,6 +1,6 @@ { "name": "@oxc-parser/wasm", - "version": "0.38.0", + "version": "0.51.0", "description": "Wasm target for the oxc parser.", "keywords": [ "JavaScript", @@ -29,10 +29,11 @@ "@oxc-project/types": "workspace:^" }, "scripts": { - "build": "pnpm run build-node && pnpm run build-web && pnpm run copy-files && pnpm run clean-files", + "build": "pnpm run build-node && pnpm run build-web && pnpm run update-bindings && pnpm run copy-files && pnpm run clean-files", "build-node": "pnpm run build-base --target nodejs --out-dir ../../npm/parser-wasm/node .", "build-web": "pnpm run build-base --target web --out-dir ../../npm/parser-wasm/web .", "build-base": "wasm-pack build --release --no-pack", + "update-bindings": "node ./update-bindings.mjs", "copy-files": "cp ./package.json ../../npm/parser-wasm/package.json && cp ./README.md ../../npm/parser-wasm/README.md", "clean-files": "rm ../../npm/parser-wasm/*/.gitignore", "test": "node ./test-node.mjs", diff --git a/wasm/parser/package.json b/wasm/parser/package.json index 146f949b1949e..93fdb146bba78 100644 --- a/wasm/parser/package.json +++ b/wasm/parser/package.json @@ -29,10 +29,11 @@ "@oxc-project/types": "workspace:^" }, "scripts": { - "build": "pnpm run build-node && pnpm run build-web && pnpm run copy-files && pnpm run clean-files", + "build": "pnpm run build-node && pnpm run build-web && pnpm run update-bindings && pnpm run copy-files && pnpm run clean-files", "build-node": "pnpm run build-base --target nodejs --out-dir ../../npm/parser-wasm/node .", "build-web": "pnpm run build-base --target web --out-dir ../../npm/parser-wasm/web .", "build-base": "wasm-pack build --release --no-pack", + "update-bindings": "node ./update-bindings.mjs", "copy-files": "cp ./package.json ../../npm/parser-wasm/package.json && cp ./README.md 
../../npm/parser-wasm/README.md", "clean-files": "rm ../../npm/parser-wasm/*/.gitignore", "test": "node ./test-node.mjs", diff --git a/wasm/parser/src/lib.rs b/wasm/parser/src/lib.rs index b846305d0a96e..8c3bf6c41c2de 100644 --- a/wasm/parser/src/lib.rs +++ b/wasm/parser/src/lib.rs @@ -26,9 +26,15 @@ pub struct ParserOptions { #[derive(Default, Tsify)] #[wasm_bindgen(getter_with_clone)] pub struct ParseResult { - #[wasm_bindgen(readonly, skip_typescript)] + // Dummy field, only present to make `tsify` include it in the type definition for `ParseResult`. + // The getter for this field in WASM bindings is generated by `update-bindings.mjs` script. + #[wasm_bindgen(skip)] #[tsify(type = "Program")] - pub program: JsValue, + pub program: (), + + #[wasm_bindgen(readonly, skip_typescript, js_name = programJson)] + #[serde(rename = "programJson")] + pub program_json: String, #[wasm_bindgen(readonly, skip_typescript)] #[tsify(type = "Comment[]")] @@ -95,7 +101,7 @@ pub fn parse_sync( let serializer = serde_wasm_bindgen::Serializer::json_compatible(); - let program = ret.program.serialize(&serializer)?; + let program_json = ret.program.to_json(); let comments: Vec = if ret.program.comments.is_empty() { vec![] @@ -143,5 +149,5 @@ pub fn parse_sync( .collect::>() }; - Ok(ParseResult { program, comments, errors }) + Ok(ParseResult { program: (), program_json, comments, errors }) } diff --git a/wasm/parser/test-node.mjs b/wasm/parser/test-node.mjs index 30d3c16e11f3d..ddd58f8b1ffd7 100644 --- a/wasm/parser/test-node.mjs +++ b/wasm/parser/test-node.mjs @@ -1,7 +1,19 @@ import assert from 'assert'; import { parseSync } from '../../npm/parser-wasm/node/oxc_parser_wasm.js'; -const code = 'let foo'; +const code = '/abc/gu; 123n;'; const result = parseSync(code, { sourceFilename: 'test.ts' }); + assert(result.errors.length === 0); -assert(result.program.body.length === 1); + +// Check `program` getter caches result +const program = result.program; +assert(result.program === program); 
+ +// Check output is correct +assert(program.type === 'Program'); +assert(program.body.length === 2); + +// Check `RegExp`s and `BigInt`s are deserialized correctly +assert(program.body[0].expression.value instanceof RegExp); +assert(typeof program.body[1].expression.value === 'bigint'); diff --git a/wasm/parser/update-bindings.mjs b/wasm/parser/update-bindings.mjs new file mode 100644 index 0000000000000..55ab9e627726e --- /dev/null +++ b/wasm/parser/update-bindings.mjs @@ -0,0 +1,56 @@ +// Script to inject code for an extra `program` getter on `class ParseResult` in WASM binding files. + +import assert from 'assert'; +import { readFileSync, writeFileSync } from 'fs'; +import { join as pathJoin } from 'path'; +import { fileURLToPath } from 'url'; + +const pkgDirPath = pathJoin(fileURLToPath(import.meta.url), '../../../npm/parser-wasm'); + +const bindingFilename = 'oxc_parser_wasm.js'; + +// Extra getter on `ParseResult` `get program() { ... }` that gets the program as JSON string, +// and parses it to a `Program` object. +// +// JSON parsing uses a reviver function that sets `value` field of `Literal`s for `BigInt`s and `RegExp`s. +// This is not possible to do on Rust side, as neither can be represented correctly in JSON. +// Invalid regexp, or valid regexp using syntax not supported by the platform is ignored. +// +// The getter caches the result to avoid re-parsing JSON every time `result.program` is accessed. +// +// Note: This code is repeated in `napi/parser/index.js`. +// Any changes should be applied in both places. 
+const getterCode = ` + __program; + + get program() { + if (this.__program) return this.__program; + return this.__program = JSON.parse(this.programJson, function(key, value) { + if (value === null && key === 'value' && Object.hasOwn(this, 'type') && this.type === 'Literal') { + if (Object.hasOwn(this, 'bigint')) { + return BigInt(this.bigint); + } + if (Object.hasOwn(this, 'regex')) { + const { regex } = this; + try { + return RegExp(regex.pattern, regex.flags); + } catch (_err) {} + } + } + return value; + }); + } +`.trimEnd().replace(/ /g, ' '); + +const insertGetterAfter = 'class ParseResult {'; + +for (const dirName of ['node', 'web']) { + const path = pathJoin(pkgDirPath, dirName, bindingFilename); + const code = readFileSync(path, 'utf8'); + + const parts = code.split(insertGetterAfter); + assert(parts.length === 2); + const [before, after] = parts; + const updatedCode = [before, insertGetterAfter, getterCode, after].join(''); + writeFileSync(path, updatedCode); +}