From c1a8cea4743d49a4e584ca5f5bcb23e0e1248b3c Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Wed, 5 Mar 2025 02:21:02 +0000 Subject: [PATCH] refactor(ast/estree): simplify serializing `RegExpLiteral`s (#9551) Similar to #9546. Acorn outputs `RegExpLiteral`'s flags in the same order as in the source text. There is nothing wrong with this, but it makes our serialization/deserialization code more complex than it needs to be, just to match Acorn. I don't think we need to follow Acorn here - there's no particular "good" order for the flags. Bump `acorn-test262` to include commit https://github.com/oxc-project/acorn-test262/commit/3d7013e145499268dc8f985d087208aa013e0ba2, which reorders the flags in alphabetical order in Acorn's AST. This allows simplifying serialization/deserialization code on our side. --- .github/actions/clone-submodules/action.yml | 2 +- crates/oxc_ast/src/ast/literal.rs | 1 - crates/oxc_ast/src/generated/derive_estree.rs | 2 +- crates/oxc_ast/src/serialize.rs | 78 ++++++------------- justfile | 2 +- napi/parser/deserialize-js.js | 42 ++++------ napi/parser/deserialize-ts.js | 42 ++++------ .../ast_tools/src/generators/raw_transfer.rs | 1 + 8 files changed, 62 insertions(+), 108 deletions(-) diff --git a/.github/actions/clone-submodules/action.yml b/.github/actions/clone-submodules/action.yml index 01c5b637fa7d9..e18a22f087541 100644 --- a/.github/actions/clone-submodules/action.yml +++ b/.github/actions/clone-submodules/action.yml @@ -38,4 +38,4 @@ runs: show-progress: false repository: oxc-project/acorn-test262 path: tasks/coverage/acorn-test262 - ref: 7b7fa95e055352d76ddccc98cabc37ca1448cf92 # Latest main at 4/3/25 + ref: 3d7013e145499268dc8f985d087208aa013e0ba2 # Latest main at 4/3/25 diff --git a/crates/oxc_ast/src/ast/literal.rs b/crates/oxc_ast/src/ast/literal.rs index 8f90f06e8c679..9f60d207dd182 100644 --- a/crates/oxc_ast/src/ast/literal.rs +++ b/crates/oxc_ast/src/ast/literal.rs @@ -122,7 +122,6 
@@ pub struct RegExpLiteral<'a> { pub span: Span, /// The parsed regular expression. See [`oxc_regular_expression`] for more /// details. - #[estree(via = RegExpLiteralRegex)] pub regex: RegExp<'a>, /// The regular expression as it appears in source code /// diff --git a/crates/oxc_ast/src/generated/derive_estree.rs b/crates/oxc_ast/src/generated/derive_estree.rs index 090eb611c3dbc..37e3eeae198a1 100644 --- a/crates/oxc_ast/src/generated/derive_estree.rs +++ b/crates/oxc_ast/src/generated/derive_estree.rs @@ -1945,7 +1945,7 @@ impl ESTree for RegExpLiteral<'_> { state.serialize_field("end", &self.span.end); state.serialize_field("value", &crate::serialize::RegExpLiteralValue(self)); state.serialize_field("raw", &self.raw); - state.serialize_field("regex", &crate::serialize::RegExpLiteralRegex(self)); + state.serialize_field("regex", &self.regex); state.end(); } } diff --git a/crates/oxc_ast/src/serialize.rs b/crates/oxc_ast/src/serialize.rs index 9292dc7509fe6..bf4f5f0ec69c8 100644 --- a/crates/oxc_ast/src/serialize.rs +++ b/crates/oxc_ast/src/serialize.rs @@ -195,65 +195,19 @@ impl ESTree for BigIntLiteralValue<'_, '_> { } } -/// Serializer for `regex` field of `RegExpLiteral`. 
-#[ast_meta] -#[estree( - ts_type = "RegExp", - raw_deser = r#" - let pattern, flags, value = null; - if (THIS.raw === null) { - pattern = DESER[RegExpPattern](POS_OFFSET.regex.pattern); - const flagBits = DESER[u8](POS_OFFSET.regex.flags); - flags = ''; - if (flagBits & 1) flags += 'g'; - if (flagBits & 2) flags += 'i'; - if (flagBits & 4) flags += 'm'; - if (flagBits & 8) flags += 's'; - if (flagBits & 16) flags += 'u'; - if (flagBits & 32) flags += 'y'; - if (flagBits & 64) flags += 'd'; - if (flagBits & 128) flags += 'v'; - } else { - [, pattern, flags] = THIS.raw.match(/^\/(.*)\/([a-z]*)$/); - } - - try { - value = new RegExp(pattern, flags); - } catch (e) {} - - { pattern, flags } - "# -)] -pub struct RegExpLiteralRegex<'a, 'b>(pub &'b RegExpLiteral<'a>); - -impl ESTree for RegExpLiteralRegex<'_, '_> { - fn serialize(&self, serializer: S) { - let mut state = serializer.serialize_struct(); - state.serialize_field("pattern", &self.0.regex.pattern); - - // If `raw` field is present, flags must be in same order as in source to match Acorn. - // Count number of set bits in `flags` to get number of flags - // (cheaper than searching through `raw` for last `/`). - let flags = self.0.regex.flags; - if let Some(raw) = &self.0.raw { - let flags_count = flags.bits().count_ones() as usize; - let flags_index = raw.len() - flags_count; - state.serialize_field("flags", &JsonSafeString(&raw[flags_index..])); - } else { - state.serialize_field("flags", &flags); - } - state.end(); - } -} - /// Serializer for `value` field of `RegExpLiteral`. /// /// Serialized as `null` in JSON, but updated on JS side to contain a `RegExp` if the regexp is valid. 
#[ast_meta] #[estree( ts_type = "RegExp | null", - // `value` is defined by `RegExpLiteralRegex` converter - raw_deser = "value", + raw_deser = " + let value = null; + try { + value = new RegExp(THIS.regex.pattern, THIS.regex.flags); + } catch (e) {} + value + " )] pub struct RegExpLiteralValue<'a, 'b>(#[expect(dead_code)] pub &'b RegExpLiteral<'a>); @@ -274,7 +228,23 @@ impl ESTree for RegExpPatternConverter<'_, '_> { } #[ast_meta] -#[estree(ts_type = "string")] +#[estree( + ts_type = "string", + raw_deser = " + const flagBits = DESER[u8](POS); + let flags = ''; + // Alphabetical order + if (flagBits & 64) flags += 'd'; + if (flagBits & 1) flags += 'g'; + if (flagBits & 2) flags += 'i'; + if (flagBits & 4) flags += 'm'; + if (flagBits & 8) flags += 's'; + if (flagBits & 16) flags += 'u'; + if (flagBits & 128) flags += 'v'; + if (flagBits & 32) flags += 'y'; + flags + " +)] pub struct RegExpFlagsConverter<'b>(pub &'b RegExpFlags); impl ESTree for RegExpFlagsConverter<'_> { diff --git a/justfile b/justfile index a86c187c2dd3a..a198a59852a7f 100755 --- a/justfile +++ b/justfile @@ -40,7 +40,7 @@ submodules: just clone-submodule tasks/coverage/babel https://github.com/babel/babel.git 578ac4df1c8a05f01350553950dbfbbeaac013c2 just clone-submodule tasks/coverage/typescript https://github.com/microsoft/TypeScript.git 15392346d05045742e653eab5c87538ff2a3c863 just clone-submodule tasks/prettier_conformance/prettier https://github.com/prettier/prettier.git 7584432401a47a26943dd7a9ca9a8e032ead7285 - just clone-submodule tasks/coverage/acorn-test262 https://github.com/oxc-project/acorn-test262 7b7fa95e055352d76ddccc98cabc37ca1448cf92 + just clone-submodule tasks/coverage/acorn-test262 https://github.com/oxc-project/acorn-test262 3d7013e145499268dc8f985d087208aa013e0ba2 just update-transformer-fixtures # Install git pre-commit to format files diff --git a/napi/parser/deserialize-js.js b/napi/parser/deserialize-js.js index 936f41007cc27..ae5fcff2198e4 100644 --- 
a/napi/parser/deserialize-js.js +++ b/napi/parser/deserialize-js.js @@ -1079,34 +1079,18 @@ function deserializeBigIntLiteral(pos) { } function deserializeRegExpLiteral(pos) { - const raw = deserializeOptionStr(pos + 40); - let pattern, flags, value = null; - if (raw === null) { - pattern = deserializeRegExpPattern(pos + 8); - const flagBits = deserializeU8(pos + 32); - flags = ''; - if (flagBits & 1) flags += 'g'; - if (flagBits & 2) flags += 'i'; - if (flagBits & 4) flags += 'm'; - if (flagBits & 8) flags += 's'; - if (flagBits & 16) flags += 'u'; - if (flagBits & 32) flags += 'y'; - if (flagBits & 64) flags += 'd'; - if (flagBits & 128) flags += 'v'; - } else { - [, pattern, flags] = raw.match(/^\/(.*)\/([a-z]*)$/); - } - + const regex = deserializeRegExp(pos + 8); + let value = null; try { - value = new RegExp(pattern, flags); + value = new RegExp(regex.pattern, regex.flags); } catch (e) {} return { type: 'Literal', start: deserializeU32(pos), end: deserializeU32(pos + 4), value, - raw, - regex: { pattern, flags }, + raw: deserializeOptionStr(pos + 40), + regex, }; } @@ -1118,10 +1102,18 @@ function deserializeRegExp(pos) { } function deserializeRegExpFlags(pos) { - return { - type: 'RegExpFlags', - 0: deserializeU8(pos), - }; + const flagBits = deserializeU8(pos); + let flags = ''; + // Alphabetical order + if (flagBits & 64) flags += 'd'; + if (flagBits & 1) flags += 'g'; + if (flagBits & 2) flags += 'i'; + if (flagBits & 4) flags += 'm'; + if (flagBits & 8) flags += 's'; + if (flagBits & 16) flags += 'u'; + if (flagBits & 128) flags += 'v'; + if (flagBits & 32) flags += 'y'; + return flags; } function deserializeJSXElement(pos) { diff --git a/napi/parser/deserialize-ts.js b/napi/parser/deserialize-ts.js index 2d02abe25b561..a9ea0539a3656 100644 --- a/napi/parser/deserialize-ts.js +++ b/napi/parser/deserialize-ts.js @@ -1131,34 +1131,18 @@ function deserializeBigIntLiteral(pos) { } function deserializeRegExpLiteral(pos) { - const raw = 
deserializeOptionStr(pos + 40); - let pattern, flags, value = null; - if (raw === null) { - pattern = deserializeRegExpPattern(pos + 8); - const flagBits = deserializeU8(pos + 32); - flags = ''; - if (flagBits & 1) flags += 'g'; - if (flagBits & 2) flags += 'i'; - if (flagBits & 4) flags += 'm'; - if (flagBits & 8) flags += 's'; - if (flagBits & 16) flags += 'u'; - if (flagBits & 32) flags += 'y'; - if (flagBits & 64) flags += 'd'; - if (flagBits & 128) flags += 'v'; - } else { - [, pattern, flags] = raw.match(/^\/(.*)\/([a-z]*)$/); - } - + const regex = deserializeRegExp(pos + 8); + let value = null; try { - value = new RegExp(pattern, flags); + value = new RegExp(regex.pattern, regex.flags); } catch (e) {} return { type: 'Literal', start: deserializeU32(pos), end: deserializeU32(pos + 4), value, - raw, - regex: { pattern, flags }, + raw: deserializeOptionStr(pos + 40), + regex, }; } @@ -1170,10 +1154,18 @@ function deserializeRegExp(pos) { } function deserializeRegExpFlags(pos) { - return { - type: 'RegExpFlags', - 0: deserializeU8(pos), - }; + const flagBits = deserializeU8(pos); + let flags = ''; + // Alphabetical order + if (flagBits & 64) flags += 'd'; + if (flagBits & 1) flags += 'g'; + if (flagBits & 2) flags += 'i'; + if (flagBits & 4) flags += 'm'; + if (flagBits & 8) flags += 's'; + if (flagBits & 16) flags += 'u'; + if (flagBits & 128) flags += 'v'; + if (flagBits & 32) flags += 'y'; + return flags; } function deserializeJSXElement(pos) { diff --git a/tasks/ast_tools/src/generators/raw_transfer.rs b/tasks/ast_tools/src/generators/raw_transfer.rs index 8170526287e60..0cdde1d864919 100644 --- a/tasks/ast_tools/src/generators/raw_transfer.rs +++ b/tasks/ast_tools/src/generators/raw_transfer.rs @@ -377,6 +377,7 @@ impl<'s> StructDeserializerGenerator<'s> { let value = DESER_REGEX.replace_all(&value, DeserReplacer::new(self.schema)); let value = POS_OFFSET_REGEX .replace_all(&value, PosOffsetReplacer::new(self, struct_def, struct_offset)); + let value = 
POS_REGEX.replace_all(&value, PosReplacer::new(struct_offset)); let value = value.cow_replace("SOURCE_TEXT", "sourceText"); let value = if let Some((preamble, value)) = value.trim().rsplit_once('\n') {