From 9cd612f1e99f93d4624d8a6a00d99fff03864eac Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Sat, 14 Mar 2026 12:19:27 +0000 Subject: [PATCH] perf(linter/plugins): recycle comment objects (#20362) Apply the same optimization as #19978 to comments - hold a pool of `Comment` objects, and re-use those objects rather than creating new objects each time. Same as with `Token`s, `loc` property is a getter which calculates `loc` lazily, and caches it in a private property. --- apps/oxlint/src-js/generated/constants.ts | 25 ++ apps/oxlint/src-js/generated/deserialize.d.ts | 4 +- apps/oxlint/src-js/generated/deserialize.js | 65 +---- apps/oxlint/src-js/generated/types.d.ts | 9 +- apps/oxlint/src-js/plugins.ts | 10 +- apps/oxlint/src-js/plugins/comments.ts | 239 +++++++++++++++--- apps/oxlint/src-js/plugins/location.ts | 23 +- apps/oxlint/src-js/plugins/source_code.ts | 7 +- apps/oxlint/src-js/plugins/tokens.ts | 18 +- apps/oxlint/src-js/plugins/types.ts | 7 +- crates/oxc_ast/src/serialize/mod.rs | 21 +- napi/parser/src-js/generated/constants.js | 25 ++ .../ast_tools/src/generators/raw_transfer.rs | 80 ++++-- tasks/ast_tools/src/generators/typescript.rs | 9 +- 14 files changed, 365 insertions(+), 177 deletions(-) diff --git a/apps/oxlint/src-js/generated/constants.ts b/apps/oxlint/src-js/generated/constants.ts index 4b57d05c5180a..89a749acc5842 100644 --- a/apps/oxlint/src-js/generated/constants.ts +++ b/apps/oxlint/src-js/generated/constants.ts @@ -60,3 +60,28 @@ export const SOURCE_START_OFFSET = 8; * Byte offset of length of source text, relative to start of `Program`. */ export const SOURCE_LEN_OFFSET = 16; + +/** + * Byte offset of comments `Vec` pointer, relative to start of `Program`. + */ +export const COMMENTS_OFFSET = 24; + +/** + * Byte offset of comments `Vec` length, relative to start of `Program`. + */ +export const COMMENTS_LEN_OFFSET = 32; + +/** + * Size of `Comment` struct in bytes. + */ +export const COMMENT_SIZE = 16; + +/** + * Byte offset of `kind` field, relative to start of `Comment` struct. + */ +export const COMMENT_KIND_OFFSET = 12; + +/** + * Discriminant value for `CommentKind::Line`. + */ +export const COMMENT_LINE_KIND = 0; diff --git a/apps/oxlint/src-js/generated/deserialize.d.ts b/apps/oxlint/src-js/generated/deserialize.d.ts index 7473511fa3e1a..e8125c38b3342 100644 --- a/apps/oxlint/src-js/generated/deserialize.d.ts +++ b/apps/oxlint/src-js/generated/deserialize.d.ts @@ -2,11 +2,11 @@ // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. import type { Program } from "./types.d.ts"; -import type { Node, Comment } from "../plugins/types.ts"; +import type { Node } from "../plugins/types.ts"; import type { Location as SourceLocation } from "../plugins/location.ts"; type BufferWithArrays = Uint8Array & { uint32: Uint32Array; float64: Float64Array }; -type GetLoc = (node: Node | Comment) => SourceLocation; +type GetLoc = (node: Node) => SourceLocation; export declare function deserializeProgramOnly( buffer: BufferWithArrays, diff --git a/apps/oxlint/src-js/generated/deserialize.js b/apps/oxlint/src-js/generated/deserialize.js index 3b2eccac34d18..16b6397a5d8e7 100644 --- a/apps/oxlint/src-js/generated/deserialize.js +++ b/apps/oxlint/src-js/generated/deserialize.js @@ -2,6 +2,7 @@ // To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`. import { tokens, initTokens } from "../plugins/tokens.js"; +import { comments, initComments } from "../plugins/comments.js"; let uint8, uint32, @@ -9,7 +10,6 @@ let uint8, sourceText, sourceIsAscii, sourceStartPos, - astId = 0, parent = null, getLoc; @@ -49,12 +49,10 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de export function resetBuffer() { // Clear buffer and source text string to allow them to be garbage collected uint8 = uint32 = float64 = sourceText = void 0; - astId++; } function deserializeProgram(pos) { - let localAstId = astId, - end = deserializeU32(pos + 4), + let end = deserializeU32(pos + 4), program = (parent = { __proto__: NodeProto, type: "Program", @@ -62,25 +60,7 @@ function deserializeProgram(pos) { sourceType: deserializeModuleKind(pos + 137), hashbang: null, get comments() { - // Check AST in buffer is still the same AST (buffers are reused) - if (localAstId !== astId) - throw Error("Comments are only accessible while linting the file"); - // Deserialize the comments. - // Replace this getter with the comments array, so we don't deserialize twice. - let comments = deserializeVecComment(pos + 24), - { hashbang } = this; - if (hashbang !== null) { - let start, end; - comments.unshift({ - __proto__: NodeProto, - type: "Shebang", - value: hashbang.value, - start: (start = hashbang.start), - end: (end = hashbang.end), - range: [start, end], - }); - } - Object.defineProperty(this, "comments", { value: comments }); + comments === null && initComments(); return comments; }, get tokens() { @@ -5744,33 +5724,6 @@ function deserializeJSDocUnknownType(pos) { }; } -function deserializeCommentKind(pos) { - switch (uint8[pos]) { - case 0: - return "Line"; - case 1: - return "Block"; - case 2: - return "Block"; - default: - throw Error(`Unexpected discriminant ${uint8[pos]} for CommentKind`); - } -} - -function deserializeComment(pos) { - let type = deserializeCommentKind(pos + 12), - start = deserializeU32(pos), - end = deserializeU32(pos + 4); - return { - __proto__: NodeProto, - type, - value: sourceText.slice(start + 2, end - (type === "Line" ? 0 : 2)), - start, - end, - range: [start, end], - }; -} - function deserializeAssignmentOperator(pos) { switch (uint8[pos]) { case 0: @@ -5951,18 +5904,6 @@ function deserializeStr(pos) { return out; } -function deserializeVecComment(pos) { - let arr = [], - pos32 = pos >> 2; - pos = uint32[pos32]; - let endPos = pos + uint32[pos32 + 2] * 16; - for (; pos !== endPos; ) { - arr.push(deserializeComment(pos)); - pos += 16; - } - return arr; -} - function deserializeOptionHashbang(pos) { if (uint32[(pos + 8) >> 2] === 0 && uint32[(pos + 12) >> 2] === 0) return null; return deserializeHashbang(pos); diff --git a/apps/oxlint/src-js/generated/types.d.ts b/apps/oxlint/src-js/generated/types.d.ts index 96109abfff3f8..6f036ba1fa0f8 100644 --- a/apps/oxlint/src-js/generated/types.d.ts +++ b/apps/oxlint/src-js/generated/types.d.ts @@ -1,10 +1,11 @@ // Auto-generated code, DO NOT EDIT DIRECTLY! // To edit this generated file you have to edit `tasks/ast_tools/src/generators/typescript.rs`. -import { Span } from "../plugins/location.ts"; -import { Token } from "../plugins/tokens.ts"; -import { Comment } from "../plugins/types.ts"; -export { Span, Comment, Token }; +import type { Comment } from "../plugins/comments.ts"; +import type { Span } from "../plugins/location.ts"; +import type { Token } from "../plugins/tokens.ts"; + +export type { Comment, Span, Token }; export interface Program extends Span { type: "Program"; diff --git a/apps/oxlint/src-js/plugins.ts b/apps/oxlint/src-js/plugins.ts index d09294711f3b5..f32d078c29bbb 100644 --- a/apps/oxlint/src-js/plugins.ts +++ b/apps/oxlint/src-js/plugins.ts @@ -6,6 +6,7 @@ export { eslintCompatPlugin } from "./package/compat.ts"; export type * as ESTree from "./generated/types"; // Plugin types +export type { Comment } from "./plugins/comments.ts"; export type { Context, LanguageOptions } from "./plugins/context.ts"; export type { Fix, Fixer, FixFn } from "./plugins/fix.ts"; export type { Globals, Envs } from "./plugins/globals.ts"; @@ -52,11 +53,4 @@ export type { RuleReplacedByExternalSpecifier, } from "./plugins/rule_meta.ts"; export type { LineColumn, Location, Range, Ranged, Span } from "./plugins/location.ts"; -export type { - AfterHook, - BeforeHook, - Comment, - Node, - Visitor, - VisitorWithHooks, -} from "./plugins/types.ts"; +export type { AfterHook, BeforeHook, Node, Visitor, VisitorWithHooks } from "./plugins/types.ts"; diff --git a/apps/oxlint/src-js/plugins/comments.ts b/apps/oxlint/src-js/plugins/comments.ts index 26fdf106ad620..5d9e98211f973 100644 --- a/apps/oxlint/src-js/plugins/comments.ts +++ b/apps/oxlint/src-js/plugins/comments.ts @@ -1,12 +1,200 @@ /* - * `SourceCode` methods related to comments. + * Comment class, object pooling, deserialization, and `SourceCode` methods related to comments. */ -import { ast, initAst, sourceText } from "./source_code.ts"; +import { ast, buffer, initAst, sourceText } from "./source_code.ts"; +import { + COMMENTS_OFFSET, + COMMENTS_LEN_OFFSET, + COMMENT_SIZE, + COMMENT_KIND_OFFSET, + COMMENT_LINE_KIND, + DATA_POINTER_POS_32, +} from "../generated/constants.ts"; +import { computeLoc } from "./location.ts"; import { firstTokenAtOrAfter } from "./tokens_methods.ts"; -import { debugAssertIsNonNull } from "../utils/asserts.ts"; +import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; -import type { Comment, Node, NodeOrToken } from "./types.ts"; +import type { Node, NodeOrToken } from "./types.ts"; +import type { Location, Span } from "./location.ts"; + +/** + * Comment. + */ +interface CommentType extends Span { + type: "Line" | "Block" | "Shebang"; + value: string; +} + +// Export type as `Comment` for external consumers +export type { CommentType as Comment }; + +// Comments for the current file. +// Created lazily only when needed. +export let comments: CommentType[] | null = null; + +// Cached comment objects, reused across files to reduce GC pressure. +// Comments are mutated in place during deserialization, then `comments` is set to a slice of this array. +const cachedComments: Comment[] = []; + +// Comments array from previous file. +// Reused for next file if next file has fewer comments than the previous file (by truncating to correct length). +let previousComments: Comment[] = []; + +// Comments whose `loc` property has been accessed, and therefore needs clearing on reset. +const commentsWithLoc: Comment[] = []; + +// Reset `#loc` field on a `Comment` class instance. +let resetCommentLoc: (comment: Comment) => void; + +/** + * Comment class. + * + * Creates `loc` lazily and caches it in a private field. + * Using a class with a private `#loc` field avoids hidden class transitions that would occur + * with `Object.defineProperty` / `delete` on plain objects. + * All `Comment` instances always have the same V8 hidden class, keeping property access monomorphic. + */ +class Comment implements Span { + type: CommentType["type"] = null!; // Overwritten later + value: string = null!; // Overwritten later + start: number = 0; + end: number = 0; + range: [number, number] = [0, 0]; + + #loc: Location | null = null; + + get loc(): Location { + const loc = this.#loc; + if (loc !== null) return loc; + + commentsWithLoc.push(this); + return (this.#loc = computeLoc(this.start, this.end)); + } + + static { + // Defined in static block to avoid exposing this as a public method + resetCommentLoc = (comment: Comment) => { + comment.#loc = null; + }; + } +} + +// Make `loc` property enumerable so `for (const key in comment) ...` includes `loc` +Object.defineProperty(Comment.prototype, "loc", { enumerable: true }); + +/** + * Initialize comments for current file. + * + * Deserializes comments from the buffer using object pooling. + * If the program has a hashbang, prepends a `Shebang` comment. + */ +export function initComments(): void { + debugAssert(comments === null, "Comments already initialized"); + + if (ast === null) initAst(); + debugAssertIsNonNull(ast); + debugAssertIsNonNull(sourceText); + debugAssertIsNonNull(buffer); + + const { uint32 } = buffer; + const programPos32 = uint32[DATA_POINTER_POS_32] >> 2; + let pos = uint32[programPos32 + (COMMENTS_OFFSET >> 2)]; + const commentsLen = uint32[programPos32 + (COMMENTS_LEN_OFFSET >> 2)]; + + // Determine total number of comments (including shebang if present) + const { hashbang } = ast; + let index = +(hashbang !== null); + const totalLen = commentsLen + index; + + // Grow cache if needed (one-time cost as cache warms up) + while (cachedComments.length < totalLen) { + cachedComments.push(new Comment()); + } + + // If there's a hashbang, populate slot 0 with `Shebang` comment + if (index !== 0) { + debugAssertIsNonNull(hashbang); + + const comment = cachedComments[0]; + comment.type = "Shebang"; + comment.value = hashbang.value; + comment.range[0] = comment.start = hashbang.start; + comment.range[1] = comment.end = hashbang.end; + } + + // Deserialize comments from buffer + while (index < totalLen) { + const comment = cachedComments[index++]; + + const start = uint32[pos >> 2]; + const end = uint32[(pos + 4) >> 2]; + const isBlock = buffer[pos + COMMENT_KIND_OFFSET] !== COMMENT_LINE_KIND; + + comment.type = isBlock ? "Block" : "Line"; + // Line comments: `// text` -> slice `start + 2..end` + // Block comments: `/* text */` -> slice `start + 2..end - 2` + comment.value = sourceText.slice(start + 2, end - (+isBlock << 1)); + comment.range[0] = comment.start = start; + comment.range[1] = comment.end = end; + + pos += COMMENT_SIZE; + } + + // Use `slice` rather than copying comments one-by-one into a new array. + // V8 implements `slice` with a single `memcpy` of the backing store, which is faster + // than N individual `push` calls with bounds checking and potential resizing. + // + // If the comments array from previous file is longer than the current one, + // reuse it and truncate it to avoid the memcpy entirely. + if (previousComments.length >= totalLen) { + previousComments.length = totalLen; + comments = previousComments; + } else { + comments = previousComments = cachedComments.slice(0, totalLen); + } + + // Check `comments` have valid ranges and are in ascending order + debugCheckValidRanges(comments); +} + +/** + * Check comments have valid ranges and are in ascending order. + * + * Only runs in debug build (tests). In release build, this function is entirely removed by minifier. + */ +function debugCheckValidRanges(commentsArr: CommentType[]): void { + if (!DEBUG) return; + + let lastEnd = 0; + for (const comment of commentsArr) { + const { start, end } = comment; + if (end <= start) throw new Error(`Invalid comment range: ${start}-${end}`); + if (start < lastEnd) { + throw new Error(`Overlapping comments: last end: ${lastEnd}, next start: ${start}`); + } + lastEnd = end; + } + + if (lastEnd > sourceText!.length) { + throw new Error(`Comments end beyond source text length: ${lastEnd} > ${sourceText!.length}`); + } +} + +/** + * Reset comments after file has been linted. + * + * Clears cached `loc` on comments that had it accessed, so the getter + * will recalculate it when the comment is reused for a different file. + */ +export function resetComments(): void { + for (let i = 0, len = commentsWithLoc.length; i < len; i++) { + resetCommentLoc(commentsWithLoc[i]); + } + commentsWithLoc.length = 0; + + comments = null; +} // Regex that tests if a string is entirely whitespace. const WHITESPACE_ONLY_REGEXP = /^\s*$/; @@ -15,12 +203,10 @@ const WHITESPACE_ONLY_REGEXP = /^\s*$/; * Retrieve an array containing all comments in the source code. * @returns Array of `Comment`s in order they appear in source. */ -export function getAllComments(): Comment[] { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); - - // `comments` property is a getter. Comments are deserialized lazily. - return ast.comments; +export function getAllComments(): CommentType[] { + if (comments === null) initComments(); + debugAssertIsNonNull(comments); + return comments; } /** @@ -40,13 +226,11 @@ export function getAllComments(): Comment[] { * @param nodeOrToken - The AST node or token to check for adjacent comment tokens. * @returns Array of `Comment`s in occurrence order. */ -export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); +export function getCommentsBefore(nodeOrToken: NodeOrToken): CommentType[] { + if (comments === null) initComments(); + debugAssertIsNonNull(comments); debugAssertIsNonNull(sourceText); - const { comments } = ast; - let targetStart = nodeOrToken.range[0]; // start // Binary search for first comment at or past `nodeOrToken`'s start. @@ -88,13 +272,11 @@ export function getCommentsBefore(nodeOrToken: NodeOrToken): Comment[] { * @param nodeOrToken - The AST node or token to check for adjacent comment tokens. * @returns Array of `Comment`s in occurrence order. */ -export function getCommentsAfter(nodeOrToken: NodeOrToken): Comment[] { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); +export function getCommentsAfter(nodeOrToken: NodeOrToken): CommentType[] { + if (comments === null) initComments(); + debugAssertIsNonNull(comments); debugAssertIsNonNull(sourceText); - const { comments } = ast; - let targetEnd = nodeOrToken.range[1]; // end // Binary search for first comment at or past `nodeOrToken`'s end. @@ -124,11 +306,9 @@ export function getCommentsAfter(nodeOrToken: NodeOrToken): Comment[] { * @param node - The AST node to get the comments for. * @returns Array of `Comment`s in occurrence order. */ -export function getCommentsInside(node: Node): Comment[] { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); - - const { comments } = ast; +export function getCommentsInside(node: Node): CommentType[] { + if (comments === null) initComments(); + debugAssertIsNonNull(comments); const { range } = node, rangeStart = range[0], @@ -153,12 +333,11 @@ export function commentsExistBetween( nodeOrToken1: NodeOrToken, nodeOrToken2: NodeOrToken, ): boolean { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); + if (comments === null) initComments(); + debugAssertIsNonNull(comments); // Find the first comment after `nodeOrToken1` ends. - const { comments } = ast, - betweenRangeStart = nodeOrToken1.range[1]; + const betweenRangeStart = nodeOrToken1.range[1]; const firstCommentBetween = firstTokenAtOrAfter(comments, betweenRangeStart, 0); // Check if it ends before `nodeOrToken2` starts. return ( @@ -176,7 +355,7 @@ export function commentsExistBetween( * @returns The JSDoc comment for the given node, or `null` if not found. */ /* oxlint-disable no-unused-vars */ -export function getJSDocComment(node: Node): Comment | null { +export function getJSDocComment(node: Node): CommentType | null { throw new Error("`sourceCode.getJSDocComment` is not supported at present (and deprecated)"); // TODO } /* oxlint-enable no-unused-vars */ diff --git a/apps/oxlint/src-js/plugins/location.ts b/apps/oxlint/src-js/plugins/location.ts index 6e5bf71e67eb9..8ff8969971ebd 100644 --- a/apps/oxlint/src-js/plugins/location.ts +++ b/apps/oxlint/src-js/plugins/location.ts @@ -7,7 +7,7 @@ import { ast, initAst, initSourceText, sourceText } from "./source_code.ts"; import visitorKeys from "../generated/keys.ts"; import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; -import type { NodeOrToken, Node, Comment } from "./types.ts"; +import type { NodeOrToken, Node } from "./types.ts"; import type { Node as ESTreeNode } from "../generated/types.d.ts"; /** @@ -254,29 +254,30 @@ export function getRange(nodeOrToken: NodeOrToken): Range { * @param nodeOrToken - Node or token to get the location of * @returns Location of the node or token */ -// Both AST nodes and tokens handle lazy `loc` computation and caching via their respective getters +// AST nodes, tokens, and comments handle lazy `loc` computation and caching via their respective getters // (AST nodes via `NodeProto` prototype getter which caches via `Object.defineProperty`, -// tokens via `Token` class getter which caches in a private field). -// So accessing `.loc` gives the right behavior for both, including stable object identity. +// tokens and comments via `Token` / `Comment` class getters which cache in private fields). +// So accessing `.loc` gives the right behavior for all 3, including stable object identity. export function getLoc(nodeOrToken: NodeOrToken): Location { return nodeOrToken.loc; } /** - * Calculate the `Location` for an AST node or comment, and cache it on the node. + * Calculate the `Location` for an AST node, and cache it on the node. * - * Used in `loc` getters on AST nodes and comments (not tokens - tokens use their own caching via `Token` class). + * Used in `loc` getter on AST nodes (not tokens or comments - they use their own caching + * via `Token` / `Comment` class private fields). * - * Defines a `loc` property on the node/comment with the calculated `Location`, so accessing `loc` twice on same node + * Defines a `loc` property on the node with the calculated `Location`, so accessing `loc` twice on same node * results in the same object each time. * * For internal use only. * - * @param nodeOrComment - AST node or comment + * @param node - AST node * @returns Location */ -export function getNodeLoc(nodeOrComment: Node | Comment): Location { - const loc = computeLoc(nodeOrComment.start, nodeOrComment.end); +export function getNodeLoc(node: Node): Location { + const loc = computeLoc(node.start, node.end); // Define `loc` property with the calculated `Location`, so accessing `loc` twice on same node // results in the same object each time. @@ -287,7 +288,7 @@ export function getNodeLoc(nodeOrComment: Node | Comment): Location { // // We also don't make it configurable, because deleting it wouldn't make `node.loc` evaluate to `undefined`, // because the access would fall through to the getter on the prototype. - Object.defineProperty(nodeOrComment, "loc", { value: loc, writable: true }); + Object.defineProperty(node, "loc", { value: loc, writable: true }); return loc; } diff --git a/apps/oxlint/src-js/plugins/source_code.ts b/apps/oxlint/src-js/plugins/source_code.ts index 1d1f509e20655..49f1b092b7352 100644 --- a/apps/oxlint/src-js/plugins/source_code.ts +++ b/apps/oxlint/src-js/plugins/source_code.ts @@ -11,6 +11,7 @@ import { import { deserializeProgramOnly, resetBuffer } from "../generated/deserialize.js"; import visitorKeys from "../generated/keys.ts"; +import { resetComments } from "./comments.ts"; import * as commentMethods from "./comments.ts"; import { ecmaVersion } from "./context.ts"; import * as locationMethods from "./location.ts"; @@ -23,10 +24,11 @@ import * as tokenMethods from "./tokens_methods.ts"; import { debugAssertIsNonNull } from "../utils/asserts.ts"; import type { Program } from "../generated/types.d.ts"; +import type { Comment } from "./comments.ts"; import type { Ranged } from "./location.ts"; -import type { Token } from "./tokens.ts"; -import type { BufferWithArrays, Comment, Node } from "./types.ts"; import type { ScopeManager } from "./scope.ts"; +import type { Token } from "./tokens.ts"; +import type { BufferWithArrays, Node } from "./types.ts"; // Text decoder, for decoding source text from buffer const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }); @@ -137,6 +139,7 @@ export function resetSourceAndAst(): void { resetLines(); resetScopeManager(); resetTokens(); + resetComments(); } /** diff --git a/apps/oxlint/src-js/plugins/tokens.ts b/apps/oxlint/src-js/plugins/tokens.ts index c9d69c43a32b0..0f4bedb9f5e90 100644 --- a/apps/oxlint/src-js/plugins/tokens.ts +++ b/apps/oxlint/src-js/plugins/tokens.ts @@ -2,12 +2,13 @@ * Token types and tokens initialization / reset. */ -import { ast, buffer, initAst, initSourceText, sourceText } from "./source_code.ts"; +import { buffer, initSourceText, sourceText } from "./source_code.ts"; +import { comments, initComments } from "./comments.ts"; import { computeLoc } from "./location.ts"; import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.ts"; import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; -import type { Comment } from "./types.ts"; +import type { Comment } from "./comments.ts"; import type { Location, Span } from "./location.ts"; /** @@ -93,7 +94,6 @@ export type TokenOrComment = TokenType | Comment; // Tokens for the current file. // Created lazily only when needed. export let tokens: TokenType[] | null = null; -let comments: Comment[] | null = null; export let tokensAndComments: TokenOrComment[] | null = null; // Cached token objects, reused across files to reduce GC pressure. @@ -328,14 +328,9 @@ function debugCheckValidRanges(tokens: TokenOrComment[], description: string): v export function initTokensAndComments() { debugAssertIsNonNull(tokens); - // Get comments from AST - if (comments === null) { - if (ast === null) initAst(); - debugAssertIsNonNull(ast); - comments = ast.comments; - - debugCheckValidRanges(comments, "comment"); - } + // Ensure comments are initialized + if (comments === null) initComments(); + debugAssertIsNonNull(comments); // Fast paths for file with no comments, or file which is only comments const commentsLength = comments.length; @@ -466,6 +461,5 @@ export function resetTokens() { tokensWithRegex.length = 0; tokens = null; - comments = null; tokensAndComments = null; } diff --git a/apps/oxlint/src-js/plugins/types.ts b/apps/oxlint/src-js/plugins/types.ts index 52160cab18e1e..0dceef5c18585 100644 --- a/apps/oxlint/src-js/plugins/types.ts +++ b/apps/oxlint/src-js/plugins/types.ts @@ -1,5 +1,6 @@ import type { Span } from "./location.ts"; import type { Token } from "./tokens.ts"; +import type { Comment } from "./comments.ts"; import type { VisitorObject as Visitor } from "../generated/visitor.d.ts"; export type { Visitor }; @@ -22,12 +23,6 @@ export interface Node extends Span {} export type NodeOrToken = Node | Token | Comment; -// Comment. -export interface Comment extends Span { - type: "Line" | "Block" | "Shebang"; - value: string; -} - // Buffer with typed array views of itself stored as properties. export interface BufferWithArrays extends Uint8Array { uint32: Uint32Array; diff --git a/crates/oxc_ast/src/serialize/mod.rs b/crates/oxc_ast/src/serialize/mod.rs index 37072a202fd1d..168300e37cf32 100644 --- a/crates/oxc_ast/src/serialize/mod.rs +++ b/crates/oxc_ast/src/serialize/mod.rs @@ -121,8 +121,6 @@ impl Program<'_> { /// `Program` span start is 0 (not 5). #[ast_meta] #[estree(raw_deser = " - const localAstId = astId; - const start = IS_TS ? 0 : DESER[u32](POS_OFFSET.span.start), end = DESER[u32](POS_OFFSET.span.end); @@ -133,24 +131,7 @@ impl Program<'_> { hashbang: null, /* IF LINTER */ get comments() { - // Check AST in buffer is still the same AST (buffers are reused) - if (localAstId !== astId) throw new Error('Comments are only accessible while linting the file'); - // Deserialize the comments. - // Replace this getter with the comments array, so we don't deserialize twice. - const comments = DESER[Vec](POS_OFFSET.comments); - // If there's a hashbang, prepend it as a `Shebang` comment - const { hashbang } = this; - if (hashbang !== null) { - let start, end; - comments.unshift({ - type: 'Shebang', - value: hashbang.value, - start: start = hashbang.start, - end: end = hashbang.end, - ...(RANGE && { range: [start, end] }), - }); - } - Object.defineProperty(this, 'comments', { value: comments }); + if (comments === null) initComments(); return comments; }, get tokens() { diff --git a/napi/parser/src-js/generated/constants.js b/napi/parser/src-js/generated/constants.js index 4b57d05c5180a..89a749acc5842 100644 --- a/napi/parser/src-js/generated/constants.js +++ b/napi/parser/src-js/generated/constants.js @@ -60,3 +60,28 @@ export const SOURCE_START_OFFSET = 8; * Byte offset of length of source text, relative to start of `Program`. */ export const SOURCE_LEN_OFFSET = 16; + +/** + * Byte offset of comments `Vec` pointer, relative to start of `Program`. + */ +export const COMMENTS_OFFSET = 24; + +/** + * Byte offset of comments `Vec` length, relative to start of `Program`. + */ +export const COMMENTS_LEN_OFFSET = 32; + +/** + * Size of `Comment` struct in bytes. + */ +export const COMMENT_SIZE = 16; + +/** + * Byte offset of `kind` field, relative to start of `Comment` struct. + */ +export const COMMENT_KIND_OFFSET = 12; + +/** + * Discriminant value for `CommentKind::Line`. + */ +export const COMMENT_LINE_KIND = 0; diff --git a/tasks/ast_tools/src/generators/raw_transfer.rs b/tasks/ast_tools/src/generators/raw_transfer.rs index 9a2d88a063734..72468a97be270 100644 --- a/tasks/ast_tools/src/generators/raw_transfer.rs +++ b/tasks/ast_tools/src/generators/raw_transfer.rs @@ -62,8 +62,8 @@ const BLOCK_ALIGN: u64 = 1 << 32; // 4 GiB // Offsets of `Vec`'s fields. // `Vec` is `#[repr(transparent)]` and `RawVec` is `#[repr(C)]`, so these offsets are fixed. -pub(super) const VEC_PTR_FIELD_OFFSET: usize = 0; -pub(super) const VEC_LEN_FIELD_OFFSET: usize = 8; +pub(super) const VEC_PTR_FIELD_OFFSET: u32 = 0; +pub(super) const VEC_LEN_FIELD_OFFSET: u32 = 8; /// Generator for raw transfer deserializer. pub struct RawTransferGenerator; @@ -140,11 +140,11 @@ fn generate_deserializers( let mut code = format!(" /* IF LINTER */ import {{ tokens, initTokens }} from '../plugins/tokens.js'; + import {{ comments, initComments }} from '../plugins/comments.js'; /* END_IF */ let uint8, uint32, float64, sourceText, sourceIsAscii, sourceStartPos, sourceEndPos; - let astId = 0; let parent = null; let getLoc; @@ -196,11 +196,6 @@ fn generate_deserializers( export function resetBuffer() {{ // Clear buffer and source text string to allow them to be garbage collected uint8 = uint32 = float64 = sourceText = undefined; - - // Increment `astId` counter. - // This prevents `program.comments` being accessed after the AST is done with. - // (see `deserializeProgram`) - if (LINTER) astId++; }} "); @@ -208,11 +203,11 @@ fn generate_deserializers( #[rustfmt::skip] let code_type_definition_linter = " import type { Program } from './types.d.ts'; - import type { Node, Comment } from '../plugins/types.ts'; + import type { Node } from '../plugins/types.ts'; import type { Location as SourceLocation } from '../plugins/location.ts'; type BufferWithArrays = Uint8Array & { uint32: Uint32Array; float64: Float64Array }; - type GetLoc = (node: Node | Comment) => SourceLocation; + type GetLoc = (node: Node) => SourceLocation; export declare function deserializeProgramOnly( buffer: BufferWithArrays, @@ -1314,6 +1309,16 @@ struct Constants { source_start_offset: u32, /// Offset of `u32` source text length, relative to position of `Program` source_len_offset: u32, + /// Offset of comments `Vec` pointer, relative to position of `Program` + comments_offset: u32, + /// Offset of comments `Vec` length, relative to position of `Program` + comments_len_offset: u32, + /// Size of `Comment` struct in bytes + comment_size: u32, + /// Offset of `kind` field within `Comment` struct + comment_kind_offset: u32, + /// Discriminant value for `CommentKind::Line` + comment_line_kind: u8, /// Size of `RawTransferData` in bytes raw_metadata_size: u32, } @@ -1332,6 +1337,11 @@ fn generate_constants(consts: Constants) -> (String, TokenStream) { program_offset, source_start_offset, source_len_offset, + comments_offset, + comments_len_offset, + comment_size, + comment_kind_offset, + comment_line_kind, raw_metadata_size, } = consts; @@ -1400,6 +1410,31 @@ fn generate_constants(consts: Constants) -> (String, TokenStream) { * Byte offset of length of source text, relative to start of `Program`. */ export const SOURCE_LEN_OFFSET = {source_len_offset}; + + /** + * Byte offset of comments `Vec` pointer, relative to start of `Program`. + */ + export const COMMENTS_OFFSET = {comments_offset}; + + /** + * Byte offset of comments `Vec` length, relative to start of `Program`. + */ + export const COMMENTS_LEN_OFFSET = {comments_len_offset}; + + /** + * Size of `Comment` struct in bytes. + */ + export const COMMENT_SIZE = {comment_size}; + + /** + * Byte offset of `kind` field, relative to start of `Comment` struct. + */ + export const COMMENT_KIND_OFFSET = {comment_kind_offset}; + + /** + * Discriminant value for `CommentKind::Line`. + */ + export const COMMENT_LINE_KIND = {comment_line_kind}; "); let block_size = number_lit(BLOCK_SIZE); @@ -1485,15 +1520,23 @@ fn get_constants(schema: &Schema) -> Constants { .field_by_name("program") .offset_64(); - let source_start_offset = schema - .type_by_name("Program") - .as_struct() - .unwrap() - .field_by_name("source_text") - .offset_64(); + let program_struct = schema.type_by_name("Program").as_struct().unwrap(); + let source_start_offset = program_struct.field_by_name("source_text").offset_64(); let source_len_offset = source_start_offset + STR_LEN_OFFSET; + let comments_field_offset = program_struct.field_by_name("comments").offset_64(); + let comments_offset = comments_field_offset + VEC_PTR_FIELD_OFFSET; + let comments_len_offset = comments_field_offset + VEC_LEN_FIELD_OFFSET; + + let comment_struct = schema.type_by_name("Comment").as_struct().unwrap(); + let comment_size = comment_struct.layout_64().size; + let comment_kind_offset = comment_struct.field_by_name("kind").offset_64(); + + let comment_kind_enum = schema.type_by_name("CommentKind").as_enum().unwrap(); + let comment_line_kind = + comment_kind_enum.variants.iter().find(|v| v.name() == "Line").unwrap().discriminant; + Constants { buffer_size, active_size, @@ -1506,6 +1549,11 @@ fn get_constants(schema: &Schema) -> Constants { program_offset, source_start_offset, source_len_offset, + comments_offset, + comments_len_offset, + comment_size, + comment_kind_offset, + comment_line_kind, raw_metadata_size, } } diff --git a/tasks/ast_tools/src/generators/typescript.rs b/tasks/ast_tools/src/generators/typescript.rs index de068647d2d5a..8436420d52ef1 100644 --- a/tasks/ast_tools/src/generators/typescript.rs +++ b/tasks/ast_tools/src/generators/typescript.rs @@ -491,10 +491,11 @@ fn amend_oxlint_types(code: &str) -> String { #[rustfmt::skip] code.insert_str(0, " - import { Span } from '../plugins/location.ts'; - import { Token } from '../plugins/tokens.ts'; - import { Comment } from '../plugins/types.ts'; - export { Span, Comment, Token }; + import type { Comment } from '../plugins/comments.ts'; + import type { Span } from '../plugins/location.ts'; + import type { Token } from '../plugins/tokens.ts'; + + export type { Comment, Span, Token }; ");