diff --git a/apps/oxlint/src-js/plugins/location.ts b/apps/oxlint/src-js/plugins/location.ts index a5b5ad26a40ba..6e5bf71e67eb9 100644 --- a/apps/oxlint/src-js/plugins/location.ts +++ b/apps/oxlint/src-js/plugins/location.ts @@ -7,7 +7,7 @@ import { ast, initAst, initSourceText, sourceText } from "./source_code.ts"; import visitorKeys from "../generated/keys.ts"; import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; -import type { NodeOrToken } from "./types.ts"; +import type { NodeOrToken, Node, Comment } from "./types.ts"; import type { Node as ESTreeNode } from "../generated/types.d.ts"; /** @@ -254,52 +254,62 @@ export function getRange(nodeOrToken: NodeOrToken): Range { * @param nodeOrToken - Node or token to get the location of * @returns Location of the node or token */ -// Note: We cannot expose `getNodeLoc` as public method, because it always recalculates the location, -// and returns a new object each time. It would be misleading if `node.loc !== sourceCode.getLoc(node)`. +// Both AST nodes and tokens handle lazy `loc` computation and caching via their respective getters +// (AST nodes via `NodeProto` prototype getter which caches via `Object.defineProperty`, +// tokens via `Token` class getter which caches in a private field). +// So accessing `.loc` gives the right behavior for both, including stable object identity. export function getLoc(nodeOrToken: NodeOrToken): Location { - // If location is already calculated for this node or token, return it - if (Object.hasOwn(nodeOrToken, "loc")) return nodeOrToken.loc; - // Calculate location - return getNodeLoc(nodeOrToken); + return nodeOrToken.loc; } /** - * Calculate the `Location` for an AST node or token. + * Calculate the `Location` for an AST node or comment, and cache it on the node. * - * Used in `loc` getters on AST nodes. + * Used in `loc` getters on AST nodes and comments (not tokens - tokens use their own caching via `Token` class). * - * Defines a `loc` property on the node/token with the calculated `Location`, so accessing `loc` twice on same node + * Defines a `loc` property on the node/comment with the calculated `Location`, so accessing `loc` twice on same node * results in the same object each time. * * For internal use only. * - * @param nodeOrToken - AST node or token + * @param nodeOrComment - AST node or comment * @returns Location */ -export function getNodeLoc(nodeOrToken: NodeOrToken): Location { - // Build `lines` and `lineStartIndices` tables if they haven't been already. - // This also decodes `sourceText` if it wasn't already. - if (lines.length === 0) initLines(); +export function getNodeLoc(nodeOrComment: Node | Comment): Location { + const loc = computeLoc(nodeOrComment.start, nodeOrComment.end); - const loc = { - start: getLineColumnFromOffsetUnchecked(nodeOrToken.start), - end: getLineColumnFromOffsetUnchecked(nodeOrToken.end), - }; - - // Define `loc` property with the calculated `Location`, so accessing `loc` twice on same node/token + // Define `loc` property with the calculated `Location`, so accessing `loc` twice on same node // results in the same object each time. // // We do not make the `loc` property enumerable, because it wasn't present before. // It would be weird if `Object.keys(node)` included `loc` if the property had been accessed previously, // but not if it hadn't. // - // The property is configurable so that it can be deleted when token objects are reused across files - // (see token pooling in `tokens.ts`). Deleting the own property restores the prototype getter. - Object.defineProperty(nodeOrToken, "loc", { value: loc, writable: true, configurable: true }); + // We also don't make it configurable, because deleting it wouldn't make `node.loc` evaluate to `undefined`, + // because the access would fall through to the getter on the prototype. + Object.defineProperty(nodeOrComment, "loc", { value: loc, writable: true }); return loc; } +/** + * Compute a `Location` from `start` and `end` source offsets. + * + * Pure computation - does not cache the result. + * Initializes `lines` and `lineStartIndices` tables if they haven't been already. + * + * @param start - Start offset + * @param end - End offset + * @returns Location + */ +export function computeLoc(start: number, end: number): Location { + if (lines.length === 0) initLines(); + return { + start: getLineColumnFromOffsetUnchecked(start), + end: getLineColumnFromOffsetUnchecked(end), + }; +} + /** * Get the deepest node containing a range index. * @param offset - Range index of the desired node diff --git a/apps/oxlint/src-js/plugins/tokens.ts b/apps/oxlint/src-js/plugins/tokens.ts index 1eeec096eaf91..9f972674bbea3 100644 --- a/apps/oxlint/src-js/plugins/tokens.ts +++ b/apps/oxlint/src-js/plugins/tokens.ts @@ -3,12 +3,12 @@ */ import { ast, buffer, initAst, initSourceText, sourceText } from "./source_code.ts"; -import { getNodeLoc } from "./location.ts"; +import { computeLoc } from "./location.ts"; import { TOKENS_OFFSET_POS_32, TOKENS_LEN_POS_32 } from "../generated/constants.ts"; import { debugAssert, debugAssertIsNonNull } from "../utils/asserts.ts"; import type { Comment, Node, NodeOrToken } from "./types.ts"; -import type { Span } from "./location.ts"; +import type { Location, Span } from "./location.ts"; /** * Options for various `SourceCode` methods e.g. `getFirstToken`. @@ -50,7 +50,7 @@ export type FilterFn = (token: TokenOrComment) => boolean; /** * AST token type. */ -export type Token = +type TokenType = | BooleanToken | IdentifierToken | JSXIdentifierToken @@ -64,6 +64,9 @@ export type Token = | StringToken | TemplateToken; +// Export type as `Token` for external consumers +export type { TokenType as Token }; + interface BaseToken extends Span { value: string; regex: undefined; @@ -122,27 +125,15 @@ export interface TemplateToken extends BaseToken { type: "Template"; } -type TokenOrComment = Token | Comment; +type TokenOrComment = TokenType | Comment; // `SkipOptions` object used by `getTokenOrCommentBefore` and `getTokenOrCommentAfter`. // This object is reused over and over to avoid creating a new options object on each call. const INCLUDE_COMMENTS_SKIP_OPTIONS: SkipOptions = { includeComments: true, skip: 0 }; -// Prototype for `Token` objects, which calculates `loc` property lazily. -const TokenProto = Object.create(Object.prototype, { - loc: { - // Note: Not configurable - get() { - tokensWithLoc.push(this); - return getNodeLoc(this); - }, - enumerable: true, - }, -}); - // Tokens for the current file. // Created lazily only when needed. -export let tokens: Token[] | null = null; +export let tokens: TokenType[] | null = null; let comments: Comment[] | null = null; export let tokensAndComments: TokenOrComment[] | null = null; @@ -166,6 +157,45 @@ const regexObjects: RegularExpressionToken["regex"][] = []; // for the next regex descriptor object which can be reused. const tokensWithRegex: Token[] = []; +// Reset `#loc` field on a `Token` class instance +let resetLoc: (token: Token) => void; + +/** + * Token implementation with lazy `loc` caching via private field. + * + * Using a class with a private `#loc` field avoids hidden class transitions that would occur + * with `Object.defineProperty` / `delete` on plain objects. + * All `Token` instances always have the same V8 hidden class, keeping property access monomorphic. + */ +class Token { + type: TokenType["type"] = "" as TokenType["type"]; // Overwritten later + value: string = ""; + regex: RegularExpressionToken["regex"] | undefined; + start: number = 0; + end: number = 0; + range: [number, number] = [0, 0]; + + #loc: Location | null = null; + + get loc(): Location { + const loc = this.#loc; + if (loc !== null) return loc; + + tokensWithLoc.push(this); + return (this.#loc = computeLoc(this.start, this.end)); + } + + static { + // Defined in static block to avoid exposing this as a public method + resetLoc = (token: Token) => { + token.#loc = null; + }; + } +} + +// Make `loc` property enumerable so that `for (const key in token) ...` includes `loc` in the keys it iterates over +Object.defineProperty(Token.prototype, "loc", { enumerable: true }); + let uint32: Uint32Array | null = null; // `ESTreeKind` discriminants (set by Rust side) @@ -173,7 +203,7 @@ const PRIVATE_IDENTIFIER_KIND = 2; const REGEXP_KIND = 8; // Indexed by `ESTreeKind` discriminant (matches `ESTreeKind` enum in `estree_kind.rs`) -const TOKEN_TYPES: Token["type"][] = [ +const TOKEN_TYPES: TokenType["type"][] = [ "Identifier", "Keyword", "PrivateIdentifier", @@ -210,16 +240,7 @@ export function initTokens() { // Grow cache if needed (one-time cost as cache warms up) while (cachedTokens.length < tokensLen) { - cachedTokens.push({ - // @ts-expect-error - TS doesn't understand `__proto__` - __proto__: TokenProto, - type: "" as Token["type"], // Overwritten later - value: "", - regex: undefined, - start: 0, - end: 0, - range: [0, 0], - }); + cachedTokens.push(new Token()); } // Deserialize into cached token objects @@ -237,9 +258,9 @@ export function initTokens() { // Assuming random distribution of file sizes, this cheaper branch should be hit on 50% of files. if (previousTokens.length >= tokensLen) { previousTokens.length = tokensLen; - tokens = previousTokens; + tokens = previousTokens as TokenType[]; } else { - tokens = previousTokens = cachedTokens.slice(0, tokensLen); + tokens = (previousTokens = cachedTokens.slice(0, tokensLen)) as TokenType[]; } uint32 = null; @@ -461,12 +482,12 @@ function debugCheckTokensAndComments() { /** * Reset tokens after file has been linted. * - * Deletes cached `loc` from tokens that had it accessed, so the prototype getter + * Clears cached `loc` on tokens that had it accessed, so the getter * will recalculate it when the token is reused for a different file. */ export function resetTokens() { for (let i = 0, len = tokensWithLoc.length; i < len; i++) { - delete (tokensWithLoc[i] as { loc?: unknown }).loc; + resetLoc(tokensWithLoc[i]); } tokensWithLoc.length = 0; diff --git a/apps/oxlint/test/tokens.test.ts b/apps/oxlint/test/tokens.test.ts index 9c54415849fa4..f3c88ed3f3655 100644 --- a/apps/oxlint/test/tokens.test.ts +++ b/apps/oxlint/test/tokens.test.ts @@ -26,10 +26,12 @@ import { resetSourceAndAst, setupSourceForFile, } from "../src-js/plugins/source_code.ts"; +import { getLoc } from "../src-js/plugins/location.ts"; import { parse as parseRaw } from "../src-js/package/parse.ts"; import { debugAssertIsNonNull } from "../src-js/utils/asserts.ts"; import type { Node } from "../src-js/plugins/types.ts"; +import type { Token } from "../src-js/plugins/tokens.ts"; import type { BinaryExpression } from "../src-js/generated/types.d.ts"; // Source text used for most tests @@ -1410,3 +1412,303 @@ describe("when calling getLastToken & getTokenBefore", () => { ]); }); }); + +// Regression tests for token `loc` correctness across pooled token reuse. +// Token objects are cached and reused across files. These tests verify that `loc` values +// are correct on the second file after token objects have been reused, and that stale +// cached `loc` from the first file does not leak into the second. +describe("token loc across sequential files", () => { + // Two source texts with different line structures. + // File 1 has everything on one line, file 2 spreads tokens across multiple lines. + // If stale `loc` leaks from file 1, the line/column values for file 2 would be wrong. + const FILE_1 = "var x = 1;"; + const FILE_2 = "var\n y\n =\n 2;"; + + it("should compute correct `token.loc` on second file after accessing `loc` on first file", () => { + // File 1: access `loc` on all tokens to cache it + setup(FILE_1); + const tokens1 = getTokens({ range: [0, FILE_1.length] } as Node); + expect(tokens1.map((t) => t.value)).toEqual(["var", "x", "=", "1", ";"]); + for (const token of tokens1) { + // Access `loc` to trigger caching + expect(token.loc.start.line).toBeGreaterThan(0); + } + + // File 2: verify `loc` is correct, not stale from file 1 + setup(FILE_2); + const tokens2 = getTokens({ range: [0, FILE_2.length] } as Node); + expect(tokens2.map((t) => t.value)).toEqual(["var", "y", "=", "2", ";"]); + + // `var` is at start of line 1 + expect(tokens2[0].loc).toEqual({ start: { line: 1, column: 0 }, end: { line: 1, column: 3 } }); + // `y` is at line 2, column 2 + expect(tokens2[1].loc).toEqual({ start: { line: 2, column: 2 }, end: { line: 2, column: 3 } }); + // `=` is at line 3, column 2 + expect(tokens2[2].loc).toEqual({ start: { line: 3, column: 2 }, end: { line: 3, column: 3 } }); + // `2` is at line 4, column 2 + expect(tokens2[3].loc).toEqual({ start: { line: 4, column: 2 }, end: { line: 4, column: 3 } }); + // `;` is at line 4, column 3 + expect(tokens2[4].loc).toEqual({ start: { line: 4, column: 3 }, end: { line: 4, column: 4 } }); + }); + + it("should compute correct `getLoc(token)` on second file after accessing `getLoc` on first file", () => { + // File 1: access `getLoc` on all tokens + setup(FILE_1); + const tokens1 = getTokens({ range: [0, FILE_1.length] } as Node); + for (const token of tokens1) { + expect(getLoc(token).start.line).toBeGreaterThan(0); + } + + // File 2: verify `getLoc` returns correct values + setup(FILE_2); + const tokens2 = getTokens({ range: [0, FILE_2.length] } as Node); + + expect(getLoc(tokens2[0])).toEqual({ + start: { line: 1, column: 0 }, + end: { line: 1, column: 3 }, + }); + expect(getLoc(tokens2[1])).toEqual({ + start: { line: 2, column: 2 }, + end: { line: 2, column: 3 }, + }); + expect(getLoc(tokens2[2])).toEqual({ + start: { line: 3, column: 2 }, + end: { line: 3, column: 3 }, + }); + }); + + it("should return same `loc` object from `token.loc` and `getLoc(token)`", () => { + setup(FILE_2); + const tokens2 = getTokens({ range: [0, FILE_2.length] } as Node); + + for (const token of tokens2) { + // Both access paths should return the same cached object + expect(getLoc(token)).toBe(token.loc); + } + }); + + it("should compute correct `loc` when second file has fewer tokens than first", () => { + // File with many tokens, then file with few tokens. + // Tests the `previousTokens.length >= tokensLen` reuse path in `initTokens`. + const manyTokens = "a + b + c + d + e + f;"; + const fewTokens = "x\n+\ny;"; + + setup(manyTokens); + const tokens1 = getTokens({ range: [0, manyTokens.length] } as Node); + for (const token of tokens1) { + expect(token.loc.start.line).toBe(1); + } + + setup(fewTokens); + const tokens2 = getTokens({ range: [0, fewTokens.length] } as Node); + expect(tokens2.map((t) => t.value)).toEqual(["x", "+", "y", ";"]); + + expect(tokens2[0].loc).toEqual({ start: { line: 1, column: 0 }, end: { line: 1, column: 1 } }); + expect(tokens2[1].loc).toEqual({ start: { line: 2, column: 0 }, end: { line: 2, column: 1 } }); + expect(tokens2[2].loc).toEqual({ start: { line: 3, column: 0 }, end: { line: 3, column: 1 } }); + expect(tokens2[3].loc).toEqual({ start: { line: 3, column: 1 }, end: { line: 3, column: 2 } }); + }); + + it("should compute correct `loc` across three sequential files", () => { + const file1 = "let a = 1;"; + const file2 = "let\n b\n =\n 2;"; + const file3 = "let c\n= 3;"; + + // File 1 + setup(file1); + for (const token of getTokens({ range: [0, file1.length] } as Node)) { + expect(token.loc.start.line).toBe(1); + } + + // File 2 + setup(file2); + for (const token of getTokens({ range: [0, file2.length] } as Node)) { + expect(token.loc.start.line).toBeGreaterThan(0); + } + + // File 3: verify locations are from file 3, not file 1 or 2 + setup(file3); + const tokens3 = getTokens({ range: [0, file3.length] } as Node); + expect(tokens3.map((t) => t.value)).toEqual(["let", "c", "=", "3", ";"]); + + // `let` on line 1 + expect(tokens3[0].loc).toEqual({ start: { line: 1, column: 0 }, end: { line: 1, column: 3 } }); + // `c` on line 1 + expect(tokens3[1].loc).toEqual({ start: { line: 1, column: 4 }, end: { line: 1, column: 5 } }); + // `=` on line 2 + expect(tokens3[2].loc).toEqual({ start: { line: 2, column: 0 }, end: { line: 2, column: 1 } }); + // `3` on line 2 + expect(tokens3[3].loc).toEqual({ start: { line: 2, column: 2 }, end: { line: 2, column: 3 } }); + // `;` on line 2 + expect(tokens3[4].loc).toEqual({ start: { line: 2, column: 3 }, end: { line: 2, column: 4 } }); + }); +}); + +// Regression tests for token `regex` property correctness across pooled token reuse. +// Token objects are cached and reused across files. These tests verify that `regex` is correctly +// set for RegularExpression tokens and reset to `undefined` when the token object is reused +// for a non-regex token in a subsequent file. +describe("token regex across sequential files", () => { + it("should have `regex` undefined on second file when first file had regex tokens", () => { + // File 1: has a regex token + const withRegex = "var x = /abc/gi;"; + setup(withRegex); + const tokens1 = getTokens({ range: [0, withRegex.length] } as Node); + const regexToken1 = tokens1.find((t) => t.type === "RegularExpression"); + expect(regexToken1).toBeDefined(); + expect(regexToken1!.regex).toEqual({ pattern: "abc", flags: "gi" }); + + // File 2: no regex tokens - reused token objects should have `regex: undefined` + const withoutRegex = "var y = 1;"; + setup(withoutRegex); + const tokens2 = getTokens({ range: [0, withoutRegex.length] } as Node) as Token[]; + for (const token of tokens2) { + expect(token.regex).toBeUndefined(); + } + }); + + it("should set `regex` correctly on second file when first file had no regex", () => { + // File 1: no regex + const withoutRegex = "var x = 1;"; + setup(withoutRegex); + const tokens1 = getTokens({ range: [0, withoutRegex.length] } as Node) as Token[]; + for (const token of tokens1) { + expect(token.regex).toBeUndefined(); + } + + // File 2: has a regex token + const withRegex = "var y = /foo/m;"; + setup(withRegex); + const tokens2 = getTokens({ range: [0, withRegex.length] } as Node) as Token[]; + const regexToken2 = tokens2.find((t) => t.type === "RegularExpression"); + expect(regexToken2).toBeDefined(); + expect(regexToken2!.regex).toEqual({ pattern: "foo", flags: "m" }); + + // Non-regex tokens should still have `regex: undefined` + for (const token of tokens2) { + if (token.type !== "RegularExpression") { + // oxlint-disable-next-line vitest/no-conditional-expect + expect(token.regex).toBeUndefined(); + } + } + }); + + it("should update `regex` correctly across files that both have regex tokens", () => { + // File 1: regex with one pattern + const regex1 = "var x = /abc/g;"; + setup(regex1); + const tokens1 = getTokens({ range: [0, regex1.length] } as Node); + const regexToken1 = tokens1.find((t) => t.type === "RegularExpression"); + expect(regexToken1!.regex).toEqual({ pattern: "abc", flags: "g" }); + + // File 2: regex with a different pattern - should not carry over from file 1 + const regex2 = "var y = /xyz/i;"; + setup(regex2); + const tokens2 = getTokens({ range: [0, regex2.length] } as Node); + const regexToken2 = tokens2.find((t) => t.type === "RegularExpression"); + expect(regexToken2!.regex).toEqual({ pattern: "xyz", flags: "i" }); + }); + + it("should handle multiple regex tokens followed by a file with no regex", () => { + // File 1: multiple regex tokens + const multiRegex = "/a/g; /b/i; /c/m;"; + setup(multiRegex); + const tokens1 = getTokens({ range: [0, multiRegex.length] } as Node); + const regexTokens1 = tokens1.filter((t) => t.type === "RegularExpression"); + expect(regexTokens1).toHaveLength(3); + expect(regexTokens1[0].regex).toEqual({ pattern: "a", flags: "g" }); + expect(regexTokens1[1].regex).toEqual({ pattern: "b", flags: "i" }); + expect(regexTokens1[2].regex).toEqual({ pattern: "c", flags: "m" }); + + // File 2: no regex - all tokens should have `regex: undefined` + const noRegex = "x + y + z;"; + setup(noRegex); + const tokens2 = getTokens({ range: [0, noRegex.length] } as Node) as Token[]; + for (const token of tokens2) { + expect(token.regex).toBeUndefined(); + } + }); +}); + +// Tests for `for (const key in token)` enumeration. +// All token properties (including the `loc` getter and `regex`) should be enumerable. +describe("token property enumeration", () => { + it("should enumerate all properties including `loc` for a non-regex token", () => { + const tokens = getTokens({ range: [0, SOURCE_TEXT.length] } as Node); + const token = tokens[0]; // `var` keyword + + const keys: string[] = []; + for (const key in token) { + keys.push(key); + } + + expect(keys).toContain("type"); + expect(keys).toContain("value"); + expect(keys).toContain("regex"); + expect(keys).toContain("start"); + expect(keys).toContain("end"); + expect(keys).toContain("range"); + expect(keys).toContain("loc"); + }); + + it("should enumerate all properties after `token.loc` has been accessed", () => { + const tokens = getTokens({ range: [0, SOURCE_TEXT.length] } as Node); + const token = tokens[0]; // `var` keyword + + // Access `loc` to trigger caching + expect(token.loc.start.line).toBe(1); + + const keys: string[] = []; + for (const key in token) { + keys.push(key); + } + + expect(keys).toContain("type"); + expect(keys).toContain("value"); + expect(keys).toContain("regex"); + expect(keys).toContain("start"); + expect(keys).toContain("end"); + expect(keys).toContain("range"); + expect(keys).toContain("loc"); + }); + + it("should enumerate all properties after `getLoc(token)` has been called", () => { + const tokens = getTokens({ range: [0, SOURCE_TEXT.length] } as Node); + const token = tokens[0]; // `var` keyword + + // Access `loc` via `getLoc` to trigger caching + expect(getLoc(token).start.line).toBe(1); + + const keys: string[] = []; + for (const key in token) { + keys.push(key); + } + + expect(keys).toContain("type"); + expect(keys).toContain("value"); + expect(keys).toContain("regex"); + expect(keys).toContain("start"); + expect(keys).toContain("end"); + expect(keys).toContain("range"); + expect(keys).toContain("loc"); + }); + + it("should enumerate all properties for a regex token", () => { + setup("var x = /abc/g;"); + const tokens = getTokens({ range: [0, 15] } as Node); + const regexToken = tokens.find((t) => t.type === "RegularExpression")!; + + const keys: string[] = []; + for (const key in regexToken) { + keys.push(key); + } + + expect(keys).toContain("type"); + expect(keys).toContain("value"); + expect(keys).toContain("regex"); + expect(keys).toContain("start"); + expect(keys).toContain("end"); + expect(keys).toContain("range"); + expect(keys).toContain("loc"); + }); +});