From c27a39338e88fbd2b529c2fb86ceb35a6061821f Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Thu, 2 Oct 2025 08:32:04 +0000 Subject: [PATCH] perf(linter/plugins): deserialize AST on demand (#14288) Avoid deserializing the AST if it's not required. It's possible that all rules return an empty visitor, or that all `createOnce` rules return `false` from their `before` hook. In those cases, the AST does not need to be walked, so AST doesn't need to be deserialized. And the source text doesn't need to be decoded from buffer either. These cases probably aren't so common, but not completely uncommon either. However, source text and AST are both accessible before traversal via `context.sourceCode`. Enable the usage of those APIs in `create` method / `before` hook by decoding source text / deserializing AST on demand when those APIs are used. --- apps/oxlint/src-js/plugins/lint.ts | 44 ++---- apps/oxlint/src-js/plugins/source_code.ts | 80 ++++++++-- apps/oxlint/src-js/plugins/types.ts | 6 + apps/oxlint/test/e2e.test.ts | 8 + .../sourceCode_late_access/.oxlintrc.json | 8 + .../sourceCode_late_access/files/1.js | 1 + .../sourceCode_late_access/files/2.js | 1 + .../sourceCode_late_access/output.snap.md | 148 ++++++++++++++++++ .../fixtures/sourceCode_late_access/plugin.ts | 115 ++++++++++++++ .../.oxlintrc.json | 7 + .../files/1.js | 1 + .../files/2.js | 1 + .../output.snap.md | 32 ++++ .../plugin.ts | 34 ++++ 14 files changed, 442 insertions(+), 44 deletions(-) create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access/.oxlintrc.json create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access/files/1.js create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access/files/2.js create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access/output.snap.md create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access/plugin.ts create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access_after_only/.oxlintrc.json create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/1.js create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/2.js create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access_after_only/output.snap.md create mode 100644 apps/oxlint/test/fixtures/sourceCode_late_access_after_only/plugin.ts diff --git a/apps/oxlint/src-js/plugins/lint.ts b/apps/oxlint/src-js/plugins/lint.ts index 0d478dc8d1cc3..cbf023b93f374 100644 --- a/apps/oxlint/src-js/plugins/lint.ts +++ b/apps/oxlint/src-js/plugins/lint.ts @@ -1,12 +1,6 @@ -import { - DATA_POINTER_POS_32, - SOURCE_LEN_OFFSET, - // TODO(camc314): we need to generate `.d.ts` file for this module. - // @ts-expect-error -} from '../generated/constants.js'; import { diagnostics, setupContextForFile } from './context.js'; import { registeredRules } from './load.js'; -import { resetSource, setupSourceForFile } from './source_code.js'; +import { getAst, resetSource, setupSourceForFile } from './source_code.js'; import { assertIs, getErrorMessage } from './utils.js'; import { addVisitorToCompiled, compiledVisitor, finalizeCompiledVisitor, initCompiledVisitor } from './visitor.js'; @@ -18,18 +12,10 @@ import { TOKEN } from '../../dist/src-js/raw-transfer/lazy-common.js'; import { walkProgram } from '../../dist/generated/lazy/walk.js'; */ -// @ts-expect-error we need to generate `.d.ts` file for this module -import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts.js'; // @ts-expect-error we need to generate `.d.ts` file for this module import { walkProgram } from '../../dist/generated/visit/walk.js'; -import type { AfterHook } from './types.ts'; - -// Buffer with typed array views of itself stored as properties -interface BufferWithArrays extends Uint8Array { - uint32: Uint32Array; - float64: Float64Array; -} +import type { AfterHook, BufferWithArrays } from './types.ts'; // Buffers cache. // @@ -38,9 +24,6 @@ interface BufferWithArrays extends Uint8Array { // until the process exits. const buffers: (BufferWithArrays | null)[] = []; -// Text decoder, for decoding source text from buffer -const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true }); - // Array of `after` hooks to run after traversal. This array reused for every file. const afterHooks: AfterHook[] = []; @@ -105,20 +88,16 @@ function lintFileImpl(filePath: string, bufferId: number, buffer: Uint8Array | n throw new Error('Expected `ruleIds` to be a non-zero len array'); } - // Decode source text from buffer - const { uint32 } = buffer, - programPos = uint32[DATA_POINTER_POS_32], - sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2]; - - const sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen)); - - // Deserialize AST from buffer. - // `preserveParens` argument is `false`, to match ESLint. - // ESLint does not include `ParenthesizedExpression` nodes in its AST. - const program = deserializeProgramOnly(buffer, sourceText, sourceByteLen, false); - + // Pass buffer to source code module, so it can decode source text and deserialize AST on demand. + // + // We don't want to do this eagerly, because all rules might return empty visitors, + // or `createOnce` rules might return `false` from their `before` hooks. + // In such cases, the AST doesn't need to be walked, so we can skip deserializing it. + // + // But... source text and AST can be accessed in body of `create` method, or `before` hook, via `context.sourceCode`. + // So we pass the buffer to source code module here, so it can decode source text / deserialize AST on demand. const hasBOM = false; // TODO: Set this correctly - setupSourceForFile(sourceText, hasBOM, program); + setupSourceForFile(buffer, hasBOM); // Get visitors for this file from all rules initCompiledVisitor(); @@ -155,6 +134,7 @@ function lintFileImpl(filePath: string, bufferId: number, buffer: Uint8Array | n // Some rules seen in the wild return an empty visitor object from `create` if some initial check fails // e.g. file extension is not one the rule acts on. if (needsVisit) { + const program = getAst(); walkProgram(program, compiledVisitor); // Lazy implementation diff --git a/apps/oxlint/src-js/plugins/source_code.ts b/apps/oxlint/src-js/plugins/source_code.ts index 58d12d86d6796..e2a6a82a438f0 100644 --- a/apps/oxlint/src-js/plugins/source_code.ts +++ b/apps/oxlint/src-js/plugins/source_code.ts @@ -1,33 +1,86 @@ +import { + DATA_POINTER_POS_32, + SOURCE_LEN_OFFSET, + // TODO(camc314): we need to generate `.d.ts` file for this module. + // @ts-expect-error +} from '../generated/constants.js'; +// @ts-expect-error we need to generate `.d.ts` file for this module +import { deserializeProgramOnly } from '../../dist/generated/deserialize/ts.js'; + import type { Program } from '@oxc-project/types'; import type { Scope, ScopeManager, Variable } from './scope.ts'; -import type { Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts'; +import type { BufferWithArrays, Comment, LineColumn, Node, NodeOrToken, Token } from './types.ts'; const { max } = Math; -// Source text. -// Initially `null`, but set to source text string before linting each file by `setupSourceForFile`. -let sourceText: string | null = null; -// Set before linting each file by `setupSourceForFile`. +// Text decoder, for decoding source text from buffer +const textDecoder = new TextDecoder('utf-8', { ignoreBOM: true }); + +// Buffer containing AST. Set before linting a file by `setupSourceForFile`. +let buffer: BufferWithArrays | null = null; + +// Indicates if the original source text has a BOM. Set before linting a file by `setupSourceForFile`. let hasBOM = false; -// Set before linting each file by `setupSourceForFile`. + +// Lazily populated when `SOURCE_CODE.text` or `SOURCE_CODE.ast` is accessed, +// or `getAst()` is called before the AST is walked. +let sourceText: string | null = null; +let sourceByteLen: number = 0; let ast: Program | null = null; /** * Set up source for the file about to be linted. - * @param sourceTextInput - Source text + * @param bufferInput - Buffer containing AST * @param hasBOMInput - `true` if file's original source text has Unicode BOM - * @param astInput - The AST program for the file */ -export function setupSourceForFile(sourceTextInput: string, hasBOMInput: boolean, astInput: Program): void { - sourceText = sourceTextInput; +export function setupSourceForFile(bufferInput: BufferWithArrays, hasBOMInput: boolean): void { + buffer = bufferInput; hasBOM = hasBOMInput; - ast = astInput; +} + +/** + * Decode source text from buffer. + */ +function initSourceText(): void { + const { uint32 } = buffer, + programPos = uint32[DATA_POINTER_POS_32]; + sourceByteLen = uint32[(programPos + SOURCE_LEN_OFFSET) >> 2]; + sourceText = textDecoder.decode(buffer.subarray(0, sourceByteLen)); +} + +/** + * Deserialize AST from buffer. + */ +function initAst(): void { + if (sourceText === null) initSourceText(); + + // `preserveParens` argument is `false`, to match ESLint. + // ESLint does not include `ParenthesizedExpression` nodes in its AST. + ast = deserializeProgramOnly(buffer, sourceText, sourceByteLen, false); +} + +/** + * Get AST of the file being linted. + * If AST has not already been deserialized, do it now. + * @returns AST of the file being linted. + */ +export function getAst(): Program { + if (ast === null) initAst(); + return ast; } /** * Reset source after file has been linted, to free memory. + * + * Setting `buffer` to `null` also prevents AST being deserialized after linting, + * at which point the buffer may be being reused for another file. + * The buffer might contain a half-constructed AST (if parsing is currently in progress in Rust), + * which would cause deserialization to malfunction. + * With `buffer` set to `null`, accessing `SOURCE_CODE.ast` will still throw, but the error message will be clearer, + * and no danger of an infinite loop due to a circular AST (unlikely but possible). */ export function resetSource(): void { + buffer = null; sourceText = null; ast = null; } @@ -44,6 +97,7 @@ export function resetSource(): void { export const SOURCE_CODE = Object.freeze({ // Get source text. get text(): string { + if (sourceText === null) initSourceText(); return sourceText; }, @@ -54,7 +108,7 @@ export const SOURCE_CODE = Object.freeze({ // Get AST of the file. get ast(): Program { - return ast; + return getAst(); }, // Get `ScopeManager` for the file. @@ -89,6 +143,8 @@ export const SOURCE_CODE = Object.freeze({ beforeCount?: number | null | undefined, afterCount?: number | null | undefined, ): string { + if (sourceText === null) initSourceText(); + // ESLint treats all falsy values for `node` as undefined if (!node) return sourceText; diff --git a/apps/oxlint/src-js/plugins/types.ts b/apps/oxlint/src-js/plugins/types.ts index 0e64d62e4df38..3d0a279f9e2cb 100644 --- a/apps/oxlint/src-js/plugins/types.ts +++ b/apps/oxlint/src-js/plugins/types.ts @@ -80,3 +80,9 @@ export interface RuleMeta { fixable?: 'code' | 'whitespace' | null | undefined; [key: string]: unknown; } + +// Buffer with typed array views of itself stored as properties. +export interface BufferWithArrays extends Uint8Array { + uint32: Uint32Array; + float64: Float64Array; +} diff --git a/apps/oxlint/test/e2e.test.ts b/apps/oxlint/test/e2e.test.ts index 1fde4be7313f2..fe26c30d7e400 100644 --- a/apps/oxlint/test/e2e.test.ts +++ b/apps/oxlint/test/e2e.test.ts @@ -127,6 +127,14 @@ describe('oxlint CLI', () => { await testFixture('sourceCode'); }); + it('should get source text and AST from `context.sourceCode` when accessed late', async () => { + await testFixture('sourceCode_late_access'); + }); + + it('should get source text and AST from `context.sourceCode` when accessed in `after` hook only', async () => { + await testFixture('sourceCode_late_access_after_only'); + }); + it('should support `createOnce`', async () => { await testFixture('createOnce'); }); diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access/.oxlintrc.json b/apps/oxlint/test/fixtures/sourceCode_late_access/.oxlintrc.json new file mode 100644 index 0000000000000..435ece15a9159 --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access/.oxlintrc.json @@ -0,0 +1,8 @@ +{ + "jsPlugins": ["./plugin.ts"], + "categories": { "correctness": "off" }, + "rules": { + "source-code-plugin/create": "error", + "source-code-plugin/create-once": "error" + } +} diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access/files/1.js b/apps/oxlint/test/fixtures/sourceCode_late_access/files/1.js new file mode 100644 index 0000000000000..c60f3786ed5bc --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access/files/1.js @@ -0,0 +1 @@ +let foo, bar; diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access/files/2.js b/apps/oxlint/test/fixtures/sourceCode_late_access/files/2.js new file mode 100644 index 0000000000000..5273b647ec1cb --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access/files/2.js @@ -0,0 +1 @@ +let qux; diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access/output.snap.md b/apps/oxlint/test/fixtures/sourceCode_late_access/output.snap.md new file mode 100644 index 0000000000000..f70aca31f06f1 --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access/output.snap.md @@ -0,0 +1,148 @@ +# Exit code +1 + +# stdout +``` + x source-code-plugin(create): program: + | text: "let foo, bar;\n" + | getText(): "let foo, bar;\n" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^ + `---- + + x source-code-plugin(create-once): after: + | source: "let foo, bar;\n" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^ + `---- + + x source-code-plugin(create-once): program: + | text: "let foo, bar;\n" + | getText(): "let foo, bar;\n" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^ + `---- + + x source-code-plugin(create): var decl: + | source: "let foo, bar;" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^^^^^^^^^^^^^ + `---- + + x source-code-plugin(create-once): var decl: + | source: "let foo, bar;" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^^^^^^^^^^^^^ + `---- + + x source-code-plugin(create): ident "foo": + | source: "foo" + | source with before: "t foo" + | source with after: "foo," + | source with both: "t foo," + ,-[files/1.js:1:5] + 1 | let foo, bar; + : ^^^ + `---- + + x source-code-plugin(create-once): ident "foo": + | source: "foo" + | source with before: "t foo" + | source with after: "foo," + | source with both: "t foo," + ,-[files/1.js:1:5] + 1 | let foo, bar; + : ^^^ + `---- + + x source-code-plugin(create): ident "bar": + | source: "bar" + | source with before: ", bar" + | source with after: "bar;" + | source with both: ", bar;" + ,-[files/1.js:1:10] + 1 | let foo, bar; + : ^^^ + `---- + + x source-code-plugin(create-once): ident "bar": + | source: "bar" + | source with before: ", bar" + | source with after: "bar;" + | source with both: ", bar;" + ,-[files/1.js:1:10] + 1 | let foo, bar; + : ^^^ + `---- + + x source-code-plugin(create): program: + | text: "let qux;\n" + | getText(): "let qux;\n" + ,-[files/2.js:1:1] + 1 | let qux; + : ^ + `---- + + x source-code-plugin(create-once): after: + | source: "let qux;\n" + ,-[files/2.js:1:1] + 1 | let qux; + : ^ + `---- + + x source-code-plugin(create-once): program: + | text: "let qux;\n" + | getText(): "let qux;\n" + ,-[files/2.js:1:1] + 1 | let qux; + : ^ + `---- + + x source-code-plugin(create): var decl: + | source: "let qux;" + ,-[files/2.js:1:1] + 1 | let qux; + : ^^^^^^^^ + `---- + + x source-code-plugin(create-once): var decl: + | source: "let qux;" + ,-[files/2.js:1:1] + 1 | let qux; + : ^^^^^^^^ + `---- + + x source-code-plugin(create): ident "qux": + | source: "qux" + | source with before: "t qux" + | source with after: "qux;" + | source with both: "t qux;" + ,-[files/2.js:1:5] + 1 | let qux; + : ^^^ + `---- + + x source-code-plugin(create-once): ident "qux": + | source: "qux" + | source with before: "t qux" + | source with after: "qux;" + | source with both: "t qux;" + ,-[files/2.js:1:5] + 1 | let qux; + : ^^^ + `---- + +Found 0 warnings and 16 errors. +Finished in Xms on 2 files using X threads. +``` + +# stderr +``` +WARNING: JS plugins are experimental and not subject to semver. +Breaking changes are possible while JS plugins support is under development. +``` diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access/plugin.ts b/apps/oxlint/test/fixtures/sourceCode_late_access/plugin.ts new file mode 100644 index 0000000000000..69bc6ae16006f --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access/plugin.ts @@ -0,0 +1,115 @@ +import assert from 'node:assert'; + +import type { Program } from '@oxc-project/types'; +import type { Plugin, Rule } from '../../../dist/index.js'; + +const SPAN = { start: 0, end: 0 }; + +// Purpose of this test fixture is to ensure that AST is not deserialized twice +// if `context.sourceCode.ast` is accessed during AST traversal. +// +// `sourceCode` test fixture tests the opposite case. +// In that fixture `context.sourceCode.ast` is accessed in `create` method or `before` hook +// - which are before AST traversal starts. + +const createRule: Rule = { + create(context) { + let ast: Program | null = null; + + return { + Program(node) { + ast = context.sourceCode.ast; + assert(ast === node); + + context.report({ + message: 'program:\n' + + `text: ${JSON.stringify(context.sourceCode.text)}\n` + + `getText(): ${JSON.stringify(context.sourceCode.getText())}`, + node: SPAN, + }); + }, + VariableDeclaration(node) { + assert(context.sourceCode.ast === ast); + + context.report({ + message: `var decl:\nsource: "${context.sourceCode.getText(node)}"`, + node, + }); + }, + Identifier(node) { + assert(context.sourceCode.ast === ast); + + context.report({ + message: `ident "${node.name}":\n` + + `source: "${context.sourceCode.getText(node)}"\n` + + `source with before: "${context.sourceCode.getText(node, 2)}"\n` + + `source with after: "${context.sourceCode.getText(node, null, 1)}"\n` + + `source with both: "${context.sourceCode.getText(node, 2, 1)}"`, + node, + }); + }, + }; + }, +}; + +const createOnceRule: Rule = { + createOnce(context) { + let ast: Program | null = null; + + return { + Program(node) { + ast = context.sourceCode.ast; + assert(ast === node); + + context.report({ + message: 'program:\n' + + `text: ${JSON.stringify(context.sourceCode.text)}\n` + + `getText(): ${JSON.stringify(context.sourceCode.getText())}`, + node: SPAN, + }); + }, + VariableDeclaration(node) { + assert(context.sourceCode.ast === ast); + + context.report({ + message: `var decl:\nsource: "${context.sourceCode.getText(node)}"`, + node, + }); + }, + Identifier(node) { + assert(context.sourceCode.ast === ast); + + context.report({ + message: `ident "${node.name}":\n` + + `source: "${context.sourceCode.getText(node)}"\n` + + `source with before: "${context.sourceCode.getText(node, 2)}"\n` + + `source with after: "${context.sourceCode.getText(node, null, 1)}"\n` + + `source with both: "${context.sourceCode.getText(node, 2, 1)}"`, + node, + }); + }, + after() { + assert(context.sourceCode.ast === ast); + ast = null; + + context.report({ + message: 'after:\n' + + `source: ${JSON.stringify(context.sourceCode.text)}`, + node: SPAN, + }); + }, + }; + }, +}; + +const plugin: Plugin = { + meta: { + name: 'source-code-plugin', + }, + rules: { + create: createRule, + 'create-once': createOnceRule, + }, +}; + +export default plugin; diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/.oxlintrc.json b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/.oxlintrc.json new file mode 100644 index 0000000000000..4bd4f9be3f1bb --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/.oxlintrc.json @@ -0,0 +1,7 @@ +{ + "jsPlugins": ["./plugin.ts"], + "categories": { "correctness": "off" }, + "rules": { + "source-code-plugin/create-once": "error" + } +} diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/1.js b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/1.js new file mode 100644 index 0000000000000..c60f3786ed5bc --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/1.js @@ -0,0 +1 @@ +let foo, bar; diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/2.js b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/2.js new file mode 100644 index 0000000000000..5273b647ec1cb --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/files/2.js @@ -0,0 +1 @@ +let qux; diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/output.snap.md b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/output.snap.md new file mode 100644 index 0000000000000..11ed70fd9c7df --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/output.snap.md @@ -0,0 +1,32 @@ +# Exit code +1 + +# stdout +``` + x source-code-plugin(create-once): after: + | text: "let foo, bar;\n" + | getText(): "let foo, bar;\n" + | ast: "foo" + ,-[files/1.js:1:1] + 1 | let foo, bar; + : ^ + `---- + + x source-code-plugin(create-once): after: + | text: "let qux;\n" + | getText(): "let qux;\n" + | ast: "qux" + ,-[files/2.js:1:1] + 1 | let qux; + : ^ + `---- + +Found 0 warnings and 2 errors. +Finished in Xms on 2 files using X threads. +``` + +# stderr +``` +WARNING: JS plugins are experimental and not subject to semver. +Breaking changes are possible while JS plugins support is under development. +``` diff --git a/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/plugin.ts b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/plugin.ts new file mode 100644 index 0000000000000..118c539a55285 --- /dev/null +++ b/apps/oxlint/test/fixtures/sourceCode_late_access_after_only/plugin.ts @@ -0,0 +1,34 @@ +import type { Plugin, Rule } from '../../../dist/index.js'; + +const SPAN = { start: 0, end: 0 }; + +// Purpose of this test fixture is to ensure that source text and AST are available in `after` hook +// via `context.sourceCode` when the AST is not traversed + +const createOnceRule: Rule = { + createOnce(context) { + return { + after() { + context.report({ + message: 'after:\n' + + `text: ${JSON.stringify(context.sourceCode.text)}\n` + + `getText(): ${JSON.stringify(context.sourceCode.getText())}\n` + + // @ts-ignore + `ast: "${context.sourceCode.ast.body[0].declarations[0].id.name}"`, + node: SPAN, + }); + }, + }; + }, +}; + +const plugin: Plugin = { + meta: { + name: 'source-code-plugin', + }, + rules: { + 'create-once': createOnceRule, + }, +}; + +export default plugin;