diff --git a/gitnexus/src/core/tree-sitter/safe-parse.ts b/gitnexus/src/core/tree-sitter/safe-parse.ts
index 1b3bc4fc55..e79217a3c2 100644
--- a/gitnexus/src/core/tree-sitter/safe-parse.ts
+++ b/gitnexus/src/core/tree-sitter/safe-parse.ts
@@ -19,6 +19,64 @@ const SAFE_PARSE_CHUNK_CHARS = 16 * 1024;
  */
 const DIRECT_PARSE_LIMIT_CHARS = 16 * 1024;
 
+/**
+ * Optional per-parse timeout, opt-in via `GITNEXUS_PARSE_TIMEOUT_MICROS`.
+ *
+ * When unset (default) the parser has no timeout — behaviour is identical to
+ * previous versions. When set to a positive integer, `parser.setTimeoutMicros`
+ * is applied before each parse so tree-sitter aborts long-running parses
+ * cooperatively (returning `null` from `parse()`); this wrapper then converts
+ * the `null` into a catchable `ParseTimeoutError`, which existing callers in
+ * `call-processor.ts` already handle by skipping the file.
+ *
+ * Motivation: on large repos the worker-pool idle timeout can fire while
+ * tree-sitter is blocked inside a sync `parser.parse()` on a pathological
+ * file. The replacement path (`worker.terminate()`) then races the native
+ * parser and surfaces as `libc++abi: terminating due to uncaught exception
+ * of type Napi::Error`, killing the analysis run. Setting a per-parse
+ * timeout slightly below the worker idle timeout lets the parser abort
+ * cleanly before the pool tries to terminate it.
+ *
+ * @returns The timeout rounded down to whole microseconds, or `0` (disabled)
+ *   when the variable is unset, empty, not a finite number, or not positive.
+ */
+const readParseTimeoutMicros = (): number => {
+  const raw = process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
+  if (!raw) return 0;
+  const value = Number(raw);
+  return Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
+};
+
+/**
+ * Thrown by `parseSourceSafe` when `parser.parse()` yields no tree, which is
+ * how tree-sitter reports a parse that was cancelled by its timeout.
+ */
+export class ParseTimeoutError extends Error {
+  constructor(timeoutMicros: number, sourceLength: number) {
+    super(
+      `tree-sitter parse exceeded GITNEXUS_PARSE_TIMEOUT_MICROS=${timeoutMicros} ` +
+        `(source length ${sourceLength} chars).`,
+    );
+    this.name = 'ParseTimeoutError';
+  }
+}
+
+/**
+ * Run `run` with `timeoutMicros` installed on `parser`, then clear the
+ * timeout again (even if `run` throws) so a reused parser is left with no
+ * timeout. A non-positive `timeoutMicros` runs `run` without touching the
+ * parser at all.
+ */
+const applyAndClearTimeout = <T>(parser: Parser, timeoutMicros: number, run: () => T): T => {
+  if (timeoutMicros <= 0) return run();
+  parser.setTimeoutMicros(timeoutMicros);
+  try {
+    return run();
+  } finally {
+    parser.setTimeoutMicros(0);
+  }
+};
+
 /**
  * Parse `sourceText` safely on every platform. See {@link SAFE_PARSE_CHUNK_CHARS}
  * for the underlying tree-sitter binding bug this works around.
@@ -29,12 +87,23 @@ export function parseSourceSafe(
   oldTree?: Parser.Tree,
   options?: Parser.Options,
 ): Parser.Tree {
-  if (sourceText.length <= DIRECT_PARSE_LIMIT_CHARS) {
-    return parser.parse(sourceText, oldTree, options);
-  }
-  const input: Parser.Input = (index) => {
-    if (index >= sourceText.length) return null;
-    return sourceText.slice(index, index + SAFE_PARSE_CHUNK_CHARS);
-  };
-  return parser.parse(input, oldTree, options);
+  const timeoutMicros = readParseTimeoutMicros();
+  const tree = applyAndClearTimeout(parser, timeoutMicros, () => {
+    if (sourceText.length <= DIRECT_PARSE_LIMIT_CHARS) {
+      return parser.parse(sourceText, oldTree, options);
+    }
+    const input: Parser.Input = (index) => {
+      if (index >= sourceText.length) return null;
+      return sourceText.slice(index, index + SAFE_PARSE_CHUNK_CHARS);
+    };
+    return parser.parse(input, oldTree, options);
+  });
+  if (!tree) {
+    // A cancelled parse leaves resumable state inside the parser; without a
+    // reset the next `parse()` call would continue this aborted parse instead
+    // of starting fresh on its own input.
+    parser.reset();
+    throw new ParseTimeoutError(timeoutMicros, sourceText.length);
+  }
+  return tree;
 }
diff --git a/gitnexus/test/unit/safe-parse.test.ts b/gitnexus/test/unit/safe-parse.test.ts
index e536bbd40d..b083e15a16 100644
--- a/gitnexus/test/unit/safe-parse.test.ts
+++ b/gitnexus/test/unit/safe-parse.test.ts
@@ -1,7 +1,7 @@
-import { describe, it, expect } from 'vitest';
+import { describe, it, expect, afterEach } from 'vitest';
 import Parser from 'tree-sitter';
 import Python from 'tree-sitter-python';
-import { parseSourceSafe } from '../../src/core/tree-sitter/safe-parse.js';
+import { parseSourceSafe, ParseTimeoutError } from '../../src/core/tree-sitter/safe-parse.js';
 
 const makeParser = (): Parser => {
   const p = new Parser();
@@ -71,4 +71,38 @@ describe('parseSourceSafe', () => {
     expect(tree.rootNode.hasError).toBe(false);
     expect(tree.rootNode.endIndex).toBe(large.length);
   });
+
+  describe('GITNEXUS_PARSE_TIMEOUT_MICROS opt-in', () => {
+    const ORIGINAL = process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
+    afterEach(() => {
+      if (ORIGINAL === undefined) delete process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
+      else process.env.GITNEXUS_PARSE_TIMEOUT_MICROS = ORIGINAL;
+    });
+
+    it('is a no-op when the env var is unset (backward compatible)', () => {
+      delete process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
+      const tree = parseSourceSafe(makeParser(), 'x = 1\n');
+      expect(tree.rootNode.hasError).toBe(false);
+    });
+
+    it('throws ParseTimeoutError when parsing exceeds the configured timeout', () => {
+      // 1 microsecond is below any real parse latency — tree-sitter aborts
+      // immediately and returns null, which the wrapper converts to a throw.
+      process.env.GITNEXUS_PARSE_TIMEOUT_MICROS = '1';
+      const src = buildSource(64 * 1024);
+      expect(() => parseSourceSafe(makeParser(), src)).toThrowError(ParseTimeoutError);
+    });
+
+    it('resets the parser timeout after each call so reuse is unaffected', () => {
+      const parser = makeParser();
+      process.env.GITNEXUS_PARSE_TIMEOUT_MICROS = '1';
+      expect(() => parseSourceSafe(parser, buildSource(64 * 1024))).toThrow();
+      delete process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
+      const small = 'x = 1\n';
+      const tree = parseSourceSafe(parser, small);
+      expect(tree.rootNode.hasError).toBe(false);
+      // Guards against the aborted parse being resumed on the new input.
+      expect(tree.rootNode.endIndex).toBe(small.length);
+    });
+  });
 });