Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 58 additions & 8 deletions gitnexus/src/core/tree-sitter/safe-parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,51 @@ const SAFE_PARSE_CHUNK_CHARS = 16 * 1024;
*/
const DIRECT_PARSE_LIMIT_CHARS = 16 * 1024;

/**
 * Resolve the opt-in per-parse timeout from `GITNEXUS_PARSE_TIMEOUT_MICROS`.
 *
 * With the variable unset (the default) no timeout is configured and parsing
 * behaves exactly as in earlier versions. With a positive value, the caller
 * applies it through `parser.setTimeoutMicros` before each parse so that
 * tree-sitter can abort a long-running parse cooperatively by returning
 * `null` from `parse()`. The wrapper turns that `null` into a catchable
 * `ParseTimeoutError`, which callers in `call-processor.ts` are said to
 * handle by skipping the file.
 *
 * Why this exists: on large repositories the worker-pool idle timeout can
 * fire while tree-sitter is still blocked inside a synchronous
 * `parser.parse()` on a pathological file. The pool then calls
 * `worker.terminate()`, which races the native parser and surfaces as
 * `libc++abi: terminating due to uncaught exception of type Napi::Error`,
 * killing the whole analysis run. Choosing a parse timeout slightly below
 * the worker idle timeout lets the parser bail out cleanly first.
 *
 * @returns The timeout in whole microseconds (fractions are rounded down),
 *   or `0` when the variable is unset, empty, non-numeric, non-finite, or
 *   not strictly positive. `0` means "no timeout".
 */
const readParseTimeoutMicros = (): number => {
  const configured = process.env.GITNEXUS_PARSE_TIMEOUT_MICROS;
  // Unset and empty both mean "feature disabled".
  if (configured === undefined || configured === '') {
    return 0;
  }

  const micros = Number(configured);
  // Reject NaN, +/-Infinity, zero and negatives in one place.
  if (!Number.isFinite(micros) || micros <= 0) {
    return 0;
  }

  return Math.floor(micros);
};

/**
 * Error describing a tree-sitter parse that exceeded the configured
 * `GITNEXUS_PARSE_TIMEOUT_MICROS` budget.
 *
 * The message records both the budget and the size of the offending source
 * so the failure can be diagnosed from logs alone. `name` is set explicitly
 * so the error is identifiable even where `instanceof` is unavailable.
 */
export class ParseTimeoutError extends Error {
  /**
   * @param timeoutMicros - The timeout, in microseconds, that was in effect.
   * @param sourceLength - Length of the source text being parsed, in chars.
   */
  constructor(timeoutMicros: number, sourceLength: number) {
    const budget = `GITNEXUS_PARSE_TIMEOUT_MICROS=${timeoutMicros}`;
    const size = `(source length ${sourceLength} chars).`;
    super(`tree-sitter parse exceeded ${budget} ${size}`);
    this.name = 'ParseTimeoutError';
  }
}

/**
 * Run `run` with a temporary tree-sitter timeout installed on `parser`.
 *
 * When `timeoutMicros` is zero or negative the parser is left completely
 * untouched and `run` is simply invoked.
 *
 * Otherwise the parser's current timeout is remembered, `timeoutMicros` is
 * applied for the duration of `run`, and the remembered value is put back
 * afterwards, even if `run` throws. Restoring the previous value (rather
 * than forcing `0`) keeps any timeout the caller had already configured on
 * a shared parser.
 *
 * If `run` yields `null`/`undefined` (treated here as "the parse was aborted
 * by the timeout") the parser is reset before returning. Per the tree-sitter
 * API, a parser whose parse was cut short by a timeout resumes that parse on
 * the next `parse()` call unless it is reset; without the reset, reusing the
 * parser for a different document would continue from stale state.
 *
 * @param parser - The tree-sitter parser to configure.
 * @param timeoutMicros - Timeout in microseconds; `<= 0` disables the feature.
 * @param run - The work to perform while the timeout is in effect.
 * @returns Whatever `run` returns.
 */
const applyAndClearTimeout = <T>(parser: Parser, timeoutMicros: number, run: () => T): T => {
  if (timeoutMicros <= 0) return run();

  const previousTimeoutMicros = parser.getTimeoutMicros();
  parser.setTimeoutMicros(timeoutMicros);
  try {
    const result = run();
    // A missing tree means the parse was abandoned mid-way; drop the partial
    // state so the next parse on this parser starts from the beginning.
    if (result === null || result === undefined) parser.reset();
    return result;
  } finally {
    parser.setTimeoutMicros(previousTimeoutMicros);
  }
};

/**
* Parse `sourceText` safely on every platform. See {@link SAFE_PARSE_CHUNK_CHARS}
* for the underlying tree-sitter binding bug this works around.
Expand All @@ -29,12 +74,17 @@ export function parseSourceSafe(
oldTree?: Parser.Tree,
options?: Parser.Options,
): Parser.Tree {
if (sourceText.length <= DIRECT_PARSE_LIMIT_CHARS) {
return parser.parse(sourceText, oldTree, options);
}
const input: Parser.Input = (index) => {
if (index >= sourceText.length) return null;
return sourceText.slice(index, index + SAFE_PARSE_CHUNK_CHARS);
};
return parser.parse(input, oldTree, options);
const timeoutMicros = readParseTimeoutMicros();
const tree = applyAndClearTimeout(parser, timeoutMicros, () => {
if (sourceText.length <= DIRECT_PARSE_LIMIT_CHARS) {
return parser.parse(sourceText, oldTree, options);
}
const input: Parser.Input = (index) => {
if (index >= sourceText.length) return null;
return sourceText.slice(index, index + SAFE_PARSE_CHUNK_CHARS);
};
return parser.parse(input, oldTree, options);
});
if (!tree) throw new ParseTimeoutError(timeoutMicros, sourceText.length);
return tree;
}
35 changes: 33 additions & 2 deletions gitnexus/test/unit/safe-parse.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { describe, it, expect } from 'vitest';
import { describe, it, expect, afterEach } from 'vitest';
import Parser from 'tree-sitter';
import Python from 'tree-sitter-python';
import { parseSourceSafe } from '../../src/core/tree-sitter/safe-parse.js';
import { parseSourceSafe, ParseTimeoutError } from '../../src/core/tree-sitter/safe-parse.js';

const makeParser = (): Parser => {
const p = new Parser();
Expand Down Expand Up @@ -71,4 +71,35 @@ describe('parseSourceSafe', () => {
expect(tree.rootNode.hasError).toBe(false);
expect(tree.rootNode.endIndex).toBe(large.length);
});

describe('GITNEXUS_PARSE_TIMEOUT_MICROS opt-in', () => {
  // Snapshot the ambient value once so each test leaves the environment
  // exactly as it found it.
  const ENV_KEY = 'GITNEXUS_PARSE_TIMEOUT_MICROS';
  const savedValue = process.env[ENV_KEY];

  afterEach(() => {
    if (savedValue !== undefined) {
      process.env[ENV_KEY] = savedValue;
    } else {
      delete process.env[ENV_KEY];
    }
  });

  it('is a no-op when the env var is unset (backward compatible)', () => {
    delete process.env[ENV_KEY];

    const parsed = parseSourceSafe(makeParser(), 'x = 1\n');

    expect(parsed.rootNode.hasError).toBe(false);
  });

  it('throws ParseTimeoutError when parsing exceeds the configured timeout', () => {
    // A 1 µs budget is shorter than any real parse, so tree-sitter is
    // expected to give up and hand back null; the wrapper must surface
    // that as a ParseTimeoutError.
    process.env[ENV_KEY] = '1';
    const bigSource = buildSource(64 * 1024);

    const attempt = () => parseSourceSafe(makeParser(), bigSource);

    expect(attempt).toThrowError(ParseTimeoutError);
  });

  it('resets the parser timeout after each call so reuse is unaffected', () => {
    const sharedParser = makeParser();

    // First call: force a timeout on the shared parser.
    process.env[ENV_KEY] = '1';
    expect(() => parseSourceSafe(sharedParser, buildSource(64 * 1024))).toThrow();

    // Second call: with the feature off, the same parser must work normally.
    delete process.env[ENV_KEY];
    const parsed = parseSourceSafe(sharedParser, 'x = 1\n');
    expect(parsed.rootNode.hasError).toBe(false);
  });
});
});