Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions gitnexus/src/cli/analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import {
} from './analyze-config.js';
import { runFullAnalysis } from '../core/run-analyze.js';
import { getMaxFileSizeBannerMessage } from '../core/ingestion/utils/max-file-size.js';
import { warnMissingOptionalGrammars } from './optional-grammars.js';
import { warnMissingOptionalGrammars, getOptionalGrammarExtensions } from './optional-grammars.js';
import { glob } from 'glob';
import fs from 'fs/promises';
import { cliError } from './cli-message.js';
Expand Down Expand Up @@ -943,11 +943,13 @@ const analyzeCommandImpl = async (
}

// If the target repo contains files an optional grammar would parse but
// that grammar's native binding is absent, warn before analysis so users
// learn why those files end up unparsed instead of silently getting a
// degraded index.
// that grammar's native binding is absent (or disabled via
// GITNEXUS_SKIP_OPTIONAL_GRAMMARS), warn before analysis so users learn why
// those files end up unparsed instead of silently getting a degraded index.
// The extension set is derived from OPTIONAL_GRAMMARS so it can't drift.
try {
const matches = await glob(['**/*.dart', '**/*.proto'], {
const optionalGlobs = getOptionalGrammarExtensions().map((e) => `**/*${e}`);
const matches = await glob(optionalGlobs, {
cwd: repoPath,
ignore: ['**/node_modules/**', '**/.git/**', '**/dist/**', '**/build/**'],
dot: false,
Expand Down
78 changes: 67 additions & 11 deletions gitnexus/src/cli/optional-grammars.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@
* tree-sitter-dart, tree-sitter-proto, and tree-sitter-swift are vendored
* under vendor/ and materialized into node_modules/ at postinstall. Dart
* and Proto are built from source with node-gyp; Swift ships platform
* prebuilds activated via node-gyp-build. All three can be skipped via
* prebuilds activated via node-gyp-build. tree-sitter-kotlin is a declared
* optionalDependency (not vendored). All can be skipped via
* GITNEXUS_SKIP_OPTIONAL_GRAMMARS=1 (postinstall scripts), or can silently
* soft-fail when the toolchain is missing (Dart/Proto) or no prebuild
* matches the host platform (Swift).
* soft-fail when the toolchain is missing (Dart/Proto), when no prebuild
* matches the host platform (Swift), or when the optional install was
* skipped or its native build failed (Kotlin).
*
* Either path produces the same observable: the .node binding is absent
* at runtime. This helper detects that condition and surfaces a single
* stderr line per missing grammar so users learn why .dart/.proto/.swift
* stderr line per missing grammar so users learn why .dart/.proto/.swift/.kt
* support is unavailable instead of silently getting a degraded index.
*/

import { createRequire } from 'module';
import { SupportedLanguages } from 'gitnexus-shared';
import { isGrammarRuntimeSkipped } from '../core/tree-sitter/parser-loader.js';
import { cliWarn } from './cli-message.js';

const _require = createRequire(import.meta.url);
Expand All @@ -27,17 +31,55 @@ interface OptionalGrammar {
pkg: string;
/** File extensions this grammar parses */
extensions: string[];
/**
* SupportedLanguages id, when this grammar backs an ingestion language.
* Used to ask `isGrammarRuntimeSkipped` whether the grammar was disabled via
* `GITNEXUS_SKIP_OPTIONAL_GRAMMARS` (vs. genuinely missing). Omitted for
* `.proto`, which is a gRPC-extractor concern, not a SupportedLanguages.
*/
language?: SupportedLanguages;
}

/**
 * Registry of optional tree-sitter grammars, one entry per grammar.
 *
 * Each grammar must appear exactly once: `detectMissingOptionalGrammars`
 * iterates this list and reports every entry that is unavailable, so a
 * duplicated entry would be reported twice. Entries that back an ingestion
 * language carry `language` so the runtime opt-out check can be applied to
 * them; an entry without `language` is only ever probed by loading `pkg`.
 */
const OPTIONAL_GRAMMARS: OptionalGrammar[] = [
  {
    name: 'tree-sitter-dart',
    pkg: 'tree-sitter-dart',
    extensions: ['.dart'],
    language: SupportedLanguages.Dart,
  },
  // No `language`: .proto is not a SupportedLanguages entry.
  { name: 'tree-sitter-proto', pkg: 'tree-sitter-proto', extensions: ['.proto'] },
  {
    name: 'tree-sitter-swift',
    pkg: 'tree-sitter-swift',
    extensions: ['.swift'],
    language: SupportedLanguages.Swift,
  },
  {
    name: 'tree-sitter-kotlin',
    pkg: 'tree-sitter-kotlin',
    extensions: ['.kt', '.kts'],
    language: SupportedLanguages.Kotlin,
  },
];

/**
 * Collect every file extension declared by an entry of `OPTIONAL_GRAMMARS`.
 *
 * Callers build their file globs from this list instead of hard-coding
 * extensions, so the glob always matches the registry.
 *
 * @returns Each distinct extension once, in first-seen registry order.
 */
export function getOptionalGrammarExtensions(): string[] {
  const seen = new Set<string>();
  for (const grammar of OPTIONAL_GRAMMARS) {
    for (const extension of grammar.extensions) {
      // Set.add is a no-op for an extension that was already recorded.
      seen.add(extension);
    }
  }
  return Array.from(seen);
}

export interface MissingGrammar {
/** Grammar name, copied from the registry entry; quoted in the warning text. */
name: string;
/** File extensions the grammar parses; these files will not be parsed. */
extensions: string[];
/**
* Why the grammar is unavailable. Selects the warning text so that a
* deliberate opt-out is not told to reinstall.
* - `missing` — the native binding could not be loaded (not installed / build
*   soft-failed / no prebuild).
* - `skipped` — the binding is fine but the user disabled it via
*   `GITNEXUS_SKIP_OPTIONAL_GRAMMARS`.
*/
reason: 'missing' | 'skipped';
}

/**
Expand All @@ -59,6 +101,13 @@ export interface MissingGrammar {
export function detectMissingOptionalGrammars(): MissingGrammar[] {
const missing: MissingGrammar[] = [];
for (const g of OPTIONAL_GRAMMARS) {
// Deliberate runtime opt-out comes first: even an installed binding is
// treated as unavailable, with a `skipped` reason so the warning says so
// instead of suggesting a reinstall (#2101 review).
if (g.language !== undefined && isGrammarRuntimeSkipped(g.language)) {
missing.push({ name: g.name, extensions: g.extensions, reason: 'skipped' });
continue;
}
try {
_require(g.pkg);
} catch (err) {
Expand All @@ -80,7 +129,7 @@ export function detectMissingOptionalGrammars(): MissingGrammar[] {
{ grammar: g.name, extensions: g.extensions, error: msg },
);
}
missing.push({ name: g.name, extensions: g.extensions });
missing.push({ name: g.name, extensions: g.extensions, reason: 'missing' });
}
}
return missing;
Expand Down Expand Up @@ -110,9 +159,16 @@ export function warnMissingOptionalGrammars(opts?: {
if (relevantExtensions && !g.extensions.some((e) => relevantExtensions.has(e))) {
continue;
}
cliWarn(
`GitNexus${ctx}: optional grammar "${g.name}" is unavailable — ${g.extensions.join('/')} files will not be parsed. Reinstall without GITNEXUS_SKIP_OPTIONAL_GRAMMARS=1 (and ensure python3, make, g++) to enable.`,
{ grammar: g.name, extensions: g.extensions, context: opts?.context },
);
const exts = g.extensions.join('/');
const message =
g.reason === 'skipped'
? `GitNexus${ctx}: optional grammar "${g.name}" is disabled via GITNEXUS_SKIP_OPTIONAL_GRAMMARS — ${exts} files will not be parsed. Unset the variable to re-enable.`
: `GitNexus${ctx}: optional grammar "${g.name}" is unavailable — ${exts} files will not be parsed. Reinstall without GITNEXUS_SKIP_OPTIONAL_GRAMMARS=1 (and ensure python3, make, g++) to enable.`;
cliWarn(message, {
grammar: g.name,
extensions: g.extensions,
reason: g.reason,
context: opts?.context,
});
}
}
19 changes: 16 additions & 3 deletions gitnexus/src/core/ingestion/languages/dart/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@
*/

import Parser from 'tree-sitter';
import Dart from 'tree-sitter-dart';
import { SupportedLanguages } from 'gitnexus-shared';
// `tree-sitter-dart` is an optional/vendored grammar that may be absent on a
// default install. Loaded lazily + guarded via parser-loader rather than
// statically imported: this module is pulled onto the main thread eagerly by
// the scope-resolution registry and the language-provider index, so a top-level
// `import Dart from 'tree-sitter-dart'` would throw ERR_MODULE_NOT_FOUND at
// module-load and crash `analyze` even for repos with no Dart files (#2091,
// #2093). The grammar is only ever needed inside the lazy getters below.
import { getLanguageGrammar } from '../../../tree-sitter/parser-loader.js';

const DART_SCOPE_QUERY = `
; ── Scopes ───────────────────────────────────────────────────────────────────
Expand Down Expand Up @@ -134,14 +142,19 @@ let _query: Parser.Query | null = null;
/**
 * Return the module-wide Dart parser, creating it on first use.
 *
 * The grammar is obtained through `getLanguageGrammar` at call time rather
 * than from a statically imported binding, so merely importing this module
 * does not require `tree-sitter-dart` to be installed.
 *
 * The parser is cached only after `setLanguage` has succeeded. If loading the
 * grammar or `setLanguage` throws, nothing is cached and the error propagates,
 * so a later call retries instead of returning a parser with no language.
 *
 * @returns The cached parser configured with the Dart grammar.
 */
export function getDartParser(): Parser {
  if (_parser !== null) {
    return _parser;
  }
  const fresh = new Parser();
  fresh.setLanguage(
    getLanguageGrammar(SupportedLanguages.Dart) as Parameters<Parser['setLanguage']>[0],
  );
  _parser = fresh;
  return fresh;
}

/**
 * Return the module-wide compiled Dart scope query, compiling it on first use.
 *
 * The query is compiled exactly once, against the grammar obtained through
 * `getLanguageGrammar` at call time; no statically imported binding is used.
 * If compilation throws, the cache stays empty and the error propagates.
 *
 * @returns The cached `Parser.Query` built from `DART_SCOPE_QUERY`.
 */
export function getDartScopeQuery(): Parser.Query {
  if (_query === null) {
    _query = new Parser.Query(
      getLanguageGrammar(SupportedLanguages.Dart) as Parameters<Parser['setLanguage']>[0],
      DART_SCOPE_QUERY,
    );
  }
  return _query;
}
19 changes: 16 additions & 3 deletions gitnexus/src/core/ingestion/languages/kotlin/query.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import Parser from 'tree-sitter';
import Kotlin from 'tree-sitter-kotlin';
import { SupportedLanguages } from 'gitnexus-shared';
// `tree-sitter-kotlin` is an optionalDependency that may be absent on a default
// install (or fail its native build). Loaded lazily + guarded via parser-loader
// rather than statically imported: this module is pulled onto the main thread
// eagerly by the scope-resolution registry and the language-provider index, so
// a top-level `import Kotlin from 'tree-sitter-kotlin'` would throw
// ERR_MODULE_NOT_FOUND at module-load and crash `analyze` even for repos with no
// Kotlin files (#2091, #2093). The grammar is only ever needed in the getters.
import { getLanguageGrammar } from '../../../tree-sitter/parser-loader.js';

const KOTLIN_SCOPE_QUERY = `
;; Scopes
Expand Down Expand Up @@ -179,14 +187,19 @@ let query: Parser.Query | null = null;
/**
 * Return the module-wide Kotlin parser, creating it on first use.
 *
 * The grammar is obtained through `getLanguageGrammar` at call time rather
 * than from a statically imported binding, so merely importing this module
 * does not require `tree-sitter-kotlin` to be installed.
 *
 * The parser is cached only after `setLanguage` has succeeded. If loading the
 * grammar or `setLanguage` throws, nothing is cached and the error propagates,
 * so a later call retries instead of returning a parser with no language.
 *
 * @returns The cached parser configured with the Kotlin grammar.
 */
export function getKotlinParser(): Parser {
  if (parser !== null) {
    return parser;
  }
  const fresh = new Parser();
  fresh.setLanguage(
    getLanguageGrammar(SupportedLanguages.Kotlin) as Parameters<Parser['setLanguage']>[0],
  );
  parser = fresh;
  return fresh;
}

/**
 * Return the module-wide compiled Kotlin scope query, compiling it on first use.
 *
 * The query is compiled exactly once, against the grammar obtained through
 * `getLanguageGrammar` at call time; no statically imported binding is used.
 * If compilation throws, the cache stays empty and the error propagates.
 *
 * @returns The cached `Parser.Query` built from `KOTLIN_SCOPE_QUERY`.
 */
export function getKotlinScopeQuery(): Parser.Query {
  if (query === null) {
    query = new Parser.Query(
      getLanguageGrammar(SupportedLanguages.Kotlin) as Parameters<Parser['setLanguage']>[0],
      KOTLIN_SCOPE_QUERY,
    );
  }
  return query;
}
19 changes: 16 additions & 3 deletions gitnexus/src/core/ingestion/languages/swift/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,15 @@
*/

import Parser from 'tree-sitter';
import Swift from 'tree-sitter-swift';
import { SupportedLanguages } from 'gitnexus-shared';
// `tree-sitter-swift` is an optional/vendored grammar that may be absent on a
// default install. It is loaded lazily + guarded via parser-loader rather than
// statically imported: this module is pulled onto the main thread eagerly by
// the scope-resolution registry and the language-provider index, so a top-level
// `import Swift from 'tree-sitter-swift'` would throw ERR_MODULE_NOT_FOUND at
// module-load and crash `analyze` even for repos with no Swift files (#2091,
// #2093). The grammar is only ever needed inside the lazy getters below.
import { getLanguageGrammar } from '../../../tree-sitter/parser-loader.js';

const SWIFT_SCOPE_QUERY = `
;; ── Scopes ──────────────────────────────────────────────────────────
Expand Down Expand Up @@ -186,14 +194,19 @@ let _query: Parser.Query | null = null;
/**
 * Return the module-wide Swift parser, creating it on first use.
 *
 * The grammar is obtained through `getLanguageGrammar` at call time rather
 * than from a statically imported binding, so merely importing this module
 * does not require `tree-sitter-swift` to be installed.
 *
 * The parser is cached only after `setLanguage` has succeeded. If loading the
 * grammar or `setLanguage` throws, nothing is cached and the error propagates,
 * so a later call retries instead of returning a parser with no language.
 *
 * @returns The cached parser configured with the Swift grammar.
 */
export function getSwiftParser(): Parser {
  if (_parser !== null) {
    return _parser;
  }
  const fresh = new Parser();
  fresh.setLanguage(
    getLanguageGrammar(SupportedLanguages.Swift) as Parameters<Parser['setLanguage']>[0],
  );
  _parser = fresh;
  return fresh;
}

/**
 * Return the module-wide compiled Swift scope query, compiling it on first use.
 *
 * The query is compiled exactly once, against the grammar obtained through
 * `getLanguageGrammar` at call time; no statically imported binding is used.
 * If compilation throws, the cache stays empty and the error propagates.
 *
 * @returns The cached `Parser.Query` built from `SWIFT_SCOPE_QUERY`.
 */
export function getSwiftScopeQuery(): Parser.Query {
  if (_query === null) {
    _query = new Parser.Query(
      getLanguageGrammar(SupportedLanguages.Swift) as Parameters<Parser['setLanguage']>[0],
      SWIFT_SCOPE_QUERY,
    );
  }
  return _query;
}
23 changes: 18 additions & 5 deletions gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,13 @@ import {
type ExportedTypeMap,
} from '../call-processor.js';
import { createSemanticModel, type MutableSemanticModel } from '../model/index.js';
import { type PipelineProgress, getLanguageFromFilename } from 'gitnexus-shared';
import {
type PipelineProgress,
getLanguageFromFilename,
SupportedLanguages,
} from 'gitnexus-shared';
import { readFileContents } from '../filesystem-walker.js';
import { isLanguageAvailable } from '../../tree-sitter/parser-loader.js';
import { isLanguageAvailable, isGrammarRuntimeSkipped } from '../../tree-sitter/parser-loader.js';
import {
createWorkerPool,
workerPoolDisabledByEnv,
Expand Down Expand Up @@ -274,9 +278,18 @@ export async function runChunkedParseAndResolve(
}
}
for (const [lang, count] of skippedByLang) {
logger.warn(
`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`,
);
// Distinguish a deliberate runtime opt-out from a genuinely-missing binding
// so we don't tell a user who set GITNEXUS_SKIP_OPTIONAL_GRAMMARS to
// `npm rebuild` a grammar that built fine (#2091/#2093 review).
if (isGrammarRuntimeSkipped(lang as SupportedLanguages)) {
logger.warn(
`Skipping ${count} ${lang} file(s) — ${lang} parsing disabled via GITNEXUS_SKIP_OPTIONAL_GRAMMARS.`,
);
} else {
logger.warn(
`Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). Try: npm rebuild tree-sitter-${lang}`,
);
}
}

// Sort parseableScanned alphabetically for stable chunk membership
Expand Down
10 changes: 10 additions & 0 deletions gitnexus/src/core/ingestion/scope-resolution/pipeline/phase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import type { ParseOutput } from '../../pipeline-phases/parse.js';
import { SupportedLanguages, getLanguageFromFilename } from 'gitnexus-shared';
import { readFileContents } from '../../filesystem-walker.js';
import { runScopeResolution, type ScopeResolutionSubPhase } from './run.js';
import { isLanguageAvailable } from '../../../tree-sitter/parser-loader.js';
import { buildGraphNodeLookup } from '../graph-bridge/node-lookup.js';
import { SCOPE_RESOLVERS } from './registry.js';
import { isDev, isSemanticModelValidatorEnabled } from '../../utils/env.js';
Expand Down Expand Up @@ -170,6 +171,15 @@ export const scopeResolutionPhase: PipelinePhase<ScopeResolutionOutput> = {
for (const f of scannedFiles) {
const fileLang = getLanguageFromFilename(f.path);
if (fileLang === null) continue;
// Skip files whose grammar isn't available (optional grammars like
// swift/dart/kotlin on an install where the binding is absent or the
// user set GITNEXUS_SKIP_OPTIONAL_GRAMMARS). The parse phase already
// excluded and warned about these (parse-impl.ts); without this guard the
// file would fall through to the main-thread re-extract in run.ts and
// throw "Unsupported language" (caught, but noisy, and it needlessly
// loads the grammar on the main thread). `isLanguageAvailable` is
// memoized, so this stays O(1) per language. (#2091, #2093)
if (!isLanguageAvailable(fileLang)) continue;
let bucket = filesByLang.get(fileLang);
if (bucket === undefined) {
bucket = [];
Expand Down
13 changes: 13 additions & 0 deletions gitnexus/src/core/ingestion/workers/parse-worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,19 @@ import type {
/** Language grammar type accepted by Parser.setLanguage(). */
type TreeSitterLanguage = Parameters<typeof Parser.prototype.setLanguage>[0];

// ── Worker grammar loading — enforcement boundary (#2091/#2093, #2101) ───────
// The worker maintains its own grammar table (the guarded `_require`s below +
// `languageMap`) and intentionally does NOT consult the runtime
// `GITNEXUS_SKIP_OPTIONAL_GRAMMARS` opt-out. It does not need to: the MAIN
// THREAD's `parseableScanned` filter (pipeline-phases/parse-impl.ts, gated on
// `parser-loader.isLanguageAvailable`, which honors the runtime opt-out and a
// genuinely-absent binding alike) excludes files of an unavailable/opted-out
// language BEFORE any chunk is dispatched, so the worker never receives them.
// That main-thread filter is the single enforcement point. Any future change
// that dispatches files to the worker WITHOUT first passing them through
// `isLanguageAvailable` must re-introduce the gate here. (The cleaner end-state
// — routing this table through `parser-loader.getLanguageGrammar` so there is
// one loader — is the deferred Tier-1 consolidation.)
// tree-sitter-swift is an optionalDependency — may not be installed
const _require = createRequire(import.meta.url);
let Swift: TreeSitterLanguage | null = null;
Expand Down
Loading
Loading