diff --git a/.github/scripts/check-tree-sitter-upgrade-readiness.py b/.github/scripts/check-tree-sitter-upgrade-readiness.py index f54afd7f08..5b0fad09e5 100644 --- a/.github/scripts/check-tree-sitter-upgrade-readiness.py +++ b/.github/scripts/check-tree-sitter-upgrade-readiness.py @@ -32,6 +32,7 @@ import re import sys import urllib.error +import urllib.parse import urllib.request REPO_ROOT = pathlib.Path(__file__).resolve().parents[2] @@ -190,7 +191,17 @@ def fetch_text(url: str, timeout: int = 8) -> str | None: set (raises the rate limit from 60 to 5 000 requests/hour). """ headers: dict[str, str] = {} - if _GITHUB_TOKEN and ("github.com" in url or "githubusercontent.com" in url): + # Parse the URL and check the hostname rather than substring-matching + # on the full URL string (CodeQL py/incomplete-url-substring-sanitization). + # `https://evil.com/?u=github.com` would have passed the substring check. + try: + parsed_host = urllib.parse.urlparse(url).hostname or "" + except ValueError: + parsed_host = "" + is_github_host = parsed_host == "github.com" or parsed_host.endswith( + (".github.com", ".githubusercontent.com") + ) or parsed_host == "githubusercontent.com" + if _GITHUB_TOKEN and is_github_host: headers["Authorization"] = f"Bearer {_GITHUB_TOKEN}" try: req = urllib.request.Request(url, headers=headers) diff --git a/.github/workflows/ci-report.yml b/.github/workflows/ci-report.yml index ab8d8362c4..03908bb2ba 100644 --- a/.github/workflows/ci-report.yml +++ b/.github/workflows/ci-report.yml @@ -138,14 +138,15 @@ jobs: const fs = require('fs'); const path = require('path'); - // Find the latest successful CI run on main + // Find recent successful CI runs on main (check several in case + // the most recent artifact has expired). 
const runs = await github.rest.actions.listWorkflowRuns({ owner: context.repo.owner, repo: context.repo.repo, workflow_id: 'ci.yml', branch: 'main', status: 'success', - per_page: 1, + per_page: 5, }); if (runs.data.workflow_runs.length === 0) { @@ -154,32 +155,47 @@ jobs: return; } - const mainRunId = runs.data.workflow_runs[0].id; - const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: mainRunId, - }); + // Try each run until we find a downloadable test-reports artifact + for (const run of runs.data.workflow_runs) { + const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: run.id, + }); - const testReports = artifacts.data.artifacts.find(a => a.name === 'test-reports'); - if (!testReports) { - core.setOutput('found', 'false'); - core.info('No test-reports artifact on main branch'); - return; - } + const testReports = artifacts.data.artifacts.find(a => a.name === 'test-reports'); + if (!testReports) { + core.info(`Run ${run.id}: no test-reports artifact, trying next`); + continue; + } - const zip = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: testReports.id, - archive_format: 'zip', - }); + try { + const zip = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: testReports.id, + archive_format: 'zip', + }); + + const dest = path.join(process.env.RUNNER_TEMP, 'base-coverage'); + fs.mkdirSync(dest, { recursive: true }); + fs.writeFileSync(path.join(dest, 'base.zip'), Buffer.from(zip.data)); + core.setOutput('found', 'true'); + core.setOutput('dir', dest); + return; + } catch (err) { + // 410 Gone means the artifact expired; try the next run + if (err.status === 410 || err.response?.status === 410) { + core.info(`Run ${run.id}: artifact expired, trying next`); + continue; + 
} + throw err; + } + } - const dest = path.join(process.env.RUNNER_TEMP, 'base-coverage'); - fs.mkdirSync(dest, { recursive: true }); - fs.writeFileSync(path.join(dest, 'base.zip'), Buffer.from(zip.data)); - core.setOutput('found', 'true'); - core.setOutput('dir', dest); + // All attempts exhausted — no usable base coverage + core.setOutput('found', 'false'); + core.info('No downloadable test-reports artifact found on main (all expired or missing)'); - name: Extract base coverage if: steps.meta.outputs.skip != 'true' && steps.base-coverage.outputs.found == 'true' @@ -234,7 +250,7 @@ jobs: printf -v "${prefix}_BRANCH_COV" '%s' "" printf -v "${prefix}_FUNCS_COV" '%s' "" printf -v "${prefix}_LINES_COV" '%s' "" - return 1 + return 0 fi } diff --git a/eslint.config.mjs b/eslint.config.mjs index 9100256833..f377cba6c3 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -4,6 +4,38 @@ import unusedImports from 'eslint-plugin-unused-imports'; import reactHooks from 'eslint-plugin-react-hooks'; import prettierConfig from 'eslint-config-prettier'; +// Selectors that protect MCP-reachable code from corrupting the JSON-RPC +// stdio frame stream. The MCP-reachable block below uses these directly; +// the lbug-adapter file-specific block must spread them in too because +// ESLint flat config REPLACES (not merges) `no-restricted-syntax` when +// multiple matching configs target the same file. Extracting to a const +// makes the dependency mechanical instead of documentation-enforced. +const mcpStdoutWriteSelectors = [ + { + selector: + "MemberExpression[object.type='MemberExpression'][object.object.name='process'][object.property.name='stdout'][property.name='write']", + message: + 'Direct process.stdout.write is forbidden in MCP-reachable code. 
Route diagnostics through console.error or process.stderr.write — the MCP stdio transport owns stdout for JSON-RPC frames.', + }, + { + selector: + "CallExpression[callee.type='MemberExpression'][callee.object.type='MemberExpression'][callee.object.object.name='process'][callee.object.property.name='stdout'][callee.property.name='write']", + message: + 'Direct process.stdout.write is forbidden in MCP-reachable code. Route diagnostics through console.error or process.stderr.write — the MCP stdio transport owns stdout for JSON-RPC frames.', + }, + { + // Catches the canonical destructuring shape: + // const { write } = process.stdout; + // (and any other ObjectPattern destructure rooted at process.stdout) + // which would otherwise capture a reference to the original write + // and bypass the sentinel. + selector: + "VariableDeclarator[init.type='MemberExpression'][init.object.name='process'][init.property.name='stdout'] > ObjectPattern", + message: + 'Destructuring process.stdout is forbidden in MCP-reachable code — bypasses the sentinel. Use process.stderr.write for diagnostics.', + }, +]; + export default [ // Global ignores { @@ -59,11 +91,26 @@ export default [ }, }, - // CLI package — allow console.log (it's a CLI tool) + // CLI/server packages — `console.log` IS the contract (CLI tool data output + // on stdout, e.g. `gitnexus query | jq`; server pretty-printed banners). + // Diagnostic logging (`warn`/`error`/`debug`/`info`) goes through pino like + // the rest of the codebase. { files: ['gitnexus/src/cli/**/*.ts', 'gitnexus/src/server/**/*.ts'], rules: { - 'no-console': 'off', + 'no-console': ['error', { allow: ['log'] }], + }, + }, + + // Forcing function for the pino migration. Severity is `error` — the + // codebase-wide migration is complete; new `console.*` in core source + // must fail lint. CLI/server are exempt above (legitimate stdout output). + // Tests, bin scripts, and the logger module itself remain exempt. 
+ { + files: ['gitnexus/src/**/*.ts'], + ignores: ['gitnexus/src/cli/**', 'gitnexus/src/server/**', 'gitnexus/src/core/logger.ts'], + rules: { + 'no-console': 'error', }, }, @@ -84,32 +131,7 @@ export default [ ], rules: { 'no-console': ['error', { allow: ['error'] }], - 'no-restricted-syntax': [ - 'error', - { - selector: - "MemberExpression[object.type='MemberExpression'][object.object.name='process'][object.property.name='stdout'][property.name='write']", - message: - 'Direct process.stdout.write is forbidden in MCP-reachable code. Route diagnostics through console.error or process.stderr.write — the MCP stdio transport owns stdout for JSON-RPC frames.', - }, - { - selector: - "CallExpression[callee.type='MemberExpression'][callee.object.type='MemberExpression'][callee.object.object.name='process'][callee.object.property.name='stdout'][callee.property.name='write']", - message: - 'Direct process.stdout.write is forbidden in MCP-reachable code. Route diagnostics through console.error or process.stderr.write — the MCP stdio transport owns stdout for JSON-RPC frames.', - }, - { - // Catches the canonical destructuring shape: - // const { write } = process.stdout; - // (and any other ObjectPattern destructure rooted at process.stdout) - // which would otherwise capture a reference to the original write - // and bypass the sentinel. - selector: - "VariableDeclarator[init.type='MemberExpression'][init.object.name='process'][init.property.name='stdout'] > ObjectPattern", - message: - 'Destructuring process.stdout is forbidden in MCP-reachable code — bypasses the sentinel. Use process.stderr.write for diagnostics.', - }, - ], + 'no-restricted-syntax': ['error', ...mcpStdoutWriteSelectors], }, }, @@ -129,11 +151,18 @@ export default [ // All close operations must go through safeClose() so the WAL is always // flushed before the connection is released. The sole authorised call site // inside safeClose itself uses an eslint-disable-next-line override. 
+ // + // ESLint flat config REPLACES (not merges) `no-restricted-syntax` when + // multiple matching configs target the same file. lbug-adapter.ts is also + // covered by the MCP-reachable block above, so we spread the shared + // mcpStdoutWriteSelectors here alongside the safeClose selectors. Without + // this, lbug-adapter would silently lose its MCP stdout-write protection. { files: ['gitnexus/src/core/lbug/lbug-adapter.ts'], rules: { 'no-restricted-syntax': [ 'error', + ...mcpStdoutWriteSelectors, { selector: "CallExpression[callee.object.name='conn'][callee.property.name='close']", message: 'Use safeClose() instead of calling conn.close() directly (#1376).', diff --git a/gitnexus-web/src/core/llm/agent.ts b/gitnexus-web/src/core/llm/agent.ts index c8cfa8d7cc..49862a9e6a 100644 --- a/gitnexus-web/src/core/llm/agent.ts +++ b/gitnexus-web/src/core/llm/agent.ts @@ -277,8 +277,10 @@ const extractInstanceName = (endpoint: string): string => { try { const url = new URL(endpoint); const hostname = url.hostname; - // Extract the first part before .openai.azure.com - const match = hostname.match(/^([^.]+)\.openai\.azure\.com/); + // Extract the first part before .openai.azure.com. The trailing `$` + // anchor is required (CodeQL js/regex/missing-regexp-anchor): without + // it `evil.openai.azure.com.attacker.tld` would match. 
+ const match = hostname.match(/^([^.]+)\.openai\.azure\.com$/); if (match) { return match[1]; } diff --git a/gitnexus-web/src/core/llm/tools.ts b/gitnexus-web/src/core/llm/tools.ts index cd0b5a8008..5a595049e0 100644 --- a/gitnexus-web/src/core/llm/tools.ts +++ b/gitnexus-web/src/core/llm/tools.ts @@ -278,8 +278,11 @@ export const createGraphRAGTools = (backend: GraphRAGBackend) => { const val = row[col]; if (val === null || val === undefined) return ''; if (typeof val === 'object') return JSON.stringify(val); - // Truncate long values and escape pipe characters - const str = String(val).replace(/\|/g, '\\|'); + // Truncate long values and escape pipe characters. Escape + // backslashes FIRST so the subsequent pipe escape isn't + // unescaped by a trailing backslash (CodeQL + // js/incomplete-sanitization). + const str = String(val).replace(/\\/g, '\\\\').replace(/\|/g, '\\|'); return str.length > 60 ? str.slice(0, 57) + '...' : str; }); return `| ${values.join(' | ')} |`; diff --git a/gitnexus/package-lock.json b/gitnexus/package-lock.json index ccc61bc5b2..b07ccd7cb8 100644 --- a/gitnexus/package-lock.json +++ b/gitnexus/package-lock.json @@ -30,6 +30,8 @@ "mnemonist": "^0.40.3", "onnxruntime-node": "^1.24.0", "pandemonium": "^2.4.0", + "pino": "^10.3.1", + "pino-pretty": "^13.1.3", "tree-sitter": "^0.21.1", "tree-sitter-c": "0.21.4", "tree-sitter-c-sharp": "0.23.1", @@ -1579,6 +1581,12 @@ "url": "https://github.com/sponsors/Boshen" } }, + "node_modules/@pinojs/redact": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@pinojs/redact/-/redact-0.4.0.tgz", + "integrity": "sha512-k2ENnmBugE/rzQfEcdWHcCY+/FM3VLzH9cYEsbdsoqrvzAKRhUZeRNhAZvB8OitQJ1TBed3yqWtdjzS6wJKBwg==", + "license": "MIT" + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -2057,9 +2065,9 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "25.6.0", - "resolved": 
"https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz", - "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==", + "version": "25.6.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.1.tgz", + "integrity": "sha512-coJCN8O1q4AGyyqCAUSP06P+SrMTu18BkEj3NVAK07q6QUneD2wzj3CLv9+yP+BMeZQlMvneXqqvDe3w+xcq7g==", "license": "MIT", "dependencies": { "undici-types": "~7.19.0" @@ -2380,6 +2388,15 @@ "js-tokens": "^10.0.0" } }, + "node_modules/atomic-sleep": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz", + "integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==", + "license": "MIT", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/balanced-match": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", @@ -2579,6 +2596,12 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "license": "MIT" }, + "node_modules/colorette": { + "version": "2.0.20", + "resolved": "https://registry.npmjs.org/colorette/-/colorette-2.0.20.tgz", + "integrity": "sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==", + "license": "MIT" + }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -2683,6 +2706,15 @@ "node": ">= 8" } }, + "node_modules/dateformat": { + "version": "4.6.3", + "resolved": "https://registry.npmjs.org/dateformat/-/dateformat-4.6.3.tgz", + "integrity": "sha512-2P0p0pFGzHS5EMnhdxQi7aJN+iMheud0UhG4dlE1DLAlvL8JHjJJTX/CSm4JXwV0Ka5nGk3zC5mcb5bUQUxxMA==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/debug": { "version": "4.4.3", "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", @@ -2806,6 +2838,15 @@ "node": ">= 0.8" } }, + 
"node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -3050,12 +3091,24 @@ "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==", "license": "MIT" }, + "node_modules/fast-copy": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/fast-copy/-/fast-copy-4.0.3.tgz", + "integrity": "sha512-58apWr0GUiDFM8+3afrO6eYwJBn9ZAhDOzG3L+/9llab/haCARS2UIfffmOurYLwbgDRs8n0rfr6qAAPEAuAQw==", + "license": "MIT" + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", "license": "MIT" }, + "node_modules/fast-safe-stringify": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/fast-safe-stringify/-/fast-safe-stringify-2.1.1.tgz", + "integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA==", + "license": "MIT" + }, "node_modules/fast-uri": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", @@ -3416,6 +3469,12 @@ "node": ">= 0.4" } }, + "node_modules/help-me": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/help-me/-/help-me-5.0.0.tgz", + "integrity": "sha512-7xgomUX6ADmcYzFik0HzAxh/73YlKR9bmFzf51CZwR+b6YtzU2m0u49hQCqV6SvlqIqsaxovfwdvbnsw3b/zpg==", + "license": "MIT" + }, "node_modules/hono": { "version": "4.12.16", "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz", @@ 
-3575,6 +3634,15 @@ "url": "https://github.com/sponsors/panva" } }, + "node_modules/joycon": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/joycon/-/joycon-3.1.1.tgz", + "integrity": "sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/js-tokens": { "version": "10.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-10.0.0.tgz", @@ -4183,6 +4251,15 @@ ], "license": "MIT" }, + "node_modules/on-exit-leak-free": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz", + "integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==", + "license": "MIT", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", @@ -4332,6 +4409,79 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/pino": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/pino/-/pino-10.3.1.tgz", + "integrity": "sha512-r34yH/GlQpKZbU1BvFFqOjhISRo1MNx1tWYsYvmj6KIRHSPMT2+yHOEb1SG6NMvRoHRF0a07kCOox/9yakl1vg==", + "license": "MIT", + "dependencies": { + "@pinojs/redact": "^0.4.0", + "atomic-sleep": "^1.0.0", + "on-exit-leak-free": "^2.1.0", + "pino-abstract-transport": "^3.0.0", + "pino-std-serializers": "^7.0.0", + "process-warning": "^5.0.0", + "quick-format-unescaped": "^4.0.3", + "real-require": "^0.2.0", + "safe-stable-stringify": "^2.3.1", + "sonic-boom": "^4.0.1", + "thread-stream": "^4.0.0" + }, + "bin": { + "pino": "bin.js" + } + }, + "node_modules/pino-abstract-transport": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-3.0.0.tgz", + "integrity": 
"sha512-wlfUczU+n7Hy/Ha5j9a/gZNy7We5+cXp8YL+X+PG8S0KXxw7n/JXA3c46Y0zQznIJ83URJiwy7Lh56WLokNuxg==", + "license": "MIT", + "dependencies": { + "split2": "^4.0.0" + } + }, + "node_modules/pino-pretty": { + "version": "13.1.3", + "resolved": "https://registry.npmjs.org/pino-pretty/-/pino-pretty-13.1.3.tgz", + "integrity": "sha512-ttXRkkOz6WWC95KeY9+xxWL6AtImwbyMHrL1mSwqwW9u+vLp/WIElvHvCSDg0xO/Dzrggz1zv3rN5ovTRVowKg==", + "license": "MIT", + "dependencies": { + "colorette": "^2.0.7", + "dateformat": "^4.6.3", + "fast-copy": "^4.0.0", + "fast-safe-stringify": "^2.1.1", + "help-me": "^5.0.0", + "joycon": "^3.1.1", + "minimist": "^1.2.6", + "on-exit-leak-free": "^2.1.0", + "pino-abstract-transport": "^3.0.0", + "pump": "^3.0.0", + "secure-json-parse": "^4.0.0", + "sonic-boom": "^4.0.1", + "strip-json-comments": "^5.0.2" + }, + "bin": { + "pino-pretty": "bin.js" + } + }, + "node_modules/pino-pretty/node_modules/strip-json-comments": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-5.0.3.tgz", + "integrity": "sha512-1tB5mhVo7U+ETBKNf92xT4hrQa3pm0MZ0PQvuDnWgAAGHDsfp4lPSpiS6psrSiet87wyGPh9ft6wmhOMQ0hDiw==", + "license": "MIT", + "engines": { + "node": ">=14.16" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/pino-std-serializers": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.1.0.tgz", + "integrity": "sha512-BndPH67/JxGExRgiX1dX0w1FvZck5Wa4aal9198SrRhZjH3GxKQUKIBnYJTdj2HDN3UQAS06HlfcSbQj2OHmaw==", + "license": "MIT" + }, "node_modules/pkce-challenge": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", @@ -4376,6 +4526,22 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/process-warning": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz", + "integrity": 
"sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "MIT" + }, "node_modules/protobufjs": { "version": "7.5.5", "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.5.tgz", @@ -4413,6 +4579,16 @@ "node": ">= 0.10" } }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "node_modules/qs": { "version": "6.14.2", "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.2.tgz", @@ -4428,6 +4604,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/quick-format-unescaped": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", + "integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==", + "license": "MIT" + }, "node_modules/range-parser": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", @@ -4483,6 +4665,15 @@ "rc": "cli.js" } }, + "node_modules/real-require": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz", + "integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==", + "license": "MIT", + "engines": { + "node": ">= 12.13.0" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -4591,12 +4782,37 @@ ], "license": "MIT" }, + 
"node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "license": "MIT", + "engines": { + "node": ">=10" + } + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", "license": "MIT" }, + "node_modules/secure-json-parse": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-4.1.0.tgz", + "integrity": "sha512-l4KnYfEyqYJxDwlNVyRfO2E4NTHfMKAWdUuA8J0yve2Dz/E/PdBepY03RvyJpssIpRFwJoCD55wA+mEDs6ByWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/semver": { "version": "7.7.4", "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", @@ -4828,6 +5044,15 @@ "dev": true, "license": "ISC" }, + "node_modules/sonic-boom": { + "version": "4.2.1", + "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.1.tgz", + "integrity": "sha512-w6AxtubXa2wTXAUsZMMWERrsIRAdrK0Sc+FUytWvYAhBJLyuI4llrMIC1DtlNSdI99EI86KZum2MMq3EAZlF9Q==", + "license": "MIT", + "dependencies": { + "atomic-sleep": "^1.0.0" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -4838,6 +5063,15 @@ "node": ">=0.10.0" } }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": 
"ISC", + "engines": { + "node": ">= 10.x" + } + }, "node_modules/stackback": { "version": "0.0.2", "resolved": "https://registry.npmjs.org/stackback/-/stackback-0.0.2.tgz", @@ -4925,6 +5159,18 @@ "node": ">=18" } }, + "node_modules/thread-stream": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-4.0.0.tgz", + "integrity": "sha512-4iMVL6HAINXWf1ZKZjIPcz5wYaOdPhtO8ATvZ+Xqp3BTdaqtAwQkNmKORqcIo5YkQqGXq5cwfswDwMqqQNrpJA==", + "license": "MIT", + "dependencies": { + "real-require": "^0.2.0" + }, + "engines": { + "node": ">=20" + } + }, "node_modules/tinybench": { "version": "2.9.0", "resolved": "https://registry.npmjs.org/tinybench/-/tinybench-2.9.0.tgz", diff --git a/gitnexus/package.json b/gitnexus/package.json index 6aa5f59b7d..84f7027627 100644 --- a/gitnexus/package.json +++ b/gitnexus/package.json @@ -73,6 +73,8 @@ "mnemonist": "^0.40.3", "onnxruntime-node": "^1.24.0", "pandemonium": "^2.4.0", + "pino": "^10.3.1", + "pino-pretty": "^13.1.3", "tree-sitter": "^0.21.1", "tree-sitter-c": "0.21.4", "tree-sitter-c-sharp": "0.23.1", diff --git a/gitnexus/src/cli/ai-context.ts b/gitnexus/src/cli/ai-context.ts index 1b83963017..41ba6c73e9 100644 --- a/gitnexus/src/cli/ai-context.ts +++ b/gitnexus/src/cli/ai-context.ts @@ -10,6 +10,7 @@ import fs from 'fs/promises'; import path from 'path'; import { fileURLToPath } from 'url'; import { type GeneratedSkillInfo } from './skill-gen.js'; +import { logger } from '../core/logger.js'; // ESM equivalent of __dirname const __filename = fileURLToPath(import.meta.url); @@ -293,7 +294,7 @@ Use GitNexus tools to accomplish this task. 
installedSkills.push(skill.name); } catch (err) { // Skip on error, don't fail the whole process - console.warn(`Warning: Could not install skill ${skill.name}:`, err); + logger.warn({ err }, `Warning: Could not install skill ${skill.name}:`); } } diff --git a/gitnexus/src/cli/analyze.ts b/gitnexus/src/cli/analyze.ts index 90b8c9a433..47745c5752 100644 --- a/gitnexus/src/cli/analyze.ts +++ b/gitnexus/src/cli/analyze.ts @@ -26,6 +26,8 @@ import { getMaxFileSizeBannerMessage } from '../core/ingestion/utils/max-file-si import { warnMissingOptionalGrammars } from './optional-grammars.js'; import { glob } from 'glob'; import fs from 'fs/promises'; +import { cliError } from './cli-message.js'; +import { isHfDownloadFailure } from '../core/embeddings/hf-env.js'; // Capture stderr.write at module load BEFORE anything (LadybugDB native // init, progress bar, console redirection) can monkey-patch it. The @@ -167,7 +169,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption if (options?.workerTimeout) { const workerTimeoutSeconds = Number(options.workerTimeout); if (!Number.isFinite(workerTimeoutSeconds) || workerTimeoutSeconds < 1) { - console.error(' --worker-timeout must be at least 1 second.\n'); + cliError(' --worker-timeout must be at least 1 second.\n'); process.exitCode = 1; return; } @@ -184,7 +186,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption if (typeof options?.embeddings === 'string') { const parsed = Number(options.embeddings); if (!Number.isInteger(parsed) || parsed < 0) { - console.error( + cliError( ` --embeddings expects a non-negative integer (got "${options.embeddings}"). 
` + `Pass 0 to disable the safety cap, or omit the value to keep the default.\n`, ); @@ -203,7 +205,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption if (value === undefined) return true; const parsed = Number(value); if (!Number.isInteger(parsed) || parsed <= 0) { - console.error(` ${optionName} must be a positive integer.\n`); + cliError(` ${optionName} must be a positive integer.\n`); process.exitCode = 1; return false; } @@ -234,7 +236,7 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption if (options?.embeddingDevice) { const allowed = new Set(['auto', 'cpu', 'dml', 'cuda', 'wasm']); if (!allowed.has(options.embeddingDevice)) { - console.error(' --embedding-device must be one of: auto, cpu, dml, cuda, wasm.\n'); + cliError(' --embedding-device must be one of: auto, cpu, dml, cuda, wasm.\n'); process.exitCode = 1; return; } @@ -330,7 +332,9 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption bar.start(100, 0, { phase: 'Initializing...' }); - // Graceful SIGINT handling + // Graceful SIGINT handling. Pino's default destination is `sync: false` + // (buffered) — flush before exit so in-flight records reach stderr. + // See `gitnexus/src/core/logger.ts:flushLoggerSync`. let aborted = false; const sigintHandler = () => { if (aborted) process.exit(1); @@ -339,13 +343,23 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption console.log('\n Interrupted — cleaning up...'); closeLbug() .catch(() => {}) - .finally(() => process.exit(130)); + .finally(async () => { + const { flushLoggerSync } = await import('../core/logger.js'); + flushLoggerSync(); + process.exit(130); + }); }; process.on('SIGINT', sigintHandler); - // Route console output through bar.log() to prevent progress bar corruption + // Route console output through bar.log() to prevent progress bar corruption. 
+ // This is a deliberate UI pattern (not a logging concern): analyze runs a + // long-lived progress bar on stdout; any concurrent console.* write would + // overwrite the bar mid-render. We capture originals, swap to barLog for + // the lifetime of the run, and restore on completion/error/SIGINT. const origLog = console.log.bind(console); + // eslint-disable-next-line no-console -- intentional console-routing for progress bar UX const origWarn = console.warn.bind(console); + // eslint-disable-next-line no-console -- intentional console-routing for progress bar UX const origError = console.error.bind(console); let barCurrentValue = 0; const barLog = (...args: any[]) => { @@ -354,7 +368,9 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption bar.update(barCurrentValue); }; console.log = barLog; + // eslint-disable-next-line no-console -- intentional console-routing for progress bar UX console.warn = barLog; + // eslint-disable-next-line no-console -- intentional console-routing for progress bar UX console.error = barLog; // Track elapsed time per phase @@ -420,7 +436,9 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption clearInterval(elapsedTimer); process.removeListener('SIGINT', sigintHandler); console.log = origLog; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.warn = origWarn; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.error = origError; bar.stop(); console.log(' Already up to date\n'); @@ -493,7 +511,9 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption process.removeListener('SIGINT', sigintHandler); console.log = origLog; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.warn = origWarn; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.error = origError; 
bar.update(100, { phase: 'Done' }); @@ -518,7 +538,9 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption clearInterval(elapsedTimer); process.removeListener('SIGINT', sigintHandler); console.log = origLog; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.warn = origWarn; + // eslint-disable-next-line no-console -- restoring after intentional progress-bar routing console.error = origError; bar.stop(); @@ -527,14 +549,14 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption // Registry name-collision from --name (#829) — surface as an // actionable error rather than a generic stack-trace. if (err instanceof RegistryNameCollisionError) { - console.error(`\n Registry name collision:\n`); - console.error(` "${err.registryName}" is already used by "${err.existingPath}".\n`); - console.error(` Options:`); - console.error(` • Pick a different alias: gitnexus analyze --name `); - console.error( - ` • Allow the duplicate: gitnexus analyze --allow-duplicate-name (leaves "-r ${err.registryName}" ambiguous)`, + cliError( + `\n Registry name collision:\n` + + ` "${err.registryName}" is already used by "${err.existingPath}".\n\n` + + ` Options:\n` + + ` • Pick a different alias: gitnexus analyze --name \n` + + ` • Allow the duplicate: gitnexus analyze --allow-duplicate-name (leaves "-r ${err.registryName}" ambiguous)\n`, + { registryName: err.registryName, existingPath: err.existingPath }, ); - console.error(''); process.exitCode = 1; return; } @@ -555,6 +577,26 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption return; } + // HF download failure — show clean guidance without the raw stack trace. + // Checked before writeFatalToStderr so the user sees one focused message + // rather than a stack-trace dump followed by a second remediation block. 
+ if (isHfDownloadFailure(msg) || msg.includes('Failed to download embedding model')) { + cliError( + ` The embedding model could not be downloaded.\n` + + ` huggingface.co may be unreachable from your network\n` + + ` (e.g. behind a corporate proxy or a regional firewall).\n` + + ` Suggestions:\n` + + ` 1. Set HF_ENDPOINT to a mirror and retry:\n` + + ` HF_ENDPOINT=https://hf-mirror.com npx gitnexus analyze --embeddings\n` + + ` (Windows: set HF_ENDPOINT=https://hf-mirror.com && npx gitnexus analyze --embeddings)\n` + + ` 2. Check your proxy / VPN settings.\n` + + ` 3. Once downloaded the model is cached — future runs work offline.\n`, + { recoveryHint: 'hf-endpoint-unreachable' }, + ); + process.exitCode = 1; + return; + } + // Bypass the redirected console.error and write the full stack to // the real stderr captured at module load. The redirected // console.error wraps every line with `\\x1b[2K\\r` (ANSI clear-line) @@ -574,34 +616,40 @@ export const analyzeCommand = async (inputPath?: string, options?: AnalyzeOption msg.includes('heap out of memory') || msg.includes('JavaScript heap') ) { - console.error(' This error typically occurs on very large repositories.'); - console.error(' Suggestions:'); - console.error(' 1. Add large vendored/generated directories to .gitnexusignore'); - console.error(' 2. Increase Node.js heap: NODE_OPTIONS="--max-old-space-size=16384"'); - console.error(' 3. Increase stack size: NODE_OPTIONS="--stack-size=4096"'); - console.error(''); + cliError( + ` This error typically occurs on very large repositories.\n` + + ` Suggestions:\n` + + ` 1. Add large vendored/generated directories to .gitnexusignore\n` + + ` 2. Increase Node.js heap: NODE_OPTIONS="--max-old-space-size=16384"\n` + + ` 3. 
Increase stack size: NODE_OPTIONS="--stack-size=4096"\n`, + { recoveryHint: 'large-repo' }, + ); } else if (msg.includes('ERESOLVE') || msg.includes('Could not resolve dependency')) { // Note: the original arborist "Cannot destructure property 'package' of // 'node.target'" crash happens inside npm *before* gitnexus code runs, // so it can't be caught here. This branch handles dependency-resolution // errors that surface at runtime (e.g. dynamic require failures). - console.error(' This looks like an npm dependency resolution issue.'); - console.error(' Suggestions:'); - console.error(' 1. Clear the npm cache: npm cache clean --force'); - console.error(' 2. Update npm: npm install -g npm@latest'); - console.error(' 3. Reinstall gitnexus: npm install -g gitnexus@latest'); - console.error(' 4. Or try npx directly: npx gitnexus@latest analyze'); - console.error(''); + cliError( + ` This looks like an npm dependency resolution issue.\n` + + ` Suggestions:\n` + + ` 1. Clear the npm cache: npm cache clean --force\n` + + ` 2. Update npm: npm install -g npm@latest\n` + + ` 3. Reinstall gitnexus: npm install -g gitnexus@latest\n` + + ` 4. Or try npx directly: npx gitnexus@latest analyze\n`, + { recoveryHint: 'npm-resolution' }, + ); } else if ( msg.includes('MODULE_NOT_FOUND') || msg.includes('Cannot find module') || msg.includes('ERR_MODULE_NOT_FOUND') ) { - console.error(' A required module could not be loaded. The installation may be corrupt.'); - console.error(' Suggestions:'); - console.error(' 1. Reinstall: npm install -g gitnexus@latest'); - console.error(' 2. Clear cache: npm cache clean --force && npx gitnexus@latest analyze'); - console.error(''); + cliError( + ` A required module could not be loaded. The installation may be corrupt.\n` + + ` Suggestions:\n` + + ` 1. Reinstall: npm install -g gitnexus@latest\n` + + ` 2. 
Clear cache: npm cache clean --force && npx gitnexus@latest analyze\n`, + { recoveryHint: 'module-not-found' }, + ); } process.exitCode = 1; diff --git a/gitnexus/src/cli/clean.ts b/gitnexus/src/cli/clean.ts index 4681508fb3..2bbc32aef1 100644 --- a/gitnexus/src/cli/clean.ts +++ b/gitnexus/src/cli/clean.ts @@ -6,6 +6,7 @@ */ import fs from 'fs/promises'; +import { logger } from '../core/logger.js'; import { findRepo, unregisterRepo, @@ -45,7 +46,7 @@ export const cleanCommand = async (options?: { force?: boolean; all?: boolean }) assertSafeStoragePath(entry); } catch (err) { if (err instanceof UnsafeStoragePathError) { - console.error(`Refusing to clean ${entry.name}: ${err.message}`); + logger.error(`Refusing to clean ${entry.name}: ${err.message}`); continue; } throw err; @@ -56,7 +57,7 @@ export const cleanCommand = async (options?: { force?: boolean; all?: boolean }) await unregisterRepo(entry.path); console.log(`Deleted: ${entry.name} (${entry.storagePath})`); } catch (err) { - console.error(`Failed to delete ${entry.name}:`, err); + logger.error({ err }, `Failed to delete ${entry.name}:`); } } return; @@ -85,6 +86,6 @@ export const cleanCommand = async (options?: { force?: boolean; all?: boolean }) await unregisterRepo(repo.repoPath); console.log(`Deleted: ${repo.storagePath}`); } catch (err) { - console.error('Failed to delete:', err); + logger.error({ err }, 'Failed to delete:'); } }; diff --git a/gitnexus/src/cli/cli-message.ts b/gitnexus/src/cli/cli-message.ts new file mode 100644 index 0000000000..db5f51fab5 --- /dev/null +++ b/gitnexus/src/cli/cli-message.ts @@ -0,0 +1,65 @@ +/** + * CLI message helpers — for user-facing banners, error guidance, and + * recovery hints emitted by `gitnexus` subcommands. + * + * These functions write **plain text** directly to `process.stderr` AND + * tee a structured pino record through the singleton `logger`. 
Plain text + * preserves the human-readable contract for users running `gitnexus` + * interactively, redirecting to a file, or piping to `cat`/`grep`. The + * structured tee keeps log aggregators happy. + * + * **Use these for:** + * - User-facing banners ("Server listening on http://...:N") + * - Validation errors ("--worker-timeout must be at least 1 second") + * - Recovery hints ("Suggestions: 1. Clear the npm cache, 2. ...") + * - One-line user notices ("No indexed repositories found.") + * + * **Do NOT use these for:** + * - Internal diagnostics (worker progress, retry counts, telemetry) + * — use `logger.info`/`warn`/`error` directly. Internal logs only + * need structured fields, not double-output to stderr. + * - High-volume hot paths — every `cliMessage` call writes twice (raw + * + structured). Acceptable for user-facing messages, wasteful for + * ingestion pipeline events. + * + * Design note: stderr is the right channel even for non-error messages + * because GitNexus CLI tools (`query`, `cypher`, `impact`) emit JSON + * data on stdout for piping (`gitnexus query | jq`). User banners on + * stdout would corrupt that pipeline. + */ +import { logger } from '../core/logger.js'; + +function writeStderr(msg: string): void { + // Direct write — bypassing `console.*` so it cannot be intercepted by + // progress-bar redirection (see `cli/analyze.ts:barLog`) or other + // routing. The structured tee below still goes through the logger so + // log aggregation works either way. + process.stderr.write(msg.endsWith('\n') ? msg : msg + '\n'); +} + +/** + * User-facing informational message. Use for banners, listening URLs, + * and any message the user expects to read in plain text. + */ +export function cliInfo(msg: string, fields?: Record): void { + writeStderr(msg); + logger.info(fields ?? {}, msg); +} + +/** + * User-facing warning. Operator-actionable but non-fatal — `cliWarn` + * indicates the command can still proceed in some form. 
+ */ +export function cliWarn(msg: string, fields?: Record): void { + writeStderr(msg); + logger.warn(fields ?? {}, msg); +} + +/** + * User-facing error. Indicates the command cannot proceed; usually + * paired with a non-zero exit code at the call site. + */ +export function cliError(msg: string, fields?: Record): void { + writeStderr(msg); + logger.error(fields ?? {}, msg); +} diff --git a/gitnexus/src/cli/eval-server.ts b/gitnexus/src/cli/eval-server.ts index cf9013d63e..0819c33e81 100644 --- a/gitnexus/src/cli/eval-server.ts +++ b/gitnexus/src/cli/eval-server.ts @@ -27,6 +27,8 @@ import http from 'http'; import { writeSync } from 'node:fs'; import { LocalBackend } from '../mcp/local/local-backend.js'; +import { logger } from '../core/logger.js'; +import { cliInfo, cliWarn } from './cli-message.js'; export interface EvalServerOptions { port?: string; @@ -332,13 +334,19 @@ export async function evalServerCommand(options?: EvalServerOptions): Promise r.name).join(', ')}`, + logger.info( + { repoCount: repos.length, repos: repos.map((r) => r.name) }, + 'GitNexus eval-server: repos loaded', ); let idleTimer: ReturnType | null = null; @@ -347,7 +355,7 @@ export async function evalServerCommand(options?: EvalServerOptions): Promise { - console.error('GitNexus eval-server: Idle timeout reached, shutting down'); + logger.info({ idleTimeoutSec }, 'GitNexus eval-server: idle timeout reached, shutting down'); await backend.disconnect(); process.exit(0); }, idleTimeoutSec * 1000); @@ -419,16 +427,34 @@ export async function evalServerCommand(options?: EvalServerOptions): Promise { - console.error(`GitNexus eval-server: listening on http://127.0.0.1:${port}`); - console.error(` POST /tool/query — search execution flows`); - console.error(` POST /tool/context — 360-degree symbol view`); - console.error(` POST /tool/impact — blast radius analysis`); - console.error(` POST /tool/cypher — raw Cypher query`); - console.error(` GET /health — health check`); - console.error(` POST 
/shutdown — graceful shutdown`); + // Plain-text banner for the human watching stderr; structured record + // for log aggregation (split into two so the user sees a real banner + // not `{"level":30,"msg":"...","port":4747,"endpoints":[...]}`). + const bannerLines = [ + `GitNexus eval-server: listening on http://127.0.0.1:${port}`, + ` POST /tool/query — search execution flows`, + ` POST /tool/context — 360-degree symbol view`, + ` POST /tool/impact — blast radius analysis`, + ` POST /tool/cypher — raw Cypher query`, + ` GET /health — health check`, + ` POST /shutdown — graceful shutdown`, + ]; if (idleTimeoutSec > 0) { - console.error(` Auto-shutdown after ${idleTimeoutSec}s idle`); + bannerLines.push(` Auto-shutdown after ${idleTimeoutSec}s idle`); } + cliInfo(bannerLines.join('\n'), { + port, + host: '127.0.0.1', + idleTimeoutSec: idleTimeoutSec > 0 ? idleTimeoutSec : undefined, + endpoints: [ + 'POST /tool/query', + 'POST /tool/context', + 'POST /tool/impact', + 'POST /tool/cypher', + 'GET /health', + 'POST /shutdown', + ], + }); try { // Use fd 1 directly — LadybugDB captures process.stdout (#324) writeSync(1, `GITNEXUS_EVAL_SERVER_READY:${port}\n`); @@ -440,7 +466,7 @@ export async function evalServerCommand(options?: EvalServerOptions): Promise { - console.error('GitNexus eval-server: shutting down...'); + logger.info('GitNexus eval-server: shutting down...'); await backend.disconnect(); server.close(); process.exit(0); diff --git a/gitnexus/src/cli/group.ts b/gitnexus/src/cli/group.ts index eb0dffc3d3..0053099b70 100644 --- a/gitnexus/src/cli/group.ts +++ b/gitnexus/src/cli/group.ts @@ -1,6 +1,7 @@ // gitnexus/src/cli/group.ts import { createRequire } from 'node:module'; import type { Command } from 'commander'; +import { logger } from '../core/logger.js'; const _require = createRequire(import.meta.url); const yaml = _require('js-yaml') as typeof import('js-yaml'); @@ -51,7 +52,7 @@ export function registerGroupCommands(program: Command): void { const 
groupDir = getGroupDir(getDefaultGitnexusDir(), groupName); const config = await loadGroupConfig(groupDir); if (!(repoPath in config.repos)) { - console.error(`Repo path "${repoPath}" not found in group "${groupName}"`); + logger.error(`Repo path "${repoPath}" not found in group "${groupName}"`); process.exitCode = 1; return; } @@ -239,7 +240,7 @@ export function registerGroupCommands(program: Command): void { const raw = await backend.getGroupService().groupImpact(payload); if (raw && typeof raw === 'object' && 'error' in raw) { - console.error(String((raw as { error: string }).error)); + logger.error(String((raw as { error: string }).error)); process.exitCode = 1; return; } @@ -333,7 +334,7 @@ export function registerGroupCommands(program: Command): void { }); if (raw && typeof raw === 'object' && 'error' in raw) { - console.error(String((raw as { error: string }).error)); + logger.error(String((raw as { error: string }).error)); process.exitCode = 1; return; } diff --git a/gitnexus/src/cli/mcp.ts b/gitnexus/src/cli/mcp.ts index e69ce0eb05..056b12591f 100644 --- a/gitnexus/src/cli/mcp.ts +++ b/gitnexus/src/cli/mcp.ts @@ -41,11 +41,17 @@ export const mcpCommand = async () => { // path runs cleanly with full stack traces. Registering duplicates here // would only produce noisy double-logging on the same exception. - // Now safe to dynamically import the heavy backend modules. Anything - // they emit to stdout during evaluation will route through the sentinel. - const [{ startMCPServer }, { LocalBackend }] = await Promise.all([ + // Dynamically import heavy backend modules AND the pino logger AFTER + // the sentinel installs. 
The logger is dynamic-imported (rather than + // static) to preserve the leaf-only static-import closure documented at + // the top of this file — `core/logger.js` itself doesn't write to + // stdout at module init, but transitive deps (pino, pino-pretty, the + // worker-thread transport) could in theory, and the import-closure + // regression test enforces the leaf invariant. + const [{ startMCPServer }, { LocalBackend }, { logger }] = await Promise.all([ import('../mcp/server.js'), import('../mcp/local/local-backend.js'), + import('../core/logger.js'), ]); // Missing-optional-grammar warnings are intentionally NOT emitted here. @@ -62,12 +68,15 @@ export const mcpCommand = async () => { const repos = await backend.listRepos(); if (repos.length === 0) { - console.error( + // Operator-actionable but the server still starts and serves; warn-level, + // not error. Tools will discover newly-analyzed repos via lazy refresh. + logger.warn( 'GitNexus: No indexed repos yet. Run `gitnexus analyze` in a git repo — the server will pick it up automatically.', ); } else { - console.error( - `GitNexus: MCP server starting with ${repos.length} repo(s): ${repos.map((r) => r.name).join(', ')}`, + logger.info( + { repoCount: repos.length, repos: repos.map((r) => r.name) }, + 'GitNexus: MCP server starting', ); } diff --git a/gitnexus/src/cli/optional-grammars.ts b/gitnexus/src/cli/optional-grammars.ts index c6e239b563..14f6c3c5ea 100644 --- a/gitnexus/src/cli/optional-grammars.ts +++ b/gitnexus/src/cli/optional-grammars.ts @@ -13,6 +13,7 @@ */ import { createRequire } from 'module'; +import { cliWarn } from './cli-message.js'; const _require = createRequire(import.meta.url); @@ -65,9 +66,14 @@ export function detectMissingOptionalGrammars(): MissingGrammar[] { /could not find|no native build|prebuilds/i.test(msg); if (!looksMissing) { // Present but broken — surface so the user doesn't get a misleading - // "reinstall" recovery message that wouldn't actually help. 
- console.error( + // "reinstall" recovery message that wouldn't actually help. cliWarn + // writes plain text to stderr AND tees a structured logger.warn + // record; the merged repo-wide ESLint pino-migration rule forbids + // direct `console.error` in CLI code (only `console.log` is allowed + // there for tool-data stdout output). + cliWarn( `GitNexus: optional grammar "${g.name}" is installed but failed to load (${msg.slice(0, 200)}). ${g.extensions.join('/')} files will not be parsed.`, + { grammar: g.name, extensions: g.extensions, error: msg }, ); } missing.push({ name: g.name, extensions: g.extensions }); @@ -92,12 +98,17 @@ export function warnMissingOptionalGrammars(opts?: { const missing = detectMissingOptionalGrammars(); if (missing.length === 0) return; const ctx = opts?.context ? ` [${opts.context}]` : ''; + // Hoist the optional set into a local so the closure below can narrow + // its type; references to `opts?.relevantExtensions` inside `.some()` + // lose the outer null-check narrowing and require a non-null assertion. + const relevantExtensions = opts?.relevantExtensions; for (const g of missing) { - if (opts?.relevantExtensions && !g.extensions.some((e) => opts.relevantExtensions!.has(e))) { + if (relevantExtensions && !g.extensions.some((e) => relevantExtensions.has(e))) { continue; } - console.error( + cliWarn( `GitNexus${ctx}: optional grammar "${g.name}" is unavailable — ${g.extensions.join('/')} files will not be parsed. 
Reinstall without GITNEXUS_SKIP_OPTIONAL_GRAMMARS=1 (and ensure python3, make, g++) to enable.`, + { grammar: g.name, extensions: g.extensions, context: opts?.context }, ); } } diff --git a/gitnexus/src/cli/remove.ts b/gitnexus/src/cli/remove.ts index 4d2ce0771d..18d2a340d0 100644 --- a/gitnexus/src/cli/remove.ts +++ b/gitnexus/src/cli/remove.ts @@ -27,6 +27,8 @@ */ import fs from 'fs/promises'; +import { logger } from '../core/logger.js'; +import { cliError } from './cli-message.js'; import { readRegistry, resolveRegistryEntry, @@ -51,14 +53,14 @@ export const removeCommand = async (target: string, options?: { force?: boolean // Idempotent: missing target is a no-op warning, not an error. // The `availableNames` hint comes from the error itself so users // can see what they might have meant. - console.warn(`Nothing to remove: ${err.message}`); + logger.warn(`Nothing to remove: ${err.message}`); return; } if (err instanceof RegistryAmbiguousTargetError) { // Duplicate aliases are allowed via --allow-duplicate-name (#829); // refuse to guess which one the user meant — surface the full list // and exit non-zero so scripts don't silently pick the wrong repo. - console.error(`Error: ${err.message}`); + cliError(`Error: ${err.message}`); process.exit(1); } throw err; @@ -86,7 +88,7 @@ export const removeCommand = async (target: string, options?: { force?: boolean assertSafeStoragePath(entry); } catch (err) { if (err instanceof UnsafeStoragePathError) { - console.error(`Error: ${err.message}`); + cliError(`Error: ${err.message}`); process.exit(1); } throw err; @@ -104,7 +106,8 @@ export const removeCommand = async (target: string, options?: { force?: boolean console.log(` Path: ${entry.path}`); console.log(` Storage: ${entry.storagePath}`); } catch (err) { - console.error(`Failed to remove ${entry.name}:`, err); + const msg = err instanceof Error ? 
err.message : String(err); + cliError(`Failed to remove ${entry.name}: ${msg}`, { err }); process.exit(1); } }; diff --git a/gitnexus/src/cli/serve.ts b/gitnexus/src/cli/serve.ts index 9f03793060..9356b5bab1 100644 --- a/gitnexus/src/cli/serve.ts +++ b/gitnexus/src/cli/serve.ts @@ -1,14 +1,26 @@ import { createServer } from '../server/api.js'; +import { logger, flushLoggerSync } from '../core/logger.js'; +import { cliError } from './cli-message.js'; -// Catch anything that would cause a silent exit +// Catch anything that would cause a silent exit. Pino v10's default +// destination is `sync: false` (SonicBoom buffered) — call +// `flushLoggerSync()` between the log and `process.exit(1)` so the crash +// record is not lost to the unflushed buffer. Worker-thread transports +// (pino-pretty under TTY) handle their own flush on process exit in v10, +// so no separate `pino.final` integration is needed (the API was removed +// in v10 because the transport architecture made it unnecessary). +// +// We pass the Error itself in `{ err }` so pino's built-in err serializer +// captures `type`, `message`, and `stack` as structured fields. process.on('uncaughtException', (err) => { - console.error('\n[gitnexus serve] Uncaught exception:', err.message); - if (process.env.DEBUG) console.error(err.stack); + logger.error({ err }, '[gitnexus serve] Uncaught exception'); + flushLoggerSync(); process.exit(1); }); -process.on('unhandledRejection', (reason: any) => { - console.error('\n[gitnexus serve] Unhandled rejection:', reason?.message || reason); - if (process.env.DEBUG) console.error(reason?.stack); +process.on('unhandledRejection', (reason) => { + const err = reason instanceof Error ? 
reason : new Error(String(reason)); + logger.error({ err }, '[gitnexus serve] Unhandled rejection'); + flushLoggerSync(); process.exit(1); }); @@ -22,16 +34,26 @@ export const serveCommand = async (options?: { port?: string; host?: string }) = try { await createServer(port, host); } catch (err: any) { - console.error(`\nFailed to start GitNexus server:\n`); - console.error(` ${err.message || err}\n`); if (err.code === 'EADDRINUSE') { - console.error(` Port ${port} is already in use. Either:`); - console.error(` 1. Stop the other process using port ${port}`); - console.error(` 2. Use a different port: gitnexus serve --port 4748\n`); + cliError( + `\nFailed to start GitNexus server:\n` + + ` ${err.message || err}\n\n` + + ` Port ${port} is already in use. Either:\n` + + ` 1. Stop the other process using port ${port}\n` + + ` 2. Use a different port: gitnexus serve --port 4748\n`, + { code: err.code, port, host }, + ); + } else { + cliError(`\nFailed to start GitNexus server:\n ${err.message || err}\n`, { + code: err.code, + port, + host, + }); } if (err.stack && process.env.DEBUG) { - console.error(err.stack); + logger.debug({ stack: err.stack }, 'serve start error stack'); } + flushLoggerSync(); process.exit(1); } }; diff --git a/gitnexus/src/cli/setup.ts b/gitnexus/src/cli/setup.ts index 1238e65275..af3c4737a7 100644 --- a/gitnexus/src/cli/setup.ts +++ b/gitnexus/src/cli/setup.ts @@ -365,7 +365,12 @@ async function installClaudeCodeHooks(result: SetupResult): Promise { } const hookPath = path.join(destHooksDir, 'gitnexus-hook.cjs').replace(/\\/g, '/'); - const hookCmd = `node "${hookPath.replace(/"/g, '\\"')}"`; + // Escape backslashes FIRST, then quotes (CodeQL js/incomplete-sanitization). + // The previous shape `replace(/"/g, '\\"')` alone would let `path\with"quote` + // become `path\with\"quote`, where the trailing `\` before `"` could + // unescape the quote inside the surrounding double-quoted shell context. 
+ const escapedHookPath = hookPath.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); + const hookCmd = `node "${escapedHookPath}"`; // Check which hook events need entries (idempotent: skip if already registered) const parsed = await (async () => { @@ -622,7 +627,7 @@ async function installOpenCodeSkills(result: SetupResult): Promise { const installed = await installSkillsTo(skillsDir); if (installed.length > 0) { result.configured.push( - `OpenCode skills (${installed.length} skills → ~/.config/opencode/skill/)`, + `OpenCode skills (${installed.length} skills → ~/.config/opencode/skills/)`, ); } } catch (err: any) { diff --git a/gitnexus/src/cli/tool.ts b/gitnexus/src/cli/tool.ts index 443f12f4cc..b40ffdd259 100644 --- a/gitnexus/src/cli/tool.ts +++ b/gitnexus/src/cli/tool.ts @@ -17,6 +17,7 @@ import { writeSync } from 'node:fs'; import { LocalBackend } from '../mcp/local/local-backend.js'; +import { cliError } from './cli-message.js'; let _backend: LocalBackend | null = null; @@ -25,7 +26,7 @@ async function getBackend(): Promise { _backend = new LocalBackend(); const ok = await _backend.init(); if (!ok) { - console.error('GitNexus: No indexed repositories found. Run: gitnexus analyze'); + cliError('GitNexus: No indexed repositories found. 
Run: gitnexus analyze'); process.exit(1); } return _backend; @@ -67,7 +68,7 @@ export async function queryCommand( }, ): Promise { if (!queryText?.trim()) { - console.error('Usage: gitnexus query '); + cliError('Usage: gitnexus query '); process.exit(1); } @@ -93,7 +94,7 @@ export async function contextCommand( }, ): Promise { if (!name?.trim() && !options?.uid) { - console.error('Usage: gitnexus context [--uid ] [--file ]'); + cliError('Usage: gitnexus context [--uid ] [--file ]'); process.exit(1); } @@ -118,7 +119,7 @@ export async function impactCommand( }, ): Promise { if (!target?.trim()) { - console.error('Usage: gitnexus impact [--direction upstream|downstream]'); + cliError('Usage: gitnexus impact [--direction upstream|downstream]'); process.exit(1); } @@ -153,7 +154,7 @@ export async function cypherCommand( }, ): Promise { if (!query?.trim()) { - console.error('Usage: gitnexus cypher '); + cliError('Usage: gitnexus cypher '); process.exit(1); } diff --git a/gitnexus/src/cli/wiki.ts b/gitnexus/src/cli/wiki.ts index ccd1cae4ec..38a0f82a69 100644 --- a/gitnexus/src/cli/wiki.ts +++ b/gitnexus/src/cli/wiki.ts @@ -19,6 +19,7 @@ import { import { WikiGenerator, type WikiOptions } from '../core/wiki/generator.js'; import { resolveLLMConfig, type LLMProvider } from '../core/wiki/llm-client.js'; import { detectCursorCLI } from '../core/wiki/cursor-client.js'; +import { logger } from '../core/logger.js'; export interface WikiCommandOptions { force?: boolean; @@ -583,7 +584,7 @@ export const wikiCommand = async (inputPath?: string, options?: WikiCommandOptio } else { console.log(`\n Error: ${err.message}\n`); if (process.env.GITNEXUS_VERBOSE) { - console.error(err); + logger.error({ err }, 'wiki command failed'); } } process.exitCode = 1; @@ -601,6 +602,38 @@ function hasGhCLI(): boolean { } } +/** + * Strict Gist URL predicate. 
Rejects: + * - any URL that does not parse (URL constructor throws) + * - schemes other than https (drops `http:`, `file:`, `gist:`-style spoofs) + * - hostnames that are not exactly `gist.github.com` (drops substring spoofs + * like `https://evil.com/?u=gist.github.com` and userinfo-prefixed shapes + * like `https://gist.github.com@evil.com/...` — note that URL.hostname + * strips userinfo, so the equality check rejects the userinfo-prefixed + * spoof if the actual host differs from gist.github.com) + * - any URL containing userinfo (`username[:password]@`), which the URL + * parser exposes via `.username` / `.password`. Defense-in-depth: even + * when hostname matches, a credential-bearing URL is suspect and not + * produced by `gh gist create`. + * + * Closes the substring-bypass class CodeQL `js/incomplete-url-substring- + * sanitization` flags. + */ +function isGistUrl(line: string): boolean { + const trimmed = line.trim(); + try { + const u = new URL(trimmed); + return ( + u.protocol === 'https:' && + u.hostname === 'gist.github.com' && + u.username === '' && + u.password === '' + ); + } catch { + return false; + } +} + function publishGist(htmlPath: string): { url: string; rawUrl: string } | null { try { const output = execFileSync( @@ -609,13 +642,14 @@ function publishGist(htmlPath: string): { url: string; rawUrl: string } | null { { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }, ).trim(); - // gh gist create prints the gist URL as the last line - const lines = output.split('\n'); - const gistUrl = lines.find((l) => l.includes('gist.github.com')) || lines[lines.length - 1]; - - if (!gistUrl || !gistUrl.includes('gist.github.com')) return null; + // `gh gist create` prints the gist URL as a line in the output. Find the + // first parseable Gist URL — if no line is a valid Gist URL, fail closed + // (do NOT fall back to lines[last]: a non-Gist last line would propagate + // through the regex below and produce a malformed `rawUrl`). 
+ const gistUrl = output.split('\n').find(isGistUrl); + if (!gistUrl) return null; - // Build a raw viewer URL via gist.githack.com + // Build a raw viewer URL via gist.githack.com. // gist URL format: https://gist.github.com/{user}/{id} const match = gistUrl.match(/gist\.github\.com\/([^/]+)\/([a-f0-9]+)/); let rawUrl = gistUrl; diff --git a/gitnexus/src/config/ignore-service.ts b/gitnexus/src/config/ignore-service.ts index ff61f0eb31..ce1fda9133 100644 --- a/gitnexus/src/config/ignore-service.ts +++ b/gitnexus/src/config/ignore-service.ts @@ -2,6 +2,7 @@ import ignore, { type Ignore } from 'ignore'; import fs from 'fs/promises'; import nodePath from 'path'; import type { Path } from 'path-scurry'; +import { logger } from '../core/logger.js'; const DEFAULT_IGNORE_LIST = new Set([ // Version Control @@ -365,7 +366,7 @@ export const loadIgnoreRules = async ( } catch (err: unknown) { const code = (err as NodeJS.ErrnoException).code; if (code !== 'ENOENT') { - console.warn(` Warning: could not read ${filename}: ${(err as Error).message}`); + logger.warn(` Warning: could not read ${filename}: ${(err as Error).message}`); } } } diff --git a/gitnexus/src/core/embeddings/embedder.ts b/gitnexus/src/core/embeddings/embedder.ts index 993c418839..72ddcbd707 100644 --- a/gitnexus/src/core/embeddings/embedder.ts +++ b/gitnexus/src/core/embeddings/embedder.ts @@ -14,7 +14,12 @@ if (!process.env.ORT_LOG_LEVEL) { process.env.ORT_LOG_LEVEL = '3'; } -import { pipeline, env, type FeatureExtractionPipeline } from '@huggingface/transformers'; +import { + pipeline, + env, + type FeatureExtractionPipeline, + type ProgressInfo, +} from '@huggingface/transformers'; import { existsSync } from 'fs'; import { execFileSync } from 'child_process'; import { join, dirname } from 'path'; @@ -22,7 +27,8 @@ import { createRequire } from 'module'; import { DEFAULT_EMBEDDING_CONFIG, type EmbeddingConfig, type ModelProgress } from './types.js'; import { isHttpMode, getHttpDimensions, httpEmbed } from 
'./http-client.js'; import { resolveEmbeddingConfig } from './config.js'; -import { applyHfEnvOverrides } from './hf-env.js'; +import { applyHfEnvOverrides, isHfDownloadFailure, withHfDownloadRetry } from './hf-env.js'; +import { logger } from '../logger.js'; /** * Check whether the onnxruntime-node package that @huggingface/transformers @@ -166,17 +172,22 @@ export const initEmbedder = async ( const isDev = process.env.NODE_ENV === 'development'; if (isDev) { - console.error(`🧠 Loading embedding model: ${finalConfig.modelId}`); + logger.info(`🧠 Loading embedding model: ${finalConfig.modelId}`); } const progressCallback = onProgress - ? (data: any) => { + ? (data: ProgressInfo) => { const progress: ModelProgress = { - status: data.status || 'progress', - file: data.file, - progress: data.progress, - loaded: data.loaded, - total: data.total, + // Map the `progress_total` aggregate event (not in ModelProgress.status) + // back to 'progress' so callers don't need to handle it separately. + status: + data.status === 'progress_total' + ? 'progress' + : ((data.status as ModelProgress['status']) ?? 'progress'), + file: 'file' in data ? data.file : undefined, + progress: 'progress' in data ? data.progress : undefined, + loaded: 'loaded' in data ? data.loaded : undefined, + total: 'total' in data ? 
data.total : undefined, }; onProgress(progress); } @@ -192,26 +203,38 @@ export const initEmbedder = async ( for (const device of devicesToTry) { try { if (isDev && device === 'dml') { - console.error('🔧 Trying DirectML (DirectX12) GPU backend...'); + logger.info('🔧 Trying DirectML (DirectX12) GPU backend...'); } else if (isDev && device === 'cuda') { - console.error('🔧 Trying CUDA GPU backend...'); + logger.info('🔧 Trying CUDA GPU backend...'); } else if (isDev && device === 'cpu') { - console.error('🔧 Using CPU backend...'); + logger.info('🔧 Using CPU backend...'); } else if (isDev && device === 'wasm') { - console.error('🔧 Using WASM backend (slower)...'); + logger.info('🔧 Using WASM backend (slower)...'); } - embedderInstance = await (pipeline as any)('feature-extraction', finalConfig.modelId, { - device: device, - dtype: 'fp32', - progress_callback: progressCallback, - session_options: { - logSeverityLevel: 3, - intraOpNumThreads: finalConfig.threads, - interOpNumThreads: 1, - executionMode: 'sequential', + embedderInstance = await withHfDownloadRetry( + () => + pipeline('feature-extraction', finalConfig.modelId, { + device: device, + dtype: 'fp32', + progress_callback: progressCallback, + session_options: { + logSeverityLevel: 3, + intraOpNumThreads: finalConfig.threads, + interOpNumThreads: 1, + executionMode: 'sequential', + }, + }), + { + onRetry: isDev + ? (attempt, max, err) => + logger.warn( + { attempt, max, err: err.message }, + `⚠️ Model download network error (attempt ${attempt}/${max}), retrying…`, + ) + : undefined, }, - }); + ); currentDevice = device; if (isDev) { @@ -221,15 +244,29 @@ export const initEmbedder = async ( : device === 'cuda' ? 
'GPU (CUDA)' : device.toUpperCase(); - console.error(`✅ Using ${label} backend`); - console.error('✅ Embedding model loaded successfully'); + logger.info(`✅ Using ${label} backend`); + logger.info('✅ Embedding model loaded successfully'); } return embedderInstance!; } catch (deviceError) { + // Network errors and circuit-open errors are not device-specific — + // they will fail the same way on every device. Rethrow immediately + // with actionable HF_ENDPOINT guidance rather than silently falling + // back to the next device. + const errMsg = deviceError instanceof Error ? deviceError.message : String(deviceError); + if (isHfDownloadFailure(errMsg)) { + const endpointHint = process.env.HF_ENDPOINT + ? `The configured endpoint (${process.env.HF_ENDPOINT}) may be unreachable.` + : `huggingface.co may be unreachable from your network.\n` + + ` Set HF_ENDPOINT to a mirror and retry:\n` + + ` HF_ENDPOINT=https://hf-mirror.com npx gitnexus analyze --embeddings\n` + + ` (Windows: set HF_ENDPOINT=https://hf-mirror.com && npx gitnexus analyze --embeddings)`; + throw new Error(`Failed to download embedding model: ${errMsg}\n ${endpointHint}`); + } if (isDev && (device === 'cuda' || device === 'dml')) { const gpuType = device === 'dml' ? 
'DirectML' : 'CUDA'; - console.error(`⚠️ ${gpuType} not available, falling back to CPU...`); + logger.info(`⚠️ ${gpuType} not available, falling back to CPU...`); } // Continue to next device in list if (device === devicesToTry[devicesToTry.length - 1]) { diff --git a/gitnexus/src/core/embeddings/embedding-pipeline.ts b/gitnexus/src/core/embeddings/embedding-pipeline.ts index 9d4f982818..cc2d38d9b0 100644 --- a/gitnexus/src/core/embeddings/embedding-pipeline.ts +++ b/gitnexus/src/core/embeddings/embedding-pipeline.ts @@ -44,6 +44,7 @@ import { } from '../lbug/schema.js'; import { loadVectorExtension } from '../lbug/lbug-adapter.js'; import { getExactScanLimit } from '../platform/capabilities.js'; +import { logger } from '../logger.js'; const isDev = process.env.NODE_ENV === 'development'; @@ -157,7 +158,7 @@ const queryEmbeddableNodes = async ( } } catch (error) { if (isDev) { - console.error(`Query for ${label} nodes failed:`, error); + logger.warn({ error }, `Query for ${label} nodes failed:`); } } } @@ -212,7 +213,7 @@ const createVectorIndex = async ( return true; } catch (error) { if (isDev) { - console.error('Vector index creation warning:', error); + logger.warn({ error }, 'Vector index creation warning:'); } return false; } @@ -256,7 +257,9 @@ export const runEmbeddingPipeline = async ( try { const vectorAvailable = await ensureVectorExtensionAvailable(); - if (!vectorAvailable && isDev) console.error(vectorUnavailableMessage); + if (!vectorAvailable && isDev) { + logger.warn(vectorUnavailableMessage); + } // Phase 1: Load embedding model onProgress({ @@ -283,7 +286,7 @@ export const runEmbeddingPipeline = async ( }); if (isDev) { - console.error('🔍 Querying embeddable nodes...'); + logger.info('🔍 Querying embeddable nodes...'); } // Phase 2: Query embeddable nodes @@ -325,7 +328,7 @@ export const runEmbeddingPipeline = async ( // (Kuzu forbids SET on vector-indexed properties; DELETE-then-INSERT is the sanctioned pattern) if (staleNodeIds.length > 0) { if 
(isDev) { - console.error(`🔄 Deleting ${staleNodeIds.length} stale embedding rows for re-embed`); + logger.info(`🔄 Deleting ${staleNodeIds.length} stale embedding rows for re-embed`); } try { await executeWithReusedStatement( @@ -346,7 +349,7 @@ export const runEmbeddingPipeline = async ( } if (isDev) { - console.error( + logger.info( `📦 Incremental embeddings: ${beforeCount} total, ${existingEmbeddings.size} cached, ${staleNodeIds.length} stale, ${nodes.length} to embed`, ); } @@ -355,7 +358,7 @@ export const runEmbeddingPipeline = async ( const totalNodes = nodes.length; if (isDev) { - console.error(`📊 Found ${totalNodes} embeddable nodes`); + logger.info(`📊 Found ${totalNodes} embeddable nodes`); } if (totalNodes === 0) { @@ -442,9 +445,9 @@ export const runEmbeddingPipeline = async ( ); } catch (chunkErr) { if (isDev) { - console.error( + logger.warn( + { chunkErr }, `⚠️ AST chunking failed for ${node.label} "${node.name}" (${node.filePath}), falling back to character-based chunking:`, - chunkErr, ); } chunks = characterChunk(node.content, startLine, endLine, chunkSize, overlap); @@ -482,9 +485,9 @@ export const runEmbeddingPipeline = async ( try { embeddings = await embedBatch(subTexts); } catch (embedErr) { - console.error( + logger.error( + { embedErr }, `❌ embedBatch failed for ${subTexts.length} texts (first: "${subTexts[0]?.substring(0, 80)}..."):`, - embedErr, ); throw embedErr; } @@ -520,7 +523,7 @@ export const runEmbeddingPipeline = async ( }); if (isDev) { - console.error('📇 Creating vector index...'); + logger.info('📇 Creating vector index...'); } const vectorIndexReady = await createVectorIndex(executeQuery); @@ -533,7 +536,7 @@ export const runEmbeddingPipeline = async ( }); if (isDev) { - console.error( + logger.info( `✅ Embedding pipeline complete! (${totalChunks} chunks from ${totalNodes} nodes)`, ); } @@ -547,7 +550,7 @@ export const runEmbeddingPipeline = async ( const errorMessage = error instanceof Error ? 
error.message : 'Unknown error'; if (isDev) { - console.error('❌ Embedding pipeline error:', error); + logger.error({ error }, '❌ Embedding pipeline error:'); } onProgress({ diff --git a/gitnexus/src/core/embeddings/hf-env.ts b/gitnexus/src/core/embeddings/hf-env.ts index 6a977a76db..95548fff2a 100644 --- a/gitnexus/src/core/embeddings/hf-env.ts +++ b/gitnexus/src/core/embeddings/hf-env.ts @@ -1,6 +1,25 @@ import os from 'node:os'; import { join } from 'node:path'; +// --------------------------------------------------------------------------- +// Download resilience defaults +// --------------------------------------------------------------------------- + +/** Per-attempt timeout for the full model download (5 minutes). */ +export const HF_DOWNLOAD_TIMEOUT_MS = 5 * 60 * 1_000; +/** Maximum total download attempts (1 initial + N-1 retries). */ +export const HF_MAX_ATTEMPTS = 3; +/** Initial delay between retry attempts; doubles on each subsequent retry. */ +export const HF_BASE_DELAY_MS = 2_000; +/** Number of consecutive failures required to open the circuit. */ +export const CB_FAILURE_THRESHOLD = 3; +/** How long the circuit stays open before transitioning to half-open. */ +export const CB_RESET_TIMEOUT_MS = 60_000; +/** Upper bound clamped on the env-override per-attempt timeout (30 minutes). */ +export const HF_MAX_TIMEOUT_MS = 30 * 60 * 1_000; +/** Upper bound clamped on the env-override attempt count. */ +export const HF_MAX_ATTEMPTS_CAP = 10; + /** * @internal Exported only for unit tests and the two embedder entry points * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the @@ -60,3 +79,265 @@ export function applyHfEnvOverrides(env: HfEnvSubset): void { env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/'; } } + +/** + * @internal Exported for unit tests and the two embedder entry points. + * + * Returns true when an error message indicates a network-level fetch failure + * during HuggingFace model download (e.g. 
`TypeError: fetch failed`, + * `ECONNREFUSED`, `ENOTFOUND`, `ETIMEDOUT`, `ECONNRESET`). + * + * These errors are not device-specific and cannot be fixed by falling back to + * a different ONNX device — the caller should rethrow immediately with + * guidance about `HF_ENDPOINT`. + */ +export function isNetworkFetchError(message: string): boolean { + return ( + message.includes('fetch failed') || + message.includes('ECONNREFUSED') || + message.includes('ENOTFOUND') || + message.includes('ETIMEDOUT') || + message.includes('ECONNRESET') + ); +} + +// --------------------------------------------------------------------------- +// Circuit breaker +// --------------------------------------------------------------------------- + +/** @internal Used by `withHfDownloadRetry` to mark a circuit-open rejection. */ +export const CIRCUIT_OPEN_TAG = 'hf-circuit-open'; + +/** Circuit-breaker states. */ +type CircuitState = 'closed' | 'open' | 'half-open'; + +/** + * Circuit breaker for HuggingFace model downloads. + * + * After `failureThreshold` consecutive network failures the circuit opens and + * all subsequent calls to `withHfDownloadRetry` fail immediately without + * issuing any network requests. After `resetTimeoutMs` the circuit enters the + * half-open state and the next call is attempted — if it succeeds the circuit + * closes again; if it fails the circuit re-opens. + * + * Exported for unit-testing; production code should use the module-level + * `hfDownloadCircuit` singleton. + */ +export class HfDownloadCircuitBreaker { + private _state: CircuitState = 'closed'; + private _failures = 0; + /** Timestamp of the last recorded failure (ms since epoch). */ + lastFailureAt = 0; + + constructor( + readonly failureThreshold: number = CB_FAILURE_THRESHOLD, + readonly resetTimeoutMs: number = CB_RESET_TIMEOUT_MS, + ) {} + + /** Effective state, factoring in the reset-timeout transition. 
*/ + get state(): CircuitState { + if (this._state === 'open' && Date.now() - this.lastFailureAt > this.resetTimeoutMs) { + this._state = 'half-open'; + } + return this._state; + } + + /** Returns true when the circuit is open and calls should be rejected. */ + isOpen(): boolean { + return this.state === 'open'; + } + + /** Record a successful call — resets the failure counter and closes the circuit. */ + recordSuccess(): void { + this._failures = 0; + this._state = 'closed'; + } + + /** Record a failed call — increments the counter and opens the circuit when the threshold is reached. */ + recordFailure(): void { + this._failures++; + this.lastFailureAt = Date.now(); + if (this._failures >= this.failureThreshold) { + this._state = 'open'; + } + } + + /** @internal Reset to initial state (used in tests). */ + reset(): void { + this._failures = 0; + this._state = 'closed'; + this.lastFailureAt = 0; + } +} + +/** Module-level singleton shared by both embedder entry points. */ +export const hfDownloadCircuit = new HfDownloadCircuitBreaker(); + +// --------------------------------------------------------------------------- +// Retry + timeout wrapper +// --------------------------------------------------------------------------- + +/** @internal Returns true for errors that should abort without retry (circuit-open). */ +export function isHfCircuitOpenError(message: string): boolean { + return message.includes(CIRCUIT_OPEN_TAG); +} + +/** + * Returns true for any HuggingFace download failure that warrants showing the + * `HF_ENDPOINT` remediation hint: either a raw network error or a + * circuit-open rejection (which itself was caused by repeated network errors). + */ +export function isHfDownloadFailure(message: string): boolean { + return isNetworkFetchError(message) || isHfCircuitOpenError(message); +} + +/** @internal Wraps `fn` in a hard time-limit. The timeout error contains + * `ETIMEDOUT` so that `isNetworkFetchError` classifies it correctly. 
+ */ +export function withDownloadTimeout(fn: () => Promise, timeoutMs: number): Promise { + return new Promise((resolve, reject) => { + const timer = setTimeout( + () => + reject( + new Error( + `ETIMEDOUT: model download timed out after ${Math.round(timeoutMs / 1000)}s — ` + + `check your network speed or set HF_ENDPOINT to a faster mirror`, + ), + ), + timeoutMs, + ); + fn().then( + (v) => { + clearTimeout(timer); + resolve(v); + }, + (e) => { + clearTimeout(timer); + reject(e); + }, + ); + }); +} + +/** @internal Async sleep (exposed for testing). */ +export function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +export interface HfRetryOptions { + /** Maximum total attempts including the initial one (default: `HF_MAX_ATTEMPTS`). */ + maxAttempts?: number; + /** Delay before the first retry; doubles on each subsequent attempt (default: `HF_BASE_DELAY_MS`). */ + baseDelayMs?: number; + /** Per-attempt wall-clock timeout in ms (default: `HF_DOWNLOAD_TIMEOUT_MS`). */ + timeoutMs?: number; + /** + * Circuit-breaker instance to use. Defaults to the module-level + * `hfDownloadCircuit` singleton. Pass a fresh instance in tests. + */ + circuit?: HfDownloadCircuitBreaker; + /** + * Optional callback invoked before each retry (not the initial attempt). + * @param attempt - 1-based retry number + * @param max - total allowed attempts + * @param error - the error that triggered the retry + */ + onRetry?: (attempt: number, max: number, error: Error) => void; +} + +/** + * Retry wrapper for HuggingFace model downloads with per-attempt timeout and + * circuit-breaker protection. + * + * Behaviour: + * - If the circuit is **open**, fails immediately with a `CIRCUIT_OPEN_TAG` + * message (so `isHfDownloadFailure` still returns true and the caller can + * show `HF_ENDPOINT` guidance). + * - Each attempt is wrapped in `withDownloadTimeout`. 
+ * - On a network-level error (`isNetworkFetchError`) the attempt is retried + * with exponential back-off; non-network errors (e.g. ONNX device failure) + * are rethrown immediately without retry. + * - Every network failure is recorded on the circuit breaker; a success resets + * it. + * - After all attempts are exhausted, the last network error is rethrown + * so the existing `isNetworkFetchError` / `isHfDownloadFailure` guards in + * the calling code still fire. + */ +export async function withHfDownloadRetry( + fn: () => Promise, + options: HfRetryOptions = {}, +): Promise { + // Resolve effective values — explicit options take precedence over env vars, + // which take precedence over built-in defaults. This lets users lower the + // per-attempt timeout without rebuilding (e.g. + // HF_DOWNLOAD_TIMEOUT_MS=60000 npx gitnexus analyze --embeddings + // reduces the worst-case wait from 15 minutes to ~3 minutes). + // + // Upper bounds are clamped to prevent accidental runaway configuration: + // - timeoutMs is capped at HF_MAX_TIMEOUT_MS (30 min) + // - maxAttempts is floored (fractional values → integer) and capped at + // HF_MAX_ATTEMPTS_CAP (10). Values ≤ 0, NaN, or Infinity fall back to + // the built-in defaults. + const envTimeout = Number(process.env.HF_DOWNLOAD_TIMEOUT_MS); + const envMaxAttempts = Number(process.env.HF_MAX_ATTEMPTS); + const resolvedTimeout = + Number.isFinite(envTimeout) && envTimeout > 0 + ? Math.min(envTimeout, HF_MAX_TIMEOUT_MS) + : HF_DOWNLOAD_TIMEOUT_MS; + const resolvedMaxAttempts = + Number.isFinite(envMaxAttempts) && envMaxAttempts > 0 + ? 
Math.min(Math.floor(envMaxAttempts), HF_MAX_ATTEMPTS_CAP) + : HF_MAX_ATTEMPTS; + const { + maxAttempts = resolvedMaxAttempts, + baseDelayMs = HF_BASE_DELAY_MS, + timeoutMs = resolvedTimeout, + circuit = hfDownloadCircuit, + onRetry, + } = options; + if (circuit.isOpen()) { + const secsUntilReset = Math.ceil( + (circuit.resetTimeoutMs - (Date.now() - circuit.lastFailureAt)) / 1000, + ); + throw new Error( + `${CIRCUIT_OPEN_TAG}: HuggingFace download circuit is open after repeated network failures` + + (secsUntilReset > 0 ? ` — will reset in ~${secsUntilReset}s` : ''), + ); + } + + let lastError: Error = new Error('unknown error'); + + for (let attempt = 0; attempt < maxAttempts; attempt++) { + try { + const result = await withDownloadTimeout(fn, timeoutMs); + circuit.recordSuccess(); + return result; + } catch (err) { + lastError = err instanceof Error ? err : new Error(String(err)); + + if (!isNetworkFetchError(lastError.message)) { + // Non-network error (e.g. CUDA unavailable) — propagate without retry + throw lastError; + } + + circuit.recordFailure(); + + if (circuit.isOpen()) { + // Circuit just tripped — fail fast, no more retries + throw new Error( + `${CIRCUIT_OPEN_TAG}: HuggingFace download circuit opened after ${circuit.failureThreshold} consecutive failures`, + ); + } + + if (attempt < maxAttempts - 1) { + const delay = baseDelayMs * Math.pow(2, attempt); + onRetry?.(attempt + 1, maxAttempts, lastError); + await sleep(delay); + } + } + } + + // All retries exhausted — throw the last network error so isNetworkFetchError + // patterns in the calling code still match and surface HF_ENDPOINT guidance. + throw lastError; +} diff --git a/gitnexus/src/core/git-staleness.ts b/gitnexus/src/core/git-staleness.ts index 96f70ddd65..c90cef85ea 100644 --- a/gitnexus/src/core/git-staleness.ts +++ b/gitnexus/src/core/git-staleness.ts @@ -3,11 +3,14 @@ * Lives in core/ so application code does not depend on the MCP package layer. 
*/ -import { execFileSync } from 'node:child_process'; +import { execFile, execFileSync } from 'node:child_process'; +import { promisify } from 'node:util'; import path from 'path'; import { readRegistry, type RegistryEntry, type CwdMatch } from '../storage/repo-manager.js'; import { findGitRootByDotGit, getCurrentCommit, getRemoteUrl } from '../storage/git.js'; +const execFileAsync = promisify(execFile); + export interface StalenessInfo { isStale: boolean; commitsBehind: number; @@ -41,6 +44,39 @@ export function checkStaleness(repoPath: string, lastCommit: string): StalenessI } } +/** + * Async variant of {@link checkStaleness} — spawns git as a child process + * instead of blocking the event loop. Used by `listRepos()` to check many + * repos in parallel (issue #1363: 200 repos × sync spawn ≈ 50 s). + */ +export async function checkStalenessAsync( + repoPath: string, + lastCommit: string, +): Promise { + try { + // Note: promisified execFile captures stdout/stderr by default (no stdio option needed, + // unlike the sync variant which requires explicit stdio: ['pipe','pipe','pipe']). + const { stdout } = await execFileAsync('git', ['rev-list', '--count', `${lastCommit}..HEAD`], { + cwd: repoPath, + encoding: 'utf-8', + }); + + const commitsBehind = parseInt(stdout.trim(), 10) || 0; + + if (commitsBehind > 0) { + return { + isStale: true, + commitsBehind, + hint: `⚠️ Index is ${commitsBehind} commit${commitsBehind > 1 ? 's' : ''} behind HEAD. Run analyze tool to update.`, + }; + } + + return { isStale: false, commitsBehind: 0 }; + } catch { + return { isStale: false, commitsBehind: 0 }; + } +} + /** * Compare a sibling-clone HEAD against an indexed `lastCommit`. 
Returns * `undefined` when the indexed commit is not reachable from the sibling diff --git a/gitnexus/src/core/group/bridge-db.ts b/gitnexus/src/core/group/bridge-db.ts index ef45d68190..ef6244b22a 100644 --- a/gitnexus/src/core/group/bridge-db.ts +++ b/gitnexus/src/core/group/bridge-db.ts @@ -11,6 +11,9 @@ import { type LbugConnectionHandle, } from '../lbug/lbug-config.js'; import { dedupeContracts, dedupeCrossLinks } from './normalization.js'; +import { createLogger } from '../logger.js'; + +const bridgeLogger = createLogger('bridge-db', { debugEnvVar: 'GITNEXUS_DEBUG_BRIDGE' }); /** * Sidecar files that LadybugDB creates next to a `bridge.lbug` file. @@ -41,26 +44,6 @@ async function removeLbugFile(basePath: string): Promise { } } -/** - * Remove all stale `bridge.lbug.tmp.*` files (and their sidecars) from a - * group directory. With randomBytes-based temp names, a crashed writeBridge - * leaves behind a uniquely-named tmp file that no future run will target by - * name — so we glob for the prefix and clean up everything matching. - */ -async function cleanStaleBridgeTmpFiles(groupDir: string): Promise { - try { - const entries = await fsp.readdir(groupDir); - const staleBases = entries.filter( - (e) => e.startsWith('bridge.lbug.tmp.') && !LBUG_SIDECAR_SUFFIXES.some((s) => e.endsWith(s)), - ); - for (const name of staleBases) { - await removeLbugFile(path.join(groupDir, name)); - } - } catch { - /* best-effort: directory may not exist yet */ - } -} - export function contractNodeId( repo: string, contractId: string, @@ -296,8 +279,24 @@ export async function retryRename(src: string, dst: string, attempts = 3): Promi export async function writeBridgeMeta(groupDir: string, meta: BridgeMeta): Promise { const target = path.join(groupDir, 'meta.json'); + // Unpredictable suffix + O_EXCL via `'wx'` flag closes the symlink/ + // pre-create attack window. 
The third argument `0o600` is the + // user-only mode mask — CodeQL's `js/insecure-temporary-file` query + // sources its verdict from the `mode` argument, NOT from `flags`: + // its `isSecureMode(mode)` predicate requires the low 6 bits to be + // zero (no group/world bits). Without an explicit mode the file is + // created with the process umask (typically 0o644 = group/world + // readable), which the query treats as the actual vulnerability. + // Both `'wx'` (runtime O_EXCL) AND `0o600` (CodeQL-credited mode) + // are needed: one closes the symlink race, the other closes the + // permissions exposure. const tmp = `${target}.tmp.${randomBytes(8).toString('hex')}`; - await fsp.writeFile(tmp, JSON.stringify(meta, null, 2), 'utf-8'); + const handle = await fsp.open(tmp, 'wx', 0o600); + try { + await handle.writeFile(JSON.stringify(meta, null, 2), 'utf-8'); + } finally { + await handle.close(); + } // Use retryRename for consistency with writeBridge's atomic swap — on // Windows a concurrent reader can cause EBUSY/EPERM even on a tiny // meta.json, and we don't want meta write to be less robust than the @@ -366,7 +365,19 @@ export async function writeBridge( const crossLinks = dedupeCrossLinks(input.crossLinks); const finalPath = path.join(groupDir, 'bridge.lbug'); - const tmpPath = path.join(groupDir, `bridge.lbug.tmp.${randomBytes(8).toString('hex')}`); + // Stage the temp database inside a unique mkdtemp directory rather than + // a fixed `bridge.lbug.tmp` name. The previous shape was flagged by + // CodeQL js/insecure-temporary-file as a predictable path: a co-located + // attacker (or a parallel writeBridge call into the same group) could + // pre-create or symlink that path before this writer opens it. mkdtemp + // returns a directory whose suffix is filled with cryptographically + // random bytes, so the staging path is unguessable AND collision-free + // across parallel callers. 
We anchor the staging directory inside + // `groupDir` so the subsequent rename of `bridge.lbug` (and its + // `.wal` / `.shadow` sidecars) into place stays on the same filesystem + // and remains atomic — moving across `os.tmpdir()` could trip EXDEV. + const stagingDir = await fsp.mkdtemp(path.join(groupDir, 'bridge-tmp-')); + const tmpPath = path.join(stagingDir, 'bridge.lbug'); const bakPath = path.join(groupDir, 'bridge.lbug.bak'); const report: WriteBridgeReport = { @@ -386,43 +397,42 @@ export async function writeBridge( } }; - // Clean up stale tmp files left behind by previously crashed writeBridge - // runs. With randomBytes-based names each run picks a unique path, so - // the old fixed-name `removeLbugFile(tmpPath)` was a no-op — stale - // artifacts accumulated. The glob-based helper finds *all* leftover - // `bridge.lbug.tmp.*` entries and removes them (including sidecars). - await cleanStaleBridgeTmpFiles(groupDir); - - // 1. Create temp DB, insert all data. - // - // Everything after `openBridgeDb` must run inside a try/finally so that - // if ANY step before the explicit `closeBridgeDb` throws — schema - // creation, a contract insert loop that rethrows, a snapshot write, the - // cross-link loop, or anything else — the handle is still released. A - // leaked handle holds the native LadybugDB file lock on tmpPath, which - // (a) leaks a FD and (b) prevents the next writeBridge call from - // reusing the same tmp slot. - const handle = await openBridgeDb(tmpPath); - let handleClosed = false; + // The mkdtemp staging directory above is freshly created with a unique + // random suffix, so there are no leftover `bridge.lbug.tmp` / `.wal` / + // `.shadow` sidecars from a previous crashed run to clean up here — the + // directory is empty by construction. 
+ try { - await ensureBridgeSchema(handle); - - // Build the lookup index incrementally as contracts are inserted, so - // failed inserts are never in the index (and therefore never resolved - // by the cross-link loop below). This replaces a previous N+1 query - // pattern where each link made up to 6 DB round-trips to find its - // endpoints — see ContractLookupIndex. - const lookupIndex = createContractLookupIndex(); - - // Insert contracts — tolerate individual failures (e.g., a corrupt meta - // that can't be serialized). The whole sync must not fail because one - // contract is broken. - for (const c of contracts) { - const id = contractNodeId(c.repo, c.contractId, c.role, c.symbolRef.filePath); - try { - await queryBridge( - handle, - `CREATE (n:Contract { + // 1. Create temp DB, insert all data. + // + // Everything after `openBridgeDb` must run inside a try/finally so that + // if ANY step before the explicit `closeBridgeDb` throws — schema + // creation, a contract insert loop that rethrows, a snapshot write, the + // cross-link loop, or anything else — the handle is still released. A + // leaked handle holds the native LadybugDB file lock on tmpPath, which + // (a) leaks a FD and (b) prevents the next writeBridge call from + // reusing the same tmp slot. + const handle = await openBridgeDb(tmpPath); + let handleClosed = false; + try { + await ensureBridgeSchema(handle); + + // Build the lookup index incrementally as contracts are inserted, so + // failed inserts are never in the index (and therefore never resolved + // by the cross-link loop below). This replaces a previous N+1 query + // pattern where each link made up to 6 DB round-trips to find its + // endpoints — see ContractLookupIndex. + const lookupIndex = createContractLookupIndex(); + + // Insert contracts — tolerate individual failures (e.g., a corrupt meta + // that can't be serialized). The whole sync must not fail because one + // contract is broken. 
+ for (const c of contracts) { + const id = contractNodeId(c.repo, c.contractId, c.role, c.symbolRef.filePath); + try { + await queryBridge( + handle, + `CREATE (n:Contract { id: $id, contractId: $contractId, type: $type, @@ -435,91 +445,91 @@ export async function writeBridge( confidence: $confidence, meta: $meta })`, - { - id, - contractId: c.contractId, - type: c.type, - role: c.role, - repo: c.repo, - service: c.service ?? '', - symbolUid: c.symbolUid, - filePath: c.symbolRef.filePath, - symbolName: c.symbolName, - confidence: c.confidence, - meta: JSON.stringify(c.meta), - }, - ); - report.contractsInserted++; - // Only index on successful insert — the cross-link loop must never - // resolve to a row that isn't actually in the DB. - indexContract(lookupIndex, c, id); - } catch (err) { - report.contractsFailed++; - recordError('contract', id, err); + { + id, + contractId: c.contractId, + type: c.type, + role: c.role, + repo: c.repo, + service: c.service ?? '', + symbolUid: c.symbolUid, + filePath: c.symbolRef.filePath, + symbolName: c.symbolName, + confidence: c.confidence, + meta: JSON.stringify(c.meta), + }, + ); + report.contractsInserted++; + // Only index on successful insert — the cross-link loop must never + // resolve to a row that isn't actually in the DB. 
+ indexContract(lookupIndex, c, id); + } catch (err) { + report.contractsFailed++; + recordError('contract', id, err); + } } - } - // Insert repo snapshots - for (const [repoId, snap] of Object.entries(input.repoSnapshots)) { - try { - await queryBridge( - handle, - `CREATE (s:RepoSnapshot { + // Insert repo snapshots + for (const [repoId, snap] of Object.entries(input.repoSnapshots)) { + try { + await queryBridge( + handle, + `CREATE (s:RepoSnapshot { id: $id, indexedAt: $indexedAt, lastCommit: $lastCommit })`, - { - id: repoId, - indexedAt: snap.indexedAt, - lastCommit: snap.lastCommit, - }, - ); - report.snapshotsInserted++; - } catch (err) { - report.snapshotsFailed++; - recordError('snapshot', repoId, err); + { + id: repoId, + indexedAt: snap.indexedAt, + lastCommit: snap.lastCommit, + }, + ); + report.snapshotsInserted++; + } catch (err) { + report.snapshotsFailed++; + recordError('snapshot', repoId, err); + } } - } - // Insert cross-links (tolerating missing nodes). - // - // `findContractNode` consults the in-memory lookup index built above, - // not the DB — that's an O(1) pure-function lookup per endpoint instead - // of the previous 2-3 DB queries. For M cross-links, the previous code - // issued up to 6M round-trips; this version issues zero. - // - // `link.contractId` may differ between the consumer and provider sides - // (e.g. wildcard consumer `grpc::Service/*` → method-level provider - // `grpc::Service/Method`) — that's why we resolve each endpoint - // independently via its own `(repo, role, symbolUid, filePath, symbolName)` - // tuple rather than matching on contractId. 
- for (const link of crossLinks) { - const linkId = `${link.from.repo}::${link.contractId}->${link.to.repo}::${link.contractId}`; - try { - const fromId = findContractNode( - lookupIndex, - link.from.repo, - 'consumer', - link.from.symbolUid, - link.from.symbolRef.filePath, - link.from.symbolRef.name, - ); - const toId = findContractNode( - lookupIndex, - link.to.repo, - 'provider', - link.to.symbolUid, - link.to.symbolRef.filePath, - link.to.symbolRef.name, - ); - if (!fromId || !toId) { - report.linksDroppedMissingNode++; - continue; - } - await queryBridge( - handle, - ` + // Insert cross-links (tolerating missing nodes). + // + // `findContractNode` consults the in-memory lookup index built above, + // not the DB — that's an O(1) pure-function lookup per endpoint instead + // of the previous 2-3 DB queries. For M cross-links, the previous code + // issued up to 6M round-trips; this version issues zero. + // + // `link.contractId` may differ between the consumer and provider sides + // (e.g. wildcard consumer `grpc::Service/*` → method-level provider + // `grpc::Service/Method`) — that's why we resolve each endpoint + // independently via its own `(repo, role, symbolUid, filePath, symbolName)` + // tuple rather than matching on contractId. 
+ for (const link of crossLinks) { + const linkId = `${link.from.repo}::${link.contractId}->${link.to.repo}::${link.contractId}`; + try { + const fromId = findContractNode( + lookupIndex, + link.from.repo, + 'consumer', + link.from.symbolUid, + link.from.symbolRef.filePath, + link.from.symbolRef.name, + ); + const toId = findContractNode( + lookupIndex, + link.to.repo, + 'provider', + link.to.symbolUid, + link.to.symbolRef.filePath, + link.to.symbolRef.name, + ); + if (!fromId || !toId) { + report.linksDroppedMissingNode++; + continue; + } + await queryBridge( + handle, + ` MATCH (a:Contract), (b:Contract) WHERE a.id = $fromId AND b.id = $toId CREATE (a)-[:ContractLink { @@ -530,83 +540,93 @@ export async function writeBridge( toRepo: $toRepo }]->(b) `, - { - fromId, - toId, - matchType: link.matchType, - confidence: link.confidence, - contractId: link.contractId, - fromRepo: link.from.repo, - toRepo: link.to.repo, - }, - ); - report.linksInserted++; - } catch (err) { - report.linksFailed++; - recordError('link', linkId, err); + { + fromId, + toId, + matchType: link.matchType, + confidence: link.confidence, + contractId: link.contractId, + fromRepo: link.from.repo, + toRepo: link.to.repo, + }, + ); + report.linksInserted++; + } catch (err) { + report.linksFailed++; + recordError('link', linkId, err); + } } - } - // 2. Close temp DB (happy path). The finally block also calls - // closeBridgeDb if we threw above; `handleClosed` prevents a - // double-close on the native handle. - await closeBridgeDb(handle); - handleClosed = true; - } finally { - if (!handleClosed) { - await closeBridgeDb(handle).catch(() => { - /* ignore: cleanup path, best effort */ - }); + // 2. Close temp DB (happy path). The finally block also calls + // closeBridgeDb if we threw above; `handleClosed` prevents a + // double-close on the native handle. 
+ await closeBridgeDb(handle); + handleClosed = true; + } finally { + if (!handleClosed) { + await closeBridgeDb(handle).catch(() => { + /* ignore: cleanup path, best effort */ + }); + } } - } - // 3. Atomic swap: old→.bak, tmp→final, rm .bak - // - // The current database file (with its `.wal` / `.shadow` sidecars) is - // moved aside, then the freshly built tmp database takes its place. - // We move the sidecars together with the main file so the open below - // and any external readers see a consistent set; orphan sidecars from - // the tmp namespace are then removed because LadybugDB looks for them - // under the renamed-to base name and would reject mismatching IDs. - try { - await fsp.access(finalPath); - await retryRename(finalPath, bakPath); + // 3. Atomic swap: old→.bak, tmp→final, rm .bak + // + // The current database file (with its `.wal` / `.shadow` sidecars) is + // moved aside, then the freshly built tmp database takes its place. + // We move the sidecars together with the main file so the open below + // and any external readers see a consistent set; orphan sidecars from + // the tmp namespace are then removed because LadybugDB looks for them + // under the renamed-to base name and would reject mismatching IDs. + try { + await fsp.access(finalPath); + await retryRename(finalPath, bakPath); + for (const suffix of LBUG_SIDECAR_SUFFIXES) { + try { + await fsp.access(`${finalPath}${suffix}`); + await retryRename(`${finalPath}${suffix}`, `${bakPath}${suffix}`); + } catch { + /* sidecar absent — nothing to move */ + } + } + } catch { + /* no existing db */ + } + await retryRename(tmpPath, finalPath); for (const suffix of LBUG_SIDECAR_SUFFIXES) { + // Rename — not delete — so the WAL (which may carry uncommitted-at- + // close-time pages on a graceful close, depending on + // `autoCheckpoint` / `checkpointThreshold`) and the `.shadow` + // checkpoint snapshot stay paired with the database file under its + // final name. 
LadybugDB 0.16.0's database-id check rejects an open + // when the sidecars belong to a different base name. try { - await fsp.access(`${finalPath}${suffix}`); - await retryRename(`${finalPath}${suffix}`, `${bakPath}${suffix}`); + await fsp.access(`${tmpPath}${suffix}`); + await retryRename(`${tmpPath}${suffix}`, `${finalPath}${suffix}`); } catch { /* sidecar absent — nothing to move */ } } - } catch { - /* no existing db */ - } - await retryRename(tmpPath, finalPath); - for (const suffix of LBUG_SIDECAR_SUFFIXES) { - // Rename — not delete — so the WAL (which may carry uncommitted-at- - // close-time pages on a graceful close, depending on - // `autoCheckpoint` / `checkpointThreshold`) and the `.shadow` - // checkpoint snapshot stay paired with the database file under its - // final name. LadybugDB 0.16.0's database-id check rejects an open - // when the sidecars belong to a different base name. - try { - await fsp.access(`${tmpPath}${suffix}`); - await retryRename(`${tmpPath}${suffix}`, `${finalPath}${suffix}`); - } catch { - /* sidecar absent — nothing to move */ - } - } - await removeLbugFile(bakPath); + await removeLbugFile(bakPath); - // 4. Write meta.json - await writeBridgeMeta(groupDir, { - version: BRIDGE_SCHEMA_VERSION, - generatedAt: new Date().toISOString(), - missingRepos: input.missingRepos, - }); + // 4. Write meta.json + await writeBridgeMeta(groupDir, { + version: BRIDGE_SCHEMA_VERSION, + generatedAt: new Date().toISOString(), + missingRepos: input.missingRepos, + }); - return report; + return report; + } finally { + // Always remove the mkdtemp staging directory. On the happy path the + // main file and sidecars have been renamed out of it, so it's empty; + // on any error path it may still contain a partial database — either + // way `recursive: true, force: true` removes it without surfacing + // "directory not empty" or ENOENT. 
+ await fsp.rm(stagingDir, { recursive: true, force: true }).catch(() => { + /* best-effort cleanup */ + }); + } } /* ------------------------------------------------------------------ */ @@ -702,14 +722,15 @@ export async function openBridgeDbReadOnly(groupDir: string): Promise setTimeout(r, delay)); } } - if (process.env.GITNEXUS_DEBUG_BRIDGE) { - console.warn( - `[bridge-db] openBridgeDbReadOnly(${groupDir}) gave up after ` + - `${LBUG_OPEN_RETRY_ATTEMPTS} attempts: ${ - lastErr instanceof Error ? lastErr.message : String(lastErr) - }`, - ); - } + // Pino's NDJSON serialization is structurally injection-resistant + // (CodeQL js/log-injection): groupDir and err.message are JSON-escaped + // by the serializer, so no manual CRLF / U+2028 / ANSI sanitization is + // needed. Demoted to debug — only fires when the bridge truly gave up + // after retries, and operators only need it at debug verbosity. + bridgeLogger.debug( + { groupDir, err: lastErr, attempts: LBUG_OPEN_RETRY_ATTEMPTS }, + 'openBridgeDbReadOnly gave up', + ); return null; } diff --git a/gitnexus/src/core/group/config-parser.ts b/gitnexus/src/core/group/config-parser.ts index 73a9021b9e..d4f6ffff9d 100644 --- a/gitnexus/src/core/group/config-parser.ts +++ b/gitnexus/src/core/group/config-parser.ts @@ -4,7 +4,15 @@ import type { GroupConfig, GroupManifestLink, ContractType, ContractRole } from const _require = createRequire(import.meta.url); const yaml = _require('js-yaml') as typeof import('js-yaml'); -const VALID_CONTRACT_TYPES: ContractType[] = ['http', 'grpc', 'thrift', 'topic', 'lib', 'custom']; +const VALID_CONTRACT_TYPES: ContractType[] = [ + 'http', + 'grpc', + 'thrift', + 'topic', + 'lib', + 'custom', + 'include', +]; const VALID_ROLES: ContractRole[] = ['provider', 'consumer']; const DEFAULT_DETECT = { @@ -14,6 +22,7 @@ const DEFAULT_DETECT = { topics: true, shared_libs: true, embedding_fallback: true, + includes: true, workspace_deps: false, }; diff --git 
/**
 * Bounds for the impact timeout, in milliseconds. `clampTimeout` forces
 * every caller-supplied value into the inclusive range
 * [IMPACT_TIMEOUT_MIN_MS, IMPACT_TIMEOUT_MAX_MS].
 */
export const IMPACT_TIMEOUT_MIN_MS = 100;
export const IMPACT_TIMEOUT_MAX_MS = 5 * 60 * 1_000;

/**
 * Clamp a caller-supplied timeout to the bounded range above.
 *
 * - Non-finite values (NaN, ±Infinity) and values <= 0 map to the minimum.
 * - Fractional values are truncated toward zero before clamping.
 *
 * @param timeoutMs requested timeout in milliseconds
 * @returns an integer within [IMPACT_TIMEOUT_MIN_MS, IMPACT_TIMEOUT_MAX_MS]
 */
export function clampTimeout(timeoutMs: number): number {
  const usable = Number.isFinite(timeoutMs) && timeoutMs > 0;
  if (!usable) {
    return IMPACT_TIMEOUT_MIN_MS;
  }

  const whole = Math.trunc(timeoutMs);
  if (whole < IMPACT_TIMEOUT_MIN_MS) {
    return IMPACT_TIMEOUT_MIN_MS;
  }
  if (whole > IMPACT_TIMEOUT_MAX_MS) {
    return IMPACT_TIMEOUT_MAX_MS;
  }
  return whole;
}
+ const rawTimeoutMs = typeof params.timeoutMs === 'number' && params.timeoutMs > 0 ? params.timeoutMs : typeof params.timeout === 'number' && params.timeout > 0 ? params.timeout : DEFAULT_LOCAL_IMPACT_TIMEOUT_MS; - if (timeoutMs > 3_600_000) timeoutMs = 3_600_000; + const timeoutMs = clampTimeout(rawTimeoutMs); return { ok: true, @@ -191,12 +216,13 @@ async function safeLocalImpact( impactParams: Parameters[1], timeoutMs: number, ): Promise<{ value: unknown; timedOut: boolean }> { + const safeTimeoutMs = clampTimeout(timeoutMs); let timer: ReturnType | undefined; const impactP = port.impact(repo, impactParams).catch((err) => ({ error: err instanceof Error ? err.message : String(err), })); const timeoutP = new Promise<'timeout'>((resolve) => { - timer = setTimeout(() => resolve('timeout'), timeoutMs); + timer = setTimeout(() => resolve('timeout'), safeTimeoutMs); }); const won = await Promise.race([ impactP.then((v) => ({ tag: 'impact' as const, v })), @@ -212,6 +238,65 @@ async function safeLocalImpact( return { value: won.v, timedOut: false }; } +/** + * Race a single Phase-2 `impactByUid` call against a remaining-budget + * timer. The Codex adversarial review on PR #1331 surfaced that the + * fanout loop only checked `Date.now() > deadline` *between* neighbor + * calls — once `await port.impactByUid(...)` was reached, a hung + * neighbor could pin the request indefinitely, and slow neighbors + * could compound past the 5-min `IMPACT_TIMEOUT_MAX_MS` cap. + * + * This helper wraps each call: a `setTimeout(remainingMs)` aborts an + * `AbortController` whose signal is forwarded to `impactByUid`, and a + * `Promise.race` resolves to `{ timedOut: true }` when the timer + * fires before the call completes. Implementors that ignore the + * signal (current local backend) still see their await resolved by + * the race; full cooperative cancellation inside the BFS is a future + * follow-up. 
/**
 * Run one `port.impactByUid` call under a time budget.
 *
 * The call receives an `AbortSignal` through its options object. A timer
 * of `max(0, remainingMs)` milliseconds aborts that signal and settles the
 * race as timed out. Whichever settles first decides the result:
 *
 * - call fulfils  -> `{ value: <result>, timedOut: false }`
 * - call rejects  -> `{ value: null, timedOut: false }`
 * - timer fires   -> `{ value: null, timedOut: true }`
 *
 * The timer is always cleared before returning. The signal is aborted only
 * by the timer, never on normal completion.
 */
export async function safeNeighborImpact(
  port: GroupToolPort,
  repoId: string,
  uid: string,
  direction: string,
  opts: {
    maxDepth: number;
    relationTypes: string[];
    minConfidence: number;
    includeTests: boolean;
  },
  remainingMs: number,
): Promise<{ value: unknown; timedOut: boolean }> {
  const abort = new AbortController();

  // Start the neighbor call first; a rejection is folded into a null value
  // so the race below can never reject.
  const settled: Promise<{ value: unknown; timedOut: boolean }> = port
    .impactByUid(repoId, uid, direction, { ...opts, signal: abort.signal })
    .then(
      (value) => ({ value, timedOut: false }),
      () => ({ value: null, timedOut: false }),
    );

  let handle: ReturnType<typeof setTimeout> | undefined;
  const expired = new Promise<{ value: unknown; timedOut: boolean }>((resolve) => {
    handle = setTimeout(
      () => {
        abort.abort();
        resolve({ value: null, timedOut: true });
      },
      Math.max(0, remainingMs),
    );
  });

  try {
    return await Promise.race([settled, expired]);
  } finally {
    if (handle !== undefined) clearTimeout(handle);
  }
}
[], - minConfidence, - includeTests, - }); - if (fan == null) { + // Phase-2 hardening: race each impactByUid against a per-call + // timeout derived from the remaining budget. Without this wrap a + // single hung neighbor would pin the request past the clamped + // timeout, which Codex's adversarial review on PR #1331 flagged + // as the still-open half of CodeQL #184 / js/resource-exhaustion. + const { value: fan, timedOut: neighborTimedOut } = await safeNeighborImpact( + deps.port, + neighborHandle.id, + n.neighborUid, + direction, + { + maxDepth, + relationTypes: relationTypes ?? [], + minConfidence, + includeTests, + }, + remainingMs, + ); + if (neighborTimedOut || fan == null) { truncatedRepos.push(n.neighborRepo); continue; } diff --git a/gitnexus/src/core/group/extractors/elixir-workspace-extractor.ts b/gitnexus/src/core/group/extractors/elixir-workspace-extractor.ts index 33afcaed9f..7c184ba96b 100644 --- a/gitnexus/src/core/group/extractors/elixir-workspace-extractor.ts +++ b/gitnexus/src/core/group/extractors/elixir-workspace-extractor.ts @@ -4,6 +4,7 @@ import type { CypherExecutor } from '../contract-extractor.js'; import type { GroupManifestLink, ContractRole } from '../types.js'; import { shouldIgnorePath, loadIgnoreRules } from '../../../config/ignore-service.js'; +import { logger } from '../../logger.js'; interface ElixirAppMeta { appName: string; modulePrefix: string; @@ -202,7 +203,7 @@ export async function extractElixirWorkspaceLinks( }; const existing = appsByName.get(manifest.appName); if (existing) { - console.warn( + logger.warn( `[elixir-workspace-extractor] duplicate app "${manifest.appName}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`, ); continue; diff --git a/gitnexus/src/core/group/extractors/go-workspace-extractor.ts b/gitnexus/src/core/group/extractors/go-workspace-extractor.ts index fbdf0e450c..55e8700900 100644 --- a/gitnexus/src/core/group/extractors/go-workspace-extractor.ts +++ 
b/gitnexus/src/core/group/extractors/go-workspace-extractor.ts @@ -4,6 +4,7 @@ import type { CypherExecutor } from '../contract-extractor.js'; import type { GroupManifestLink, ContractRole } from '../types.js'; import { shouldIgnorePath, loadIgnoreRules } from '../../../config/ignore-service.js'; +import { logger } from '../../logger.js'; interface GoModuleMeta { modulePath: string; groupPath: string; @@ -211,7 +212,7 @@ export async function extractGoWorkspaceLinks( }; const existing = modulesByPath.get(manifest.modulePath); if (existing) { - console.warn( + logger.warn( `[go-workspace-extractor] duplicate module "${manifest.modulePath}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`, ); continue; diff --git a/gitnexus/src/core/group/extractors/grpc-extractor.ts b/gitnexus/src/core/group/extractors/grpc-extractor.ts index b5782d9b34..c08ba7c367 100644 --- a/gitnexus/src/core/group/extractors/grpc-extractor.ts +++ b/gitnexus/src/core/group/extractors/grpc-extractor.ts @@ -5,6 +5,7 @@ import { createIgnoreFilter } from '../../../config/ignore-service.js'; import type { ContractExtractor, CypherExecutor } from '../contract-extractor.js'; import type { ExtractedContract, RepoHandle } from '../types.js'; import { readSafe } from './fs-utils.js'; +import { logger } from '../../logger.js'; import { GRPC_SCAN_GLOB, getPluginForFile, @@ -344,7 +345,7 @@ export function resolveProtoConflict( // services under a fabricated package-qualified contract id. 
if (winners.length !== 1) { const paths = candidates.map((c) => c.protoPath).join(', '); - console.warn( + logger.warn( `[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${winners.length} candidates tied at score ${maxScore} among [${paths}] — skipping canonical contract`, ); return null; diff --git a/gitnexus/src/core/group/extractors/include-extractor.ts b/gitnexus/src/core/group/extractors/include-extractor.ts new file mode 100644 index 0000000000..f3c54b464c --- /dev/null +++ b/gitnexus/src/core/group/extractors/include-extractor.ts @@ -0,0 +1,514 @@ +import * as path from 'node:path'; +import { glob } from 'glob'; +import Parser from 'tree-sitter'; +import C from 'tree-sitter-c'; +import Cpp from 'tree-sitter-cpp'; +import type { ContractExtractor, CypherExecutor } from '../contract-extractor.js'; +import type { ExtractedContract, RepoHandle } from '../types.js'; +import { readSafe } from './fs-utils.js'; +import { buildSuffixIndex, type SuffixIndex } from '../../ingestion/import-resolvers/utils.js'; + +/** + * Cross-repo C/C++ `#include` dependency extractor. + * + * **Provider side:** registers every `.h/.hpp/.hxx/.hh` file in the repo + * as a provider contract with `include::`. + * + * **Consumer side:** parses all C/C++ source/header files for `#include "…"` + * directives, attempts suffix-based resolution against the repo's own file + * list (reusing the same algorithm as the single-repo ingestion pipeline), + * and emits unresolved include paths as consumer contracts. + * + * Matching: a consumer's `include::map/base/dice_map_view.h` in repo A + * matches a provider's `include::map/base/dice_map_view.h` in repo B via + * exact contract-id equality in `runExactMatch`. 
+ */ + +// ---------- constants ---------- + +const HEADER_EXTENSIONS = new Set(['.h', '.hpp', '.hxx', '.hh']); + +const SOURCE_GLOB = '**/*.{c,cpp,cc,cxx,h,hpp,hxx,hh}'; + +const STANDARD_IGNORES = [ + '**/node_modules/**', + '**/.git/**', + '**/vendor/**', + '**/dist/**', + '**/build/**', + '**/.gitnexus/**', + '**/third_party/**', + '**/3rdparty/**', + '**/external/**', +]; + +const INCLUDE_QUERY_SRC = '(preproc_include path: (_) @import.source) @import'; + +/** + * Well-known C/C++ standard library headers that can appear in `#include "…"` + * form (some projects use quotes for system headers). + */ +const SYSTEM_HEADERS = new Set([ + // C standard + 'assert.h', + 'complex.h', + 'ctype.h', + 'errno.h', + 'fenv.h', + 'float.h', + 'inttypes.h', + 'iso646.h', + 'limits.h', + 'locale.h', + 'math.h', + 'setjmp.h', + 'signal.h', + 'stdalign.h', + 'stdarg.h', + 'stdatomic.h', + 'stdbool.h', + 'stddef.h', + 'stdint.h', + 'stdio.h', + 'stdlib.h', + 'stdnoreturn.h', + 'string.h', + 'tgmath.h', + 'threads.h', + 'time.h', + 'uchar.h', + 'wchar.h', + 'wctype.h', + // C++ standard (extensionless) + 'algorithm', + 'any', + 'array', + 'atomic', + 'barrier', + 'bit', + 'bitset', + 'cassert', + 'cctype', + 'cerrno', + 'cfenv', + 'cfloat', + 'charconv', + 'chrono', + 'cinttypes', + 'climits', + 'clocale', + 'cmath', + 'codecvt', + 'compare', + 'complex', + 'concepts', + 'condition_variable', + 'coroutine', + 'csetjmp', + 'csignal', + 'cstdarg', + 'cstddef', + 'cstdint', + 'cstdio', + 'cstdlib', + 'cstring', + 'ctime', + 'cuchar', + 'cwchar', + 'cwctype', + 'deque', + 'exception', + 'execution', + 'expected', + 'filesystem', + 'format', + 'forward_list', + 'fstream', + 'functional', + 'future', + 'generator', + 'initializer_list', + 'iomanip', + 'ios', + 'iosfwd', + 'iostream', + 'istream', + 'iterator', + 'latch', + 'limits', + 'list', + 'locale', + 'map', + 'mdspan', + 'memory', + 'memory_resource', + 'mutex', + 'new', + 'numbers', + 'numeric', + 'optional', + 'ostream', + 
/**
 * Canonicalise an include path for use in a contract id.
 *
 * Steps, in order: backslashes become forward slashes, a single leading
 * `./` is dropped, runs of slashes collapse to one, and the result is
 * lowercased. Lowercasing means two paths that differ only by case map to
 * the same id.
 */
function normalizeIncludePath(raw: string): string {
  const forwardSlashed = raw.split('\\').join('/');
  const withoutDotSlash = forwardSlashed.startsWith('./')
    ? forwardSlashed.slice(2)
    : forwardSlashed;
  const collapsed = withoutDotSlash.replace(/\/{2,}/g, '/');
  return collapsed.toLowerCase();
}
/**
 * Remove every `/* … *​/` block comment from `src` (non-greedy, may span
 * lines). Line comments are left untouched.
 */
function stripBlockComments(src: string): string {
  const blockComment = /\/\*[\s\S]*?\*\//g;
  return src.replace(blockComment, '');
}

/** True when the trimmed node text is wrapped in `<` … `>`. */
function isAngleBracketInclude(rawNodeText: string): boolean {
  const text = rawNodeText.trim();
  if (!text.startsWith('<')) return false;
  return text.endsWith('>');
}

/**
 * True when the path is listed in SYSTEM_HEADERS (exact, case-sensitive
 * match) or its lowercased form starts with one of SYSTEM_PATH_PREFIXES.
 */
function isSystemHeader(cleanedPath: string): boolean {
  if (SYSTEM_HEADERS.has(cleanedPath)) {
    return true;
  }
  const folded = cleanedPath.toLowerCase();
  for (const prefix of SYSTEM_PATH_PREFIXES) {
    if (folded.startsWith(prefix)) return true;
  }
  return false;
}

/** True when the file's lowercased extension is in HEADER_EXTENSIONS. */
function isHeaderFile(filePath: string): boolean {
  const ext = path.extname(filePath).toLowerCase();
  return HEADER_EXTENSIONS.has(ext);
}

/**
 * Pick the tree-sitter grammar for a file by its lowercased extension:
 * `.c` / `.h` use the C grammar, the C++ source/header extensions use the
 * C++ grammar, anything else yields `null`.
 */
function getLanguageForFile(filePath: string): unknown | null {
  const ext = path.extname(filePath).toLowerCase();
  if (ext === '.c' || ext === '.h') {
    return C;
  }
  if (['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh'].includes(ext)) {
    return Cpp;
  }
  return null;
}
/**
 * Decide whether an include path resolves to a file inside the local repo.
 *
 * Only full-path lookups are made against the suffix index (first
 * case-sensitive, then case-insensitive). When the include has no
 * extension, each C/C++ header extension is also tried.
 *
 * The include text is probed both as written and in a separator-normalised
 * form (backslashes -> `/`, leading `./` segments removed, duplicate
 * slashes collapsed). Without the normalised probe, `#include "./foo.h"`
 * or `#include "dir\foo.h"` could never match an index keyed on
 * forward-slash relative paths, and the caller would emit a cross-repo
 * consumer contract for a header that lives in this repo.
 *
 * @param cleaned     include path with the surrounding quotes removed
 * @param suffixIndex index over the repo's file list
 * @returns `true` when a local file matches (caller should suppress the
 *          consumer contract)
 */
function isLocalInclude(cleaned: string, suffixIndex: SuffixIndex): boolean {
  const normalized = cleaned
    .replace(/\\/g, '/')
    .replace(/^(?:\.\/)+/, '')
    .replace(/\/+/g, '/');

  // Keep the verbatim form first so every path that resolved before this
  // change still resolves; add the normalised form only when it differs.
  const bases = normalized && normalized !== cleaned ? [cleaned, normalized] : [cleaned];

  for (const base of bases) {
    const candidates = [base];
    if (!/\.[a-zA-Z0-9]+$/.test(base)) {
      for (const ext of ['.h', '.hpp', '.hxx', '.hh']) candidates.push(base + ext);
    }
    for (const c of candidates) {
      if (suffixIndex.get(c) || suffixIndex.getInsensitive(c)) return true;
    }
  }
  return false;
}
/**
 * Produce provider contracts for this repo's header files.
 *
 * When a graph executor is supplied, the graph query is tried first and
 * its result is used if it returned at least one contract. Otherwise
 * (no executor, or an empty graph result) the filesystem listing is used.
 */
private async extractProviders(
  dbExecutor: CypherExecutor | null,
  repoPath: string,
  allFiles: string[],
): Promise<ExtractedContract[]> {
  const fromGraph = dbExecutor ? await this.extractProvidersGraph(dbExecutor) : [];
  return fromGraph.length > 0 ? fromGraph : this.extractProvidersFallback(repoPath, allFiles);
}
/**
 * Emit a consumer contract for every quoted `#include "…"` directive that
 * does not resolve to a file inside this repo.
 *
 * For each file matched by SOURCE_GLOB:
 *  1. Collect include paths with tree-sitter; if parsing throws, fall back
 *     to a regex scan over the source with block comments stripped.
 *  2. Drop angle-bracket includes, non-quoted (macro) includes, paths
 *     longer than 2048 characters, and known system headers.
 *  3. Drop includes that `isLocalInclude` resolves locally.
 *  4. Emit the remainder as consumer contracts, tagged in `meta.source`
 *     with the extraction path that found them.
 *
 * `normalizedFiles` and `allFiles` are accepted for signature
 * compatibility with the caller; resolution goes through `suffixIndex`.
 */
private async extractConsumers(
  repoPath: string,
  normalizedFiles: string[],
  allFiles: string[],
  suffixIndex: SuffixIndex,
): Promise<ExtractedContract[]> {
  const sourceFiles = await glob(SOURCE_GLOB, {
    cwd: repoPath,
    ignore: STANDARD_IGNORES,
    nodir: true,
  });

  const parser = new Parser();
  const out: ExtractedContract[] = [];
  // Compile the include query once per grammar to avoid re-compilation per file
  const queryCache = new Map<unknown, Parser.Query>();

  for (const rel of sourceFiles) {
    const lang = getLanguageForFile(rel);
    if (!lang) continue;

    const content = readSafe(repoPath, rel);
    if (!content) continue;

    let query = queryCache.get(lang);
    if (!query) {
      try {
        query = new Parser.Query(lang, INCLUDE_QUERY_SRC);
        queryCache.set(lang, query);
      } catch {
        // Query failed to compile for this grammar — skip the file.
        continue;
      }
    }

    // Collect raw include paths: tree-sitter first, regex fallback when
    // parsing throws. `extractionSource` is stamped on each emitted
    // consumer contract so regex-fallback contracts stay auditable.
    let rawIncludes: string[];
    let extractionSource: 'tree_sitter' | 'regex_fallback';
    try {
      parser.setLanguage(lang);
      const tree = parser.parse(content);
      let matches: Parser.QueryMatch[];
      try {
        matches = query.matches(tree.rootNode);
      } catch {
        matches = [];
      }
      rawIncludes = [];
      extractionSource = 'tree_sitter';
      for (const match of matches) {
        const sourceNode = match.captures.find((c) => c.name === 'import.source');
        if (!sourceNode) continue;
        const rawText = sourceNode.node.text;
        if (isAngleBracketInclude(rawText)) continue;
        // The `path:` field can also hold a macro identifier or macro call
        // (`#include CONFIG_H`, `#include STR(x)`). Those are not header
        // paths, and the regex fallback below never matches them, so only
        // quoted string literals are accepted here as well.
        const trimmed = rawText.trim();
        const isQuoted = trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"');
        if (!isQuoted) continue;
        const cleaned = rawText.replace(/['"<>]/g, '');
        if (cleaned && cleaned.length <= 2048) rawIncludes.push(cleaned);
      }
    } catch {
      // tree-sitter failed — fall back to regex. Strip block comments
      // first so a commented-out #include does not produce a contract.
      rawIncludes = [];
      extractionSource = 'regex_fallback';
      const scanTarget = stripBlockComments(content);
      INCLUDE_REGEX.lastIndex = 0; // shared /g regex: reset before each scan
      let m: RegExpExecArray | null;
      while ((m = INCLUDE_REGEX.exec(scanTarget)) !== null) {
        if (m[1] && m[1].length <= 2048) rawIncludes.push(m[1]);
      }
    }

    // Loop-invariant: the consuming file's path with forward slashes.
    const normalizedRel = rel.replace(/\\/g, '/');

    for (const cleaned of rawIncludes) {
      // Filter: skip known system headers and system path prefixes
      if (isSystemHeader(cleaned)) continue;

      // Only a match on the *full* include path counts as local; see
      // isLocalInclude for the exact lookup rules.
      if (isLocalInclude(cleaned, suffixIndex)) continue;

      // Unresolved: emit as consumer contract
      out.push({
        contractId: `include::${normalizeIncludePath(cleaned)}`,
        type: 'include' as const,
        role: 'consumer' as const,
        symbolUid: `File:${normalizedRel}`,
        symbolRef: { filePath: normalizedRel, name: cleaned },
        symbolName: cleaned,
        confidence: 0.85,
        meta: {
          source: extractionSource,
          includePath: cleaned,
        },
      });
    }
  }

  return out;
}
a/gitnexus/src/core/group/extractors/manifest-extractor.ts +++ b/gitnexus/src/core/group/extractors/manifest-extractor.ts @@ -1,6 +1,7 @@ import type { ContractType, CrossLink, GroupManifestLink, StoredContract } from '../types.js'; import type { CypherExecutor } from '../contract-extractor.js'; +import { logger } from '../../logger.js'; export interface ManifestExtractResult { contracts: StoredContract[]; crossLinks: CrossLink[]; @@ -273,6 +274,14 @@ export class ManifestExtractor { LIMIT 1`, { contract: link.contract }, ); + } else if (link.type === 'include') { + rows = await executor( + `MATCH (f:File) WHERE f.filePath = $contract + RETURN f.id AS uid, f.name AS name, f.filePath AS filePath + ORDER BY f.filePath ASC + LIMIT 1`, + { contract: link.contract }, + ); } else if (link.type === 'custom') { // Workspace extractors produce qualified contracts like "mathlex::Expression". // Graph nodes store the unqualified symbol name ("Expression"), so strip @@ -303,7 +312,7 @@ export class ManifestExtractor { // fail the whole manifest extraction. Unresolved contracts still // get a synthetic symbolUid below, so cross-impact can proceed. const message = err instanceof Error ? 
err.message : String(err); - console.warn( + logger.warn( `[manifest-extractor] resolveSymbol failed for ${link.type}:${link.contract} ` + `in ${repoPathKey}: ${message}`, ); @@ -357,6 +366,8 @@ export class ManifestExtractor { return `lib::${contract}`; case 'custom': return `custom::${contract}`; + case 'include': + return `include::${contract}`; default: { const _exhaustive: never = type; throw new Error(`Unhandled ContractType: ${String(_exhaustive)}`); diff --git a/gitnexus/src/core/group/extractors/node-workspace-extractor.ts b/gitnexus/src/core/group/extractors/node-workspace-extractor.ts index 05a7c95dd5..aa40ac088c 100644 --- a/gitnexus/src/core/group/extractors/node-workspace-extractor.ts +++ b/gitnexus/src/core/group/extractors/node-workspace-extractor.ts @@ -4,6 +4,7 @@ import type { CypherExecutor } from '../contract-extractor.js'; import type { GroupManifestLink, ContractRole } from '../types.js'; import { shouldIgnorePath, loadIgnoreRules } from '../../../config/ignore-service.js'; +import { logger } from '../../logger.js'; interface PackageMeta { name: string; groupPath: string; @@ -205,7 +206,7 @@ export async function extractNodeWorkspaceLinks( }; const existing = packagesByName.get(manifest.name); if (existing) { - console.warn( + logger.warn( `[node-workspace-extractor] duplicate package name "${manifest.name}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`, ); continue; diff --git a/gitnexus/src/core/group/extractors/python-workspace-extractor.ts b/gitnexus/src/core/group/extractors/python-workspace-extractor.ts index 5930808af1..4453852a63 100644 --- a/gitnexus/src/core/group/extractors/python-workspace-extractor.ts +++ b/gitnexus/src/core/group/extractors/python-workspace-extractor.ts @@ -4,6 +4,7 @@ import type { CypherExecutor } from '../contract-extractor.js'; import type { GroupManifestLink, ContractRole } from '../types.js'; import { shouldIgnorePath, loadIgnoreRules } from '../../../config/ignore-service.js'; 
+import { logger } from '../../logger.js'; interface PythonPackageMeta { name: string; importName: string; @@ -204,7 +205,7 @@ export async function extractPythonWorkspaceLinks( }; const existing = packagesByImportName.get(manifest.importName); if (existing) { - console.warn( + logger.warn( `[python-workspace-extractor] duplicate package "${manifest.name}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`, ); continue; diff --git a/gitnexus/src/core/group/extractors/rust-workspace-extractor.ts b/gitnexus/src/core/group/extractors/rust-workspace-extractor.ts index c19af07caa..d58c3e08f6 100644 --- a/gitnexus/src/core/group/extractors/rust-workspace-extractor.ts +++ b/gitnexus/src/core/group/extractors/rust-workspace-extractor.ts @@ -5,6 +5,7 @@ import type { GroupManifestLink, ContractRole } from '../types.js'; import { shouldIgnorePath } from '../../../config/ignore-service.js'; import { loadIgnoreRules } from '../../../config/ignore-service.js'; +import { logger } from '../../logger.js'; /** * Discover cross-crate contracts in a Rust workspace by reading each * member's `Cargo.toml` dependencies and scanning source files for @@ -30,6 +31,32 @@ interface ImportedSymbol { filePath: string; } +/** + * Linear-time `[package].name = "..."` lookup. The previous regex + * `^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"` had a nested + * lazy quantifier on `\n` that CodeQL js/redos flagged as exponential + * on inputs like `[package]\n` + many bare `\n`. We walk lines + * explicitly: scan from the first `[package]` header until we hit the + * next `[...]` section header, looking for the `name = "..."` line. + * O(n) with the line count. + * + * Exported so the U8 ReDoS regression test can drive the production + * line-walk directly with adversarial fixtures (multi-line strings, + * trailing sections, etc.) instead of duplicating it inline. 
+ */ +export function parseCargoPackageName(content: string): string | null { + const lines = content.split('\n'); + const packageStart = lines.findIndex((l) => l.trim() === '[package]'); + if (packageStart < 0) return null; + for (let i = packageStart + 1; i < lines.length; i++) { + const line = lines[i].trimStart(); + if (line.startsWith('[')) break; // hit the next section header + const m = /^name\s*=\s*"([^"]+)"/.exec(line); + if (m) return m[1]; + } + return null; +} + /** * Parse a Cargo.toml to extract the crate name and workspace dependency * names. Uses simple line-based parsing — no TOML library needed for @@ -46,12 +73,9 @@ async function parseCrateManifest( return null; } - let name = ''; + const name = parseCargoPackageName(content) ?? ''; const workspaceDeps: string[] = []; - const nameMatch = content.match(/^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"/m); - if (nameMatch) name = nameMatch[1]; - // Match dependencies that use workspace = true, which indicates they // are workspace-internal deps: // dep_name = { workspace = true } @@ -224,7 +248,7 @@ export async function extractRustWorkspaceLinks( }; const existing = cratesByName.get(manifest.name); if (existing) { - console.warn( + logger.warn( `[rust-workspace-extractor] duplicate crate name "${manifest.name}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`, ); continue; diff --git a/gitnexus/src/core/group/matching.ts b/gitnexus/src/core/group/matching.ts index 3431f8ddf8..0b27655c62 100644 --- a/gitnexus/src/core/group/matching.ts +++ b/gitnexus/src/core/group/matching.ts @@ -107,6 +107,8 @@ export function normalizeContractId(id: string): string { return `topic::${rest.trim().toLowerCase()}`; case 'lib': return `lib::${rest.toLowerCase()}`; + case 'include': + return `include::${rest.replace(/\\/g, '/').replace(/^\.\//, '').replace(/\/+/g, '/').toLowerCase()}`; default: return id; } diff --git a/gitnexus/src/core/group/service.ts 
b/gitnexus/src/core/group/service.ts index a412ceaa82..d0473048fc 100644 --- a/gitnexus/src/core/group/service.ts +++ b/gitnexus/src/core/group/service.ts @@ -14,6 +14,7 @@ import { } from './group-path-utils.js'; import { getDefaultGitnexusDir, getGroupDir, listGroups, readContractRegistry } from './storage.js'; import { syncGroup } from './sync.js'; +import { logger } from '../logger.js'; import type { ContractRegistry, CrossLink, @@ -64,6 +65,15 @@ export interface GroupToolPort { relationTypes: string[]; minConfidence: number; includeTests: boolean; + // Optional cancellation signal. Callers (notably the cross-impact + // Phase-2 fanout) wrap this call in a Promise.race against a + // setTimeout-driven AbortController so a single hung neighbor + // cannot exceed the request's clamped timeout budget. Implementors + // may honor the signal cooperatively or simply let the caller's + // race resolve the await — the latter is sufficient for the + // resource-exhaustion mitigation. When the signal is absent or + // already aborted at call time, behavior is unchanged. 
+ signal?: AbortSignal; }, ): Promise; context( @@ -170,11 +180,11 @@ async function loadContractRegistryResilient( contracts.push(row); } else { skippedCorrupt++; - console.warn('[group] skipping corrupt contract row in contracts.json'); + logger.warn('[group] skipping corrupt contract row in contracts.json'); } } catch { skippedCorrupt++; - console.warn('[group] skipping corrupt contract row in contracts.json'); + logger.warn('[group] skipping corrupt contract row in contracts.json'); } } } @@ -187,11 +197,11 @@ async function loadContractRegistryResilient( crossLinks.push(row); } else { skippedCorrupt++; - console.warn('[group] skipping corrupt crossLinks row in contracts.json'); + logger.warn('[group] skipping corrupt crossLinks row in contracts.json'); } } catch { skippedCorrupt++; - console.warn('[group] skipping corrupt crossLinks row in contracts.json'); + logger.warn('[group] skipping corrupt crossLinks row in contracts.json'); } } } diff --git a/gitnexus/src/core/group/storage.ts b/gitnexus/src/core/group/storage.ts index 99bf27fbd9..cc3dbfdc93 100644 --- a/gitnexus/src/core/group/storage.ts +++ b/gitnexus/src/core/group/storage.ts @@ -5,6 +5,15 @@ import * as os from 'node:os'; import { randomBytes } from 'node:crypto'; import type { ContractRegistry } from './types.js'; +/** + * Build an unpredictable suffix for atomic-write tmp files. Replaces the + * previous `Date.now()` pattern which CodeQL flagged as + * js/insecure-temporary-file: a guessable suffix in a writable directory + * lets a co-located attacker pre-create or symlink the tmp path before the + * write lands. 
+ */ +const tmpSuffix = (): string => randomBytes(8).toString('hex'); + const CONTRACTS_FILE = 'contracts.json'; export function getDefaultGitnexusDir(): string { @@ -35,9 +44,21 @@ export async function writeContractRegistry( registry: ContractRegistry, ): Promise { const targetPath = path.join(groupDir, CONTRACTS_FILE); - const tmpPath = `${targetPath}.tmp.${randomBytes(8).toString('hex')}`; - - await fsp.writeFile(tmpPath, JSON.stringify(registry, null, 2), 'utf-8'); + const tmpPath = `${targetPath}.tmp.${tmpSuffix()}`; + + // O_EXCL via `'wx'` flag + explicit `0o600` mode — closes both halves + // of the CodeQL js/insecure-temporary-file finding: `'wx'` rejects a + // pre-planted symlink at the path, and `0o600` (user-only) prevents + // the file from being created group/world readable while it briefly + // contains contract data en route to the rename. The query's + // `isSecureMode` predicate inspects ONLY the mode argument, not the + // flags, so the explicit mode is what credits the fix. + const handle = await fsp.open(tmpPath, 'wx', 0o600); + try { + await handle.writeFile(JSON.stringify(registry, null, 2), 'utf-8'); + } finally { + await handle.close(); + } await fsp.rename(tmpPath, targetPath); } @@ -107,6 +128,38 @@ matching: # exclude_links_paths: [/ping, /health, /healthcheck] # exclude_links_param_only_paths: false `; - await fsp.writeFile(path.join(groupDir, 'group.yaml'), template, 'utf-8'); + // Always write group.yaml with O_EXCL via `fsp.open(..., 'wx')` — + // refuses to follow a pre-planted symlink at the target path, closing + // the TOCTOU window between the existence check (line ~98) and the + // write that CodeQL js/insecure-temporary-file flags. Under + // `force=true` we unlink the existing file first (best-effort, no-op + // when absent) so the subsequent O_EXCL open succeeds AND the same + // symlink-rejection guarantee holds — this is strictly safer than + // the previous `flag: force ? 
'w' : 'wx'` shape, which silently + // followed symlinks under force. CodeQL's rule does not recognize + // the `writeFile(path, content, { flag: 'wx' })` shape as O_EXCL; + // the explicit open() handle below is what credits the mitigation. + const yamlPath = path.join(groupDir, 'group.yaml'); + if (force) { + try { + await fsp.unlink(yamlPath); + } catch (err) { + // ENOENT (file absent) is expected on first run; rethrow anything + // else so we don't silently mask permission/EBUSY failures. + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + } + // `'wx'` rejects a pre-planted symlink at the path; `0o600` is + // user-only (no group/world bits) — gitnexus storage is per-user + // (`~/.gitnexus/...`), so any "other user wants to read this" case is + // a misconfiguration, not a feature. Keeping the file user-only also + // satisfies CodeQL's `isSecureMode` predicate (low 6 bits == 0) and + // closes the js/insecure-temporary-file alert at this site. + const handle = await fsp.open(yamlPath, 'wx', 0o600); + try { + await handle.writeFile(template, 'utf-8'); + } finally { + await handle.close(); + } return groupDir; } diff --git a/gitnexus/src/core/group/sync.ts b/gitnexus/src/core/group/sync.ts index 9a77df22a6..9b18d828fc 100644 --- a/gitnexus/src/core/group/sync.ts +++ b/gitnexus/src/core/group/sync.ts @@ -8,14 +8,17 @@ import { HttpRouteExtractor } from './extractors/http-route-extractor.js'; import { GrpcExtractor } from './extractors/grpc-extractor.js'; import { ThriftExtractor } from './extractors/thrift-extractor.js'; import { TopicExtractor } from './extractors/topic-extractor.js'; +import { IncludeExtractor } from './extractors/include-extractor.js'; import { ManifestExtractor } from './extractors/manifest-extractor.js'; import { discoverWorkspaceLinks } from './extractors/workspace-extractor.js'; import { buildProviderIndex, runExactMatch, runWildcardMatch } from './matching.js'; import { detectServiceBoundaries, assignService } from 
'./service-boundary-detector.js'; import type { CypherExecutor } from './contract-extractor.js'; import { writeContractRegistry } from './storage.js'; +import { writeBridge } from './bridge-db.js'; import type { ContractRegistry } from './types.js'; +import { logger } from '../logger.js'; export interface SyncOptions { extractorOverride?: | ((repo: RepoHandle) => Promise) @@ -99,6 +102,7 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis const grpcEx = new GrpcExtractor(); const thriftEx = new ThriftExtractor(); const topicEx = new TopicExtractor(); + const includeEx = new IncludeExtractor(); dbExecutors = new Map(); const openPoolIds: string[] = []; @@ -167,6 +171,17 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis } } + if (config.detect.includes) { + const extracted = await includeEx.extract(executor, handle.repoPath, handle); + for (const c of extracted) { + autoContracts.push({ + ...c, + repo: groupPath, + service: assignService(c.symbolRef.filePath, boundaries), + }); + } + } + const metaPath = path.join(handle.storagePath, 'meta.json'); try { const raw = await fs.readFile(metaPath, 'utf-8'); @@ -211,7 +226,7 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis allLinks = [...allLinks, ...wsResult.links]; if (opts?.verbose) { for (const s of wsResult.stats) { - console.log( + logger.info( ` workspace-deps: discovered ${s.linkCount} cross-${s.ecosystem.toLowerCase()} links from ${s.projectCount} ${s.ecosystem} projects`, ); } @@ -230,7 +245,7 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis for (const link of allLinks) { const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r)); if (dangling.length > 0) { - console.warn( + logger.warn( `[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`, ); } @@ -241,7 +256,7 
@@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis autoContracts.push(...manifestResult.contracts); manifestCrossLinks = manifestResult.crossLinks; if (opts?.verbose) { - console.log( + logger.info( ` manifest: ${manifestCrossLinks.length} cross-links from ${allLinks.length} links (${config.links.length} declared + ${allLinks.length - config.links.length} discovered)`, ); } @@ -269,6 +284,12 @@ export async function syncGroup(config: GroupConfig, opts?: SyncOptions): Promis if (opts?.groupDir && !opts.skipWrite) { await writeContractRegistry(opts.groupDir, registry); + await writeBridge(opts.groupDir, { + contracts: allContracts, + crossLinks, + repoSnapshots, + missingRepos, + }); } return { diff --git a/gitnexus/src/core/group/types.ts b/gitnexus/src/core/group/types.ts index 7d0a142511..8e43ff78f0 100644 --- a/gitnexus/src/core/group/types.ts +++ b/gitnexus/src/core/group/types.ts @@ -1,4 +1,4 @@ -export type ContractType = 'http' | 'grpc' | 'thrift' | 'topic' | 'lib' | 'custom'; +export type ContractType = 'http' | 'grpc' | 'thrift' | 'topic' | 'lib' | 'custom' | 'include'; export type MatchType = 'exact' | 'manifest' | 'wildcard' | 'bm25' | 'embedding'; export type ContractRole = 'provider' | 'consumer'; @@ -28,6 +28,7 @@ export interface DetectConfig { topics: boolean; shared_libs: boolean; embedding_fallback: boolean; + includes: boolean; workspace_deps: boolean; } diff --git a/gitnexus/src/core/ingestion/ast-cache.ts b/gitnexus/src/core/ingestion/ast-cache.ts index 65da46ab88..454c60df2e 100644 --- a/gitnexus/src/core/ingestion/ast-cache.ts +++ b/gitnexus/src/core/ingestion/ast-cache.ts @@ -1,6 +1,7 @@ import { LRUCache } from 'lru-cache'; import Parser from 'tree-sitter'; +import { logger } from '../logger.js'; /** * Minimal structural shape consumers need when reading Trees back * through a phase-dependency boundary. 
Declared here so phases that @@ -49,7 +50,7 @@ export const createASTCache = (maxSize: number = 50): ASTCache => { // will hand freed memory to scope-resolution. (tree as unknown as { delete?: () => void }).delete?.(); } catch (e) { - console.warn('Failed to delete tree from WASM memory', e); + logger.warn({ e }, 'Failed to delete tree from WASM memory'); } }, }); diff --git a/gitnexus/src/core/ingestion/call-processor.ts b/gitnexus/src/core/ingestion/call-processor.ts index 1871d06057..6c59578b05 100644 --- a/gitnexus/src/core/ingestion/call-processor.ts +++ b/gitnexus/src/core/ingestion/call-processor.ts @@ -75,6 +75,7 @@ import { extractReturnTypeName, stripNullable } from './type-extractors/shared.j import type { LiteralTypeInferrer } from './type-extractors/types.js'; import type { SyntaxNode } from './utils/ast-helpers.js'; +import { logger } from '../logger.js'; /** Per-file resolved type bindings for exported symbols. * Populated during call processing, consumed by Phase 14 re-resolution pass. 
*/ export type ExportedTypeMap = Map>; @@ -784,7 +785,7 @@ export const processCalls = async ( const query = new Parser.Query(lang, queryStr); matches = query.matches(tree.rootNode); } catch (queryError) { - console.warn(`Query error for ${file.path}:`, queryError); + logger.warn({ queryError }, `Query error for ${file.path}:`); continue; } @@ -1391,7 +1392,7 @@ export const processCalls = async ( if (skippedByLang && skippedByLang.size > 0) { for (const [lang, count] of skippedByLang.entries()) { - console.warn( + logger.warn( `[ingestion] Skipped ${count} ${lang} file(s) in call processing — ${lang} parser not available.`, ); } diff --git a/gitnexus/src/core/ingestion/cluster-enricher.ts b/gitnexus/src/core/ingestion/cluster-enricher.ts index b20ed2bad9..06cd4d0cd0 100644 --- a/gitnexus/src/core/ingestion/cluster-enricher.ts +++ b/gitnexus/src/core/ingestion/cluster-enricher.ts @@ -7,6 +7,7 @@ import { CommunityNode } from './community-processor.js'; +import { logger } from '../logger.js'; // ============================================================================ // TYPES // ============================================================================ @@ -128,7 +129,7 @@ export const enrichClusters = async ( enrichments.set(community.id, enrichment); } catch (error) { // On error, fallback to heuristic - console.warn(`Failed to enrich cluster ${community.id}:`, error); + logger.warn({ error }, `Failed to enrich cluster ${community.id}:`); enrichments.set(community.id, { name: community.heuristicLabel, keywords: [], @@ -210,7 +211,7 @@ Output JSON array: } } } catch (error) { - console.warn('Batch enrichment failed, falling back to heuristics:', error); + logger.warn({ error }, 'Batch enrichment failed, falling back to heuristics:'); // Fallback for this batch for (const community of batch) { enrichments.set(community.id, { diff --git a/gitnexus/src/core/ingestion/cobol/cobol-copy-expander.ts b/gitnexus/src/core/ingestion/cobol/cobol-copy-expander.ts index 
46a33001df..dda135b127 100644 --- a/gitnexus/src/core/ingestion/cobol/cobol-copy-expander.ts +++ b/gitnexus/src/core/ingestion/cobol/cobol-copy-expander.ts @@ -1,3 +1,4 @@ +import { logger } from '../../logger.js'; /** * COBOL COPY statement expansion engine. * @@ -454,7 +455,7 @@ export function expandCopies( if (visited.has(resolvedPath)) { if (!warnedCircular.has(resolvedPath)) { warnedCircular.add(resolvedPath); - console.warn( + logger.warn( `[cobol-copy-expander] Circular COPY detected: ${cs.target} (${resolvedPath}) ` + `includes itself. Skipping expansion.`, ); @@ -464,7 +465,7 @@ export function expandCopies( // Max depth exceeded — keep unexpanded if (depth >= maxDepth) { - console.warn( + logger.warn( `[cobol-copy-expander] Max expansion depth (${maxDepth}) reached for ` + `COPY ${cs.target} in ${srcPath}. Skipping expansion.`, ); @@ -475,7 +476,7 @@ export function expandCopies( if (++totalExpansions > MAX_TOTAL_EXPANSIONS) { if (!warnedCircular.has('__max_total__')) { warnedCircular.add('__max_total__'); - console.warn( + logger.warn( `[cobol-copy-expander] Max total expansions (${MAX_TOTAL_EXPANSIONS}) reached ` + `in ${srcPath}. 
Skipping further expansions.`, ); diff --git a/gitnexus/src/core/ingestion/cobol/cobol-preprocessor.ts b/gitnexus/src/core/ingestion/cobol/cobol-preprocessor.ts index 23cedb99b4..34be6bc032 100644 --- a/gitnexus/src/core/ingestion/cobol/cobol-preprocessor.ts +++ b/gitnexus/src/core/ingestion/cobol/cobol-preprocessor.ts @@ -369,9 +369,20 @@ const RE_USE_AFTER = /\bUSE\s+(?:AFTER\s+)?(?:STANDARD\s+)?(?:EXCEPTION|ERROR)\s+ON\s+([A-Z][A-Z0-9-]+|INPUT|OUTPUT|I-O|EXTEND)\b/i; // SET statement (condition, index) -const RE_SET_TO_TRUE = /\bSET\s+((?:[A-Z][A-Z0-9-]+(?:\s+OF\s+[A-Z][A-Z0-9-]+)?\s+)+)TO\s+TRUE\b/i; -const RE_SET_INDEX = - /\bSET\s+((?:[A-Z][A-Z0-9-]+\s+)+)(TO|UP\s+BY|DOWN\s+BY)\s+(\d+|[A-Z][A-Z0-9-]+)/i; +// +// Catastrophic-backtracking note (CodeQL js/redos): the previous shape +// `((?:[A-Z][A-Z0-9-]+(?:\s+OF\s+[A-Z][A-Z0-9-]+)?\s+)+)TO\s+TRUE` +// nested `\s+` quantifiers across alternations and was exponential on +// inputs like "SET a OF a OF a ... TO TRUE". Replaced with a lazy +// dot-match bounded by the explicit `\s+TO\s+TRUE` suffix — `.+?` is +// O(n) with the trailing anchor, and the captured group is parsed +// downstream the same way as before. +// Exported so the U8 ReDoS regression test can pin the exact production +// pattern. Direct import is the only way to ensure the test's +// pathological-input timing assertion exercises the production regex +// instead of an inline copy that drifts. 
+export const RE_SET_TO_TRUE = /\bSET\s+(.+?)\s+TO\s+TRUE\b/i; +export const RE_SET_INDEX = /\bSET\s+(.+?)\s+(TO|UP\s+BY|DOWN\s+BY)\s+(\d+|[A-Z][A-Z0-9-]+)/i; // INITIALIZE statement — data reset (captures targets before REPLACING/WITH clause) const RE_INITIALIZE = /\bINITIALIZE\s+([\s\S]*?)(?=\bREPLACING\b|\bWITH\b|\.\s*$|$)/i; diff --git a/gitnexus/src/core/ingestion/filesystem-walker.ts b/gitnexus/src/core/ingestion/filesystem-walker.ts index 71a4046f2b..4d6725e246 100644 --- a/gitnexus/src/core/ingestion/filesystem-walker.ts +++ b/gitnexus/src/core/ingestion/filesystem-walker.ts @@ -5,6 +5,7 @@ import path from 'path'; import { glob } from 'glob'; import { createIgnoreFilter } from '../../config/ignore-service.js'; +import { logger } from '../logger.js'; export interface FileEntry { path: string; content: string; @@ -74,10 +75,10 @@ export const walkRepositoryPaths = async ( if (skippedLarge > 0) { const isDefault = maxFileSizeBytes === DEFAULT_MAX_FILE_SIZE_BYTES; const suffix = isDefault ? ', likely generated/vendored' : ''; - console.warn(` Skipped ${skippedLarge} large files (>${maxFileSizeBytes / 1024}KB${suffix})`); + logger.warn(` Skipped ${skippedLarge} large files (>${maxFileSizeBytes / 1024}KB${suffix})`); if (isVerboseIngestionEnabled()) { for (const p of skippedLargePaths) { - console.warn(` - ${p}`); + logger.warn(` - ${p}`); } } } diff --git a/gitnexus/src/core/ingestion/heritage-processor.ts b/gitnexus/src/core/ingestion/heritage-processor.ts index 12e59a19ac..2c973ad8ec 100644 --- a/gitnexus/src/core/ingestion/heritage-processor.ts +++ b/gitnexus/src/core/ingestion/heritage-processor.ts @@ -34,6 +34,7 @@ import type { ResolutionContext } from './model/resolution-context.js'; import { TIER_CONFIDENCE } from './model/resolution-context.js'; import type { HeritageInfo } from './heritage-types.js'; +import { logger } from '../logger.js'; /** * Derive the heritage-resolution strategy for a language from its * `LanguageProvider`. 
This is the production wiring that `buildHeritageMap` @@ -237,7 +238,7 @@ export const processHeritage = async ( query = new Parser.Query(treeSitterLang, queryStr); matches = query.matches(tree.rootNode); } catch (queryError) { - console.warn(`Heritage query error for ${file.path}:`, queryError); + logger.warn({ queryError }, `Heritage query error for ${file.path}:`); continue; } @@ -267,7 +268,7 @@ export const processHeritage = async ( if (skippedByLang && skippedByLang.size > 0) { for (const [lang, count] of skippedByLang.entries()) { - console.warn( + logger.warn( `[ingestion] Skipped ${count} ${lang} file(s) in heritage processing — ${lang} parser not available.`, ); } diff --git a/gitnexus/src/core/ingestion/import-processor.ts b/gitnexus/src/core/ingestion/import-processor.ts index b669d27445..03cbed5b79 100644 --- a/gitnexus/src/core/ingestion/import-processor.ts +++ b/gitnexus/src/core/ingestion/import-processor.ts @@ -27,6 +27,7 @@ import type { SyntaxNode } from './utils/ast-helpers.js'; import { isDev } from './utils/env.js'; import { isRegistryPrimary } from './registry-primary-flag.js'; +import { logger } from '../logger.js'; // Type: Map> // Stores all files that a given file imports from export type ImportMap = Map>; @@ -324,14 +325,18 @@ export const processImports = async ( matches = query.matches(tree.rootNode); } catch (queryError: any) { if (isDev) { - console.group(`🔴 Query Error: ${file.path}`); - console.log('Language:', language); - console.log('Query (first 200 chars):', queryStr.substring(0, 200) + '...'); - console.log('Error:', queryError?.message || queryError); - console.log('File content (first 300 chars):', file.content.substring(0, 300)); - console.log('AST root type:', tree.rootNode?.type); - console.log('AST has errors:', tree.rootNode?.hasError); - console.groupEnd(); + logger.error( + { + file: file.path, + language, + err: queryError?.message || queryError, + queryPreview: queryStr.substring(0, 200) + '...', + contentPreview: 
file.content.substring(0, 300), + astRootType: tree.rootNode?.type, + astHasError: tree.rootNode?.hasError, + }, + 'tree-sitter query error', + ); } if (wasReparsed) (tree as unknown as { delete?: () => void }).delete?.(); @@ -346,7 +351,7 @@ export const processImports = async ( const sourceNode = captureMap['import.source']; if (!sourceNode) { if (isDev) { - console.log(`⚠️ Import captured but no source node in ${file.path}`); + logger.info(`⚠️ Import captured but no source node in ${file.path}`); } return; } @@ -399,14 +404,14 @@ export const processImports = async ( if (skippedByLang && skippedByLang.size > 0) { for (const [lang, count] of skippedByLang.entries()) { - console.warn( + logger.warn( `[ingestion] Skipped ${count} ${lang} file(s) in import processing — ${lang} parser not available.`, ); } } if (isDev) { - console.log( + logger.info( `📊 Import processing complete: ${getResolvedCount()}/${totalImportsFound} imports resolved to graph edges`, ); } @@ -498,7 +503,7 @@ export const processImportsFromExtracted = async ( ); if (isDev) { - console.log( + logger.info( `📊 Import processing (fast path): ${getResolvedCount()}/${totalImportsFound} imports resolved to graph edges`, ); } diff --git a/gitnexus/src/core/ingestion/language-config.ts b/gitnexus/src/core/ingestion/language-config.ts index 682d7b190d..f51ef57c67 100644 --- a/gitnexus/src/core/ingestion/language-config.ts +++ b/gitnexus/src/core/ingestion/language-config.ts @@ -4,6 +4,7 @@ import type { ImportConfigs } from './import-resolvers/types.js'; import { isDev } from './utils/env.js'; +import { logger } from '../logger.js'; // ============================================================================ // LANGUAGE-SPECIFIC CONFIG TYPES // ============================================================================ @@ -82,7 +83,7 @@ export async function loadTsconfigPaths(repoRoot: string): Promise 0) { if (isDev) { - console.log(`📦 Loaded ${aliases.size} path aliases from ${filename}`); + 
logger.info(`📦 Loaded ${aliases.size} path aliases from ${filename}`); } return { aliases, baseUrl }; } @@ -104,7 +105,7 @@ export async function loadGoModulePath(repoRoot: string): Promise 0) { if (isDev) { - console.log(`📦 Loaded ${targets.size} Swift package targets`); + logger.info(`📦 Loaded ${targets.size} Swift package targets`); } return { targets }; } diff --git a/gitnexus/src/core/ingestion/method-extractors/generic.ts b/gitnexus/src/core/ingestion/method-extractors/generic.ts index 7e15f24587..f02faef359 100644 --- a/gitnexus/src/core/ingestion/method-extractors/generic.ts +++ b/gitnexus/src/core/ingestion/method-extractors/generic.ts @@ -8,6 +8,7 @@ */ import type { SyntaxNode } from '../utils/ast-helpers.js'; +import { logger } from '../../logger.js'; import type { MethodExtractor, MethodExtractorContext, @@ -158,7 +159,7 @@ function findBodies(node: SyntaxNode, bodyNodeSet: Set): SyntaxNode[] { // Fallback: body field exists but its type is not in bodyNodeTypes. // This may indicate a config typo — log for debugging if NODE_ENV is development. 
if (process.env.NODE_ENV === 'development') { - console.warn( + logger.warn( `[MethodExtractor] body field type '${bodyField.type}' not in bodyNodeTypes for node '${node.type}'`, ); } diff --git a/gitnexus/src/core/ingestion/parsing-processor.ts b/gitnexus/src/core/ingestion/parsing-processor.ts index 86b4199904..8803ec023a 100644 --- a/gitnexus/src/core/ingestion/parsing-processor.ts +++ b/gitnexus/src/core/ingestion/parsing-processor.ts @@ -34,6 +34,7 @@ import { import type { LanguageProvider } from './language-provider.js'; import type { ParsedFile } from 'gitnexus-shared'; import { WorkerPool } from './workers/worker-pool.js'; +import { logger } from '../logger.js'; import type { ParseWorkerResult, ParseWorkerInput, @@ -191,7 +192,7 @@ const processParsingWithWorkers = async ( const summary = Array.from(skippedLanguages.entries()) .map(([lang, count]) => `${lang}: ${count}`) .join(', '); - console.warn(` Skipped unsupported languages: ${summary}`); + logger.warn(` Skipped unsupported languages: ${summary}`); } // Final progress @@ -382,7 +383,7 @@ const processParsingSequential = async ( bufferSize: getTreeSitterBufferSize(parseContent), }); } catch (parseError) { - console.warn(`Skipping unparseable file: ${file.path}`); + logger.warn(`Skipping unparseable file: ${file.path}`); continue; } @@ -408,7 +409,7 @@ const processParsingSequential = async ( query = new Parser.Query(language, queryString); matches = query.matches(tree.rootNode); } catch (queryError) { - console.warn(`Query error for ${file.path}:`, queryError); + logger.warn({ queryError }, `Query error for ${file.path}:`); continue; } @@ -701,7 +702,7 @@ const processParsingSequential = async ( if (skippedByLang && skippedByLang.size > 0) { for (const [lang, count] of skippedByLang.entries()) { - console.warn( + logger.warn( `[ingestion] Skipped ${count} ${lang} file(s) in parsing processing — ${lang} parser not available.`, ); } @@ -742,7 +743,7 @@ export const processParsing = async ( // in 
scope-resolution with an empty cache and get re-parsed. // Surfacing this in PROF mode prevents silent perf cliffs when // a repo crosses the worker-pool threshold. - console.warn( + logger.warn( `[scope-resolution prof] worker pool engaged for ${files.length} files — cross-phase tree cache will be empty; scope-resolution re-parses.`, ); } @@ -757,7 +758,7 @@ export const processParsing = async ( ); } catch (err) { const message = err instanceof Error ? err.message : String(err); - console.warn('Worker pool parsing stopped; continuing with sequential parser:', message); + logger.warn({ message }, 'Worker pool parsing stopped; continuing with sequential parser:'); reportProgress?.( lastProgress, files.length, diff --git a/gitnexus/src/core/ingestion/pipeline-phases/cobol.ts b/gitnexus/src/core/ingestion/pipeline-phases/cobol.ts index cfe6b6ce29..c9332aabce 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/cobol.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/cobol.ts @@ -15,6 +15,7 @@ import { readFileContents } from '../filesystem-walker.js'; import type { StructureOutput } from './structure.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface CobolOutput { programs: number; paragraphs: number; @@ -47,7 +48,7 @@ export const cobolPhase: PipelinePhase = { const cobolResult = processCobol(ctx.graph, cobolFiles, allPathSet); if (isDev) { - console.log( + logger.info( ` COBOL: ${cobolResult.programs} programs, ${cobolResult.paragraphs} paragraphs, ${cobolResult.sections} sections from ${cobolFiles.length} files`, ); if ( @@ -55,12 +56,12 @@ export const cobolPhase: PipelinePhase = { cobolResult.execCicsBlocks > 0 || cobolResult.entryPoints > 0 ) { - console.log( + logger.info( ` COBOL enriched: ${cobolResult.execSqlBlocks} SQL blocks, ${cobolResult.execCicsBlocks} CICS blocks, ${cobolResult.entryPoints} entry points, ${cobolResult.moves} moves, ${cobolResult.fileDeclarations} file declarations`, ); } if 
(cobolResult.jclJobs > 0) { - console.log(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`); + logger.info(` JCL: ${cobolResult.jclJobs} jobs, ${cobolResult.jclSteps} steps`); } } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/communities.ts b/gitnexus/src/core/ingestion/pipeline-phases/communities.ts index 6a302b8b9f..0e29b6cc27 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/communities.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/communities.ts @@ -15,6 +15,7 @@ import type { StructureOutput } from './structure.js'; import { processCommunities, type CommunityDetectionResult } from '../community-processor.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface CommunitiesOutput { communityResult: CommunityDetectionResult; } @@ -47,7 +48,7 @@ export const communitiesPhase: PipelinePhase = { }); if (isDev) { - console.log( + logger.info( `🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/cross-file-impl.ts b/gitnexus/src/core/ingestion/pipeline-phases/cross-file-impl.ts index 334ff57df6..5c014ed736 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/cross-file-impl.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/cross-file-impl.ts @@ -23,6 +23,7 @@ import { topologicalLevelSort } from '../utils/graph-sort.js'; import type { KnowledgeGraph } from '../../graph/types.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; /** Max AST trees to keep in LRU cache for cross-file binding propagation. 
*/ const AST_CACHE_CAP = 50; @@ -60,7 +61,7 @@ export async function runCrossFileBindingPropagation( const { levels, cycleCount } = topologicalLevelSort(ctx.importMap); if (isDev && cycleCount > 0) { - console.log(`🔄 ${cycleCount} files in import cycles (processed last in undefined order)`); + logger.info(`🔄 ${cycleCount} files in import cycles (processed last in undefined order)`); } let filesWithGaps = 0; @@ -88,7 +89,7 @@ export async function runCrossFileBindingPropagation( const gapRatio = totalFiles > 0 ? filesWithGaps / totalFiles : 0; if (gapRatio < CROSS_FILE_SKIP_THRESHOLD && filesWithGaps < gapThreshold) { if (isDev) { - console.log( + logger.info( `⏭️ Cross-file re-resolution skipped (${filesWithGaps}/${totalFiles} files, ${(gapRatio * 100).toFixed(1)}% < ${CROSS_FILE_SKIP_THRESHOLD * 100}% threshold)`, ); } @@ -193,7 +194,7 @@ export async function runCrossFileBindingPropagation( if (crossFileResolved >= MAX_CROSS_FILE_REPROCESS) { if (isDev) - console.log(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`); + logger.info(`⚠️ Cross-file re-resolution capped at ${MAX_CROSS_FILE_REPROCESS} files`); break; } } @@ -204,7 +205,7 @@ export async function runCrossFileBindingPropagation( const elapsed = Date.now() - crossFileStart; const totalElapsed = Date.now() - pipelineStart; const reResolutionPct = totalElapsed > 0 ? 
((elapsed / totalElapsed) * 100).toFixed(1) : '0'; - console.log( + logger.info( `🔗 Cross-file re-resolution: ${crossFileResolved} candidates re-processed` + ` in ${elapsed}ms (${reResolutionPct}% of total ingestion time so far)`, ); diff --git a/gitnexus/src/core/ingestion/pipeline-phases/cross-file.ts b/gitnexus/src/core/ingestion/pipeline-phases/cross-file.ts index e1e907a0bb..3ea604b4dc 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/cross-file.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/cross-file.ts @@ -36,6 +36,7 @@ import type { ParseOutput } from './parse.js'; import { runCrossFileBindingPropagation } from './cross-file-impl.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface CrossFileOutput { /** Number of files re-processed during cross-file propagation. */ filesReprocessed: number; @@ -59,11 +60,11 @@ export const crossFilePhase: PipelinePhase = { if (isDev) { if (bindingAccumulator.totalBindings > 0) { const memKB = Math.round(bindingAccumulator.estimateMemoryBytes() / 1024); - console.log( + logger.info( `📦 BindingAccumulator: ${bindingAccumulator.totalBindings} bindings across ${bindingAccumulator.fileCount} files (~${memKB} KB)`, ); } else if (totalFiles > 0) { - console.log( + logger.info( `📦 BindingAccumulator: EMPTY — 0 bindings across 0 files despite ${totalFiles} parsed files. 
If the codebase has typed bindings, this indicates an upstream regression.`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/markdown.ts b/gitnexus/src/core/ingestion/pipeline-phases/markdown.ts index 6b3853b9dc..dd57518c15 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/markdown.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/markdown.ts @@ -15,6 +15,7 @@ import { readFileContents } from '../filesystem-walker.js'; import type { StructureOutput } from './structure.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface MarkdownOutput { /** Number of markdown sections extracted. */ sections: number; @@ -48,7 +49,7 @@ export const markdownPhase: PipelinePhase = { const mdResult = processMarkdown(ctx.graph, mdFiles, allPathSet); if (isDev) { - console.log( + logger.info( ` Markdown: ${mdResult.sections} sections, ${mdResult.links} cross-links from ${mdFiles.length} files`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/mro.ts b/gitnexus/src/core/ingestion/pipeline-phases/mro.ts index 372ae32b09..c098f2b7bf 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/mro.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/mro.ts @@ -15,6 +15,7 @@ import type { StructureOutput } from './structure.js'; import { computeMRO } from '../mro-processor.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface MROOutput { entries: number; ambiguityCount: number; @@ -42,7 +43,7 @@ export const mroPhase: PipelinePhase = { const mroResult = computeMRO(ctx.graph); if (isDev && mroResult.entries.length > 0) { - console.log( + logger.info( `🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities, ${mroResult.overrideEdges} METHOD_OVERRIDES, ${mroResult.methodImplementsEdges} METHOD_IMPLEMENTS`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/orm.ts 
b/gitnexus/src/core/ingestion/pipeline-phases/orm.ts index ebdac018ac..4e6021efa7 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/orm.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/orm.ts @@ -16,6 +16,7 @@ import type { ExtractedORMQuery } from '../workers/parse-worker.js'; import type { KnowledgeGraph } from '../../graph/types.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface ORMOutput { edgesCreated: number; modelCount: number; @@ -91,7 +92,7 @@ function processORMQueries( } if (isDev) { - console.log( + logger.info( `ORM dataflow: ${edgesCreated} QUERIES edges, ${modelNodes.size} models (${queries.length} total calls)`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts b/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts index 025bdbeb7c..bd39a43301 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/parse-impl.ts @@ -69,6 +69,7 @@ import { isDev } from '../utils/env.js'; import { synthesizeWildcardImportBindings, needsSynthesis } from './wildcard-synthesis.js'; import { extractORMQueriesInline } from './orm-extraction.js'; +import { logger } from '../../logger.js'; // ── Constants ────────────────────────────────────────────────────────────── /** Max bytes of source content to load per parse chunk. */ @@ -136,7 +137,7 @@ export async function runChunkedParseAndResolve( } } for (const [lang, count] of skippedByLang) { - console.warn( + logger.warn( `Skipping ${count} ${lang} file(s) — ${lang} parser not available (native binding may not have built). 
Try: npm rebuild tree-sitter-${lang}`, ); } @@ -171,7 +172,7 @@ export async function runChunkedParseAndResolve( if (isDev) { const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024); - console.log( + logger.info( `📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`, ); } @@ -220,9 +221,9 @@ export async function runChunkedParseAndResolve( } workerPool = createWorkerPool(workerUrl); } catch (err) { - console.warn( + logger.warn( + { err: (err as Error).message }, 'Worker pool creation failed, using sequential fallback:', - (err as Error).message, ); } } @@ -339,7 +340,7 @@ export async function runChunkedParseAndResolve( exportedTypeMap, ); if (isDev && enrichedCount > 0) { - console.log( + logger.info( `🔗 E1: Seeded ${enrichedCount} cross-file receiver types (chunk ${chunkIdx + 1})`, ); } @@ -538,7 +539,7 @@ export async function runChunkedParseAndResolve( const rcStats = ctx.getStats(); const total = rcStats.cacheHits + rcStats.cacheMisses; const hitRate = total > 0 ? 
((rcStats.cacheHits / total) * 100).toFixed(1) : '0'; - console.log( + logger.info( `🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`, ); } @@ -554,15 +555,15 @@ export async function runChunkedParseAndResolve( bindingAccumulator.finalize(); const enriched = enrichExportedTypeMap(bindingAccumulator, graph, exportedTypeMap); if (isDev && enriched > 0) { - console.log( + logger.info( `🔗 Worker TypeEnv enrichment: ${enriched} fixpoint-inferred exports added to ExportedTypeMap`, ); } } catch (enrichErr) { if (isDev) { - console.warn( + logger.warn( + { err: (enrichErr as Error).message }, 'Post-fallback finalize/enrich failed during cleanup:', - (enrichErr as Error).message, ); } } @@ -571,7 +572,7 @@ export async function runChunkedParseAndResolve( if (!hasSynthesized) { const synthesized = synthesizeWildcardImportBindings(graph, ctx); if (isDev && synthesized > 0) { - console.log( + logger.info( `🔗 Synthesized ${synthesized} additional wildcard import bindings (Go/Ruby/C++/Swift/Python)`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/processes.ts b/gitnexus/src/core/ingestion/pipeline-phases/processes.ts index 209906cfba..166faea200 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/processes.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/processes.ts @@ -19,6 +19,7 @@ import { processProcesses, type ProcessDetectionResult } from '../process-proces import { generateId } from '../../../lib/utils.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface ProcessesOutput { processResult: ProcessDetectionResult; } @@ -67,7 +68,7 @@ export const processesPhase: PipelinePhase = { ); if (isDev) { - console.log( + logger.info( `🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`, ); } @@ -167,7 +168,7 @@ export const processesPhase: PipelinePhase = { } } if (isDev && 
linked > 0) { - console.log(`🔗 Linked ${linked} Route/Tool nodes to execution flows`); + logger.info(`🔗 Linked ${linked} Route/Tool nodes to execution flows`); } } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/routes.ts b/gitnexus/src/core/ingestion/pipeline-phases/routes.ts index cd0a65f9d3..de3a8ddb88 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/routes.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/routes.ts @@ -32,6 +32,7 @@ import { generateId } from '../../../lib/utils.js'; import { readFileContents } from '../filesystem-walker.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; const EXPO_NAV_PATTERNS = [ /router\.(push|replace|navigate)\(\s*['"`]([^'"`]+)['"`]/g, /]*href=\s*['"`]([^'"`]+)['"`]/g, @@ -174,7 +175,7 @@ export const routesPhase: PipelinePhase = { } if (isDev) { - console.log( + logger.info( `🗺️ Route registry: ${routeRegistry.size} routes${duplicateRoutes > 0 ? ` (${duplicateRoutes} duplicate URLs skipped)` : ''}`, ); } @@ -224,7 +225,7 @@ export const routesPhase: PipelinePhase = { linkedCount++; } if (isDev && linkedCount > 0) { - console.log( + logger.info( `🛡️ Linked ${mwPath} middleware [${mwLabel.join(', ')}] to ${linkedCount} routes`, ); } @@ -290,7 +291,7 @@ export const routesPhase: PipelinePhase = { processNextjsFetchRoutes(ctx.graph, allFetchCalls, routeURLToFile, consumerContents); if (isDev) { - console.log( + logger.info( `🔗 Processed ${allFetchCalls.length} fetch() calls against ${routeRegistry.size} routes`, ); } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/runner.ts b/gitnexus/src/core/ingestion/pipeline-phases/runner.ts index 89543e049d..0bfc45bd4d 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/runner.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/runner.ts @@ -15,6 +15,7 @@ import type { PipelinePhase, PipelineContext, PhaseResult } from './types.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; 
/** * Validate that the phases form a valid dependency graph (no cycles, all deps present). * Returns phases in topological execution order. @@ -176,7 +177,7 @@ export async function runPipeline( const start = Date.now(); if (isDev) { - console.log(`▶ Phase: ${phase.name}`); + logger.info(`▶ Phase: ${phase.name}`); } // Only expose declared dependencies — prevents hidden coupling to undeclared phases. @@ -220,7 +221,7 @@ export async function runPipeline( }); if (isDev) { - console.log(`✓ Phase: ${phase.name} (${durationMs}ms)`); + logger.info(`✓ Phase: ${phase.name} (${durationMs}ms)`); } } diff --git a/gitnexus/src/core/ingestion/pipeline-phases/tools.ts b/gitnexus/src/core/ingestion/pipeline-phases/tools.ts index 023c8a1aff..32a0ae71fb 100644 --- a/gitnexus/src/core/ingestion/pipeline-phases/tools.ts +++ b/gitnexus/src/core/ingestion/pipeline-phases/tools.ts @@ -16,6 +16,7 @@ import { generateId } from '../../../lib/utils.js'; import { readFileContents } from '../filesystem-walker.js'; import { isDev } from '../utils/env.js'; +import { logger } from '../../logger.js'; export interface ToolDef { name: string; filePath: string; @@ -104,7 +105,7 @@ export const toolsPhase: PipelinePhase = { } if (isDev) { - console.log(`🔧 Tool registry: ${toolDefs.length} tools detected`); + logger.info(`🔧 Tool registry: ${toolDefs.length} tools detected`); } } diff --git a/gitnexus/src/core/ingestion/process-processor.ts b/gitnexus/src/core/ingestion/process-processor.ts index a12378c981..aa744e54da 100644 --- a/gitnexus/src/core/ingestion/process-processor.ts +++ b/gitnexus/src/core/ingestion/process-processor.ts @@ -17,6 +17,7 @@ import { calculateEntryPointScore, isTestFile } from './entry-point-scoring.js'; import { SupportedLanguages } from 'gitnexus-shared'; import { isDev } from './utils/env.js'; +import { logger } from '../logger.js'; // ============================================================================ // CONFIGURATION // 
============================================================================ @@ -319,13 +320,13 @@ const findEntryPoints = ( // DEBUG: Log top candidates with new scoring details if (sorted.length > 0 && isDev) { - console.log(`[Process] Top 10 entry point candidates (new scoring):`); + logger.info(`[Process] Top 10 entry point candidates (new scoring):`); sorted.slice(0, 10).forEach((c, i) => { const node = graph.getNode(c.id); const exported = node?.properties.isExported ? '✓' : '✗'; const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || ''; - console.log(` ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`); - console.log(` score: ${c.score.toFixed(2)} = [${c.reasons.join(' × ')}]`); + logger.info(` ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`); + logger.info(` score: ${c.score.toFixed(2)} = [${c.reasons.join(' × ')}]`); }); } diff --git a/gitnexus/src/core/ingestion/scope-extractor-bridge.ts b/gitnexus/src/core/ingestion/scope-extractor-bridge.ts index 1774cefcac..41a50193de 100644 --- a/gitnexus/src/core/ingestion/scope-extractor-bridge.ts +++ b/gitnexus/src/core/ingestion/scope-extractor-bridge.ts @@ -28,6 +28,7 @@ import type { ParsedFile } from 'gitnexus-shared'; import { extract as extractScope } from './scope-extractor.js'; import type { LanguageProvider } from './language-provider.js'; +import { logger } from '../logger.js'; /** Callback used to report scope-extraction warnings to the host (worker or direct). */ export type ScopeBridgeWarn = (message: string) => void; @@ -53,7 +54,7 @@ export function extractParsedFile( err instanceof Error ? 
err.message : String(err) }`; if (onWarn !== undefined) onWarn(message); - else console.warn(message); + else logger.warn(message); return undefined; } } diff --git a/gitnexus/src/core/ingestion/scope-resolution/pipeline/phase.ts b/gitnexus/src/core/ingestion/scope-resolution/pipeline/phase.ts index 5dbb3715fb..c2fda97776 100644 --- a/gitnexus/src/core/ingestion/scope-resolution/pipeline/phase.ts +++ b/gitnexus/src/core/ingestion/scope-resolution/pipeline/phase.ts @@ -38,6 +38,7 @@ import { runScopeResolution } from './run.js'; import { SCOPE_RESOLVERS } from './registry.js'; import { isDev, isSemanticModelValidatorEnabled } from '../../utils/env.js'; +import { logger } from '../../../logger.js'; export interface ScopeResolutionOutput { /** True when at least one language ran. */ readonly ran: boolean; @@ -144,7 +145,7 @@ export const scopeResolutionPhase: PipelinePhase = { resolutionConfig, onWarn: (msg) => { if (isSemanticModelValidatorEnabled()) { - console.warn(`[scope-resolution:${lang}] ${msg}`); + logger.warn(`[scope-resolution:${lang}] ${msg}`); } }, }, @@ -162,7 +163,7 @@ export const scopeResolutionPhase: PipelinePhase = { }); if (isDev) { - console.log( + logger.info( `[scope-resolution:${lang}] ${stats.filesProcessed} files → ${stats.importsEmitted} IMPORTS + ${stats.referenceEdgesEmitted} reference edges (${stats.resolve.unresolved} unresolved sites, ${stats.referenceSkipped} skipped)`, ); } diff --git a/gitnexus/src/core/ingestion/scope-resolution/pipeline/run.ts b/gitnexus/src/core/ingestion/scope-resolution/pipeline/run.ts index b069f0dd17..558c9ef303 100644 --- a/gitnexus/src/core/ingestion/scope-resolution/pipeline/run.ts +++ b/gitnexus/src/core/ingestion/scope-resolution/pipeline/run.ts @@ -41,6 +41,7 @@ import { emitImportEdges } from '../graph-bridge/imports-to-edges.js'; import type { ScopeResolver } from '../contract/scope-resolver.js'; import { buildWorkspaceResolutionIndex } from '../workspace-index.js'; +import { logger } from 
'../../../logger.js'; interface RunScopeResolutionInput { readonly graph: KnowledgeGraph; /** @@ -279,7 +280,7 @@ export function runScopeResolution( if (PROF) { const tEnd = process.hrtime.bigint(); const ns = (a: bigint, b: bigint): number => Number(b - a) / 1_000_000; - console.warn( + logger.warn( `[scope-resolution prof] extract=${ns(tStart, tExtract).toFixed(0)}ms` + ` finalize=${ns(tExtract, tFinalize).toFixed(0)}ms` + ` propagate=${ns(tFinalize, tPropagate).toFixed(0)}ms` + diff --git a/gitnexus/src/core/ingestion/type-env.ts b/gitnexus/src/core/ingestion/type-env.ts index 998e7a59ea..a38df617b3 100644 --- a/gitnexus/src/core/ingestion/type-env.ts +++ b/gitnexus/src/core/ingestion/type-env.ts @@ -24,6 +24,7 @@ import { import type { SemanticModel } from './model/index.js'; import type { NodeLabel } from 'gitnexus-shared'; +import { logger } from '../logger.js'; /** * Per-file scoped type environment: maps (scope, variableName) → typeName. * Scope-aware: variables inside functions are keyed by function name, @@ -769,7 +770,7 @@ const resolveFixpointBindings = ( if (iter === MAX_FIXPOINT_ITERATIONS - 1 && process.env.GITNEXUS_DEBUG) { const unresolved = pendingItems.length - resolved.size; if (unresolved > 0) { - console.warn( + logger.warn( `[type-env] fixpoint hit iteration cap (${MAX_FIXPOINT_ITERATIONS}), ${unresolved} items unresolved`, ); } diff --git a/gitnexus/src/core/ingestion/utils/max-file-size.ts b/gitnexus/src/core/ingestion/utils/max-file-size.ts index 0c418bfd47..82013783ac 100644 --- a/gitnexus/src/core/ingestion/utils/max-file-size.ts +++ b/gitnexus/src/core/ingestion/utils/max-file-size.ts @@ -1,5 +1,6 @@ import { TREE_SITTER_MAX_BUFFER } from '../constants.js'; +import { logger } from '../../logger.js'; /** Default threshold (512 KB). Files larger than this are skipped by the walker. 
*/ export const DEFAULT_MAX_FILE_SIZE_BYTES = 512 * 1024; @@ -11,7 +12,7 @@ const warned = new Set(); const warnOnce = (key: string, message: string): void => { if (warned.has(key)) return; warned.add(key); - console.warn(message); + logger.warn(message); }; /** diff --git a/gitnexus/src/core/ingestion/vue-sfc-extractor.ts b/gitnexus/src/core/ingestion/vue-sfc-extractor.ts index 382417c566..f36a85ab41 100644 --- a/gitnexus/src/core/ingestion/vue-sfc-extractor.ts +++ b/gitnexus/src/core/ingestion/vue-sfc-extractor.ts @@ -23,7 +23,24 @@ interface ScriptBlock { lang: string; } -const SCRIPT_RE = /]*)?>([^]*?)<\/script>/g; +// Closing-tag pattern accepts: +// - whitespace before `>` — ``, `` +// - attribute-like junk after `script` — ``, +// `` +// - any case — ``, `` +// +// HTML5 parses `` as a valid close tag (attributes on +// close tags are ignored by the parser but still terminate the script +// block). A strict `<\/script\s*>` would miss those forms and let a +// crafted Vue file hide content from this extractor — exactly the +// CodeQL `js/bad-tag-filter` failure mode (the published test cases +// it checks include `` and ``). +// +// `[^>]*` after ``, +// matching the HTML parser's actual close-tag behaviour. The `i` flag +// covers the case axis. PR #1330 CI surfaced both the case and +// attribute axes; this expression closes both at once. +const SCRIPT_RE = /]*)?>([^]*?)<\/script[^>]*>/gi; const TEMPLATE_COMPONENT_RE = /<([A-Z][A-Za-z0-9]+)/g; // Greedy: matches from the first . // This is intentional — nested