diff --git a/gitnexus/src/core/run-analyze.ts b/gitnexus/src/core/run-analyze.ts index e14f7a30f6..e754a0242a 100644 --- a/gitnexus/src/core/run-analyze.ts +++ b/gitnexus/src/core/run-analyze.ts @@ -30,7 +30,13 @@ import { registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js'; -import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js'; +import { + getCurrentCommit, + getRemoteUrl, + hasGitDir, + getInferredRepoName, + resolveRepoIdentityRoot, +} from '../storage/git.js'; import type { CachedEmbedding } from './embeddings/types.js'; import { generateAIContextFiles } from '../cli/ai-context.js'; import { EMBEDDING_TABLE_NAME } from './lbug/schema.js'; @@ -168,7 +174,13 @@ export async function runFullAnalysis( if (currentCommit !== '') { await ensureGitNexusIgnored(repoPath); return { - repoName: options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath), + // `resolveRepoIdentityRoot` collapses worktree roots to the + // canonical repo basename (#1259) but leaves arbitrary subdirs + // and `--skip-git` paths unchanged (#1232/#1233 intent preserved). + repoName: + options.registryName ?? + getInferredRepoName(repoPath) ?? + path.basename(resolveRepoIdentityRoot(repoPath)), repoPath, stats: existingMeta.stats ?? {}, alreadyUpToDate: true, @@ -345,7 +357,19 @@ export async function runFullAnalysis( } const { readServerMapping } = await import('./embeddings/server-mapping.js'); - const projectName = path.basename(repoPath); + // Mirror the registry's name-resolution chain so the server-mapping + // lookup key stays aligned with the final registry name (#1259): + // --name → remote-derived → canonical-root basename + // (preserved-alias is intentionally NOT consulted here — server + // mappings are addressed by the operationally-meaningful name the + // user configures, not by a sticky registry-only alias they may not + // know about. The previous canonical-only logic ignored both --name + // and remote-derived names, silently breaking server-mapping for + // anyone with a `--name` alias or remote-named repo.) + const projectName = + options.registryName ?? + getInferredRepoName(repoPath) ?? + path.basename(resolveRepoIdentityRoot(repoPath)); const serverName = await readServerMapping(projectName); const embeddingResult = await runEmbeddingPipeline( executeQuery, diff --git a/gitnexus/src/storage/git.ts b/gitnexus/src/storage/git.ts index 9105a4da34..fe1b8511d9 100644 --- a/gitnexus/src/storage/git.ts +++ b/gitnexus/src/storage/git.ts @@ -94,6 +94,80 @@ export const getGitRoot = (fromPath: string): string | null => { } }; +/** + * Get the *canonical* repository root, dereferencing git worktrees. + * + * Unlike `getGitRoot` (which uses `git rev-parse --show-toplevel` and + * returns the WORKTREE's root when called inside a linked worktree), + * this uses `git rev-parse --git-common-dir` — the shared `.git` + * directory, identical for the main checkout and every linked + * worktree — and returns its parent. + * + * Why it matters (#1259): when `gitnexus analyze` runs inside a + * worktree (e.g. `/repo/wt-feature/`), deriving `repoName` from + * `path.basename(getGitRoot(cwd))` registers the project under the + * worktree's directory slug (`wt-feature`) instead of the canonical + * repo's basename (`repo`). Each worktree then re-registers as a + * "different" project, AGENTS.md is rewritten with the wrong MCP URI, + * and Claude-Code-style worktree workflows silently accumulate + * duplicate registry entries. + * + * Returns `null` when the path is not inside a git repository or + * `git` is not available, so callers can chain safely: + * `getCanonicalRepoRoot(p) ?? getGitRoot(p) ?? p`. + * + * `--path-format=absolute` is required because `--git-common-dir` + * returns a path *relative to cwd* by default (e.g. `../.git` when + * called from a worktree), which would resolve to the wrong absolute + * path if the caller later resolved it from a different directory. + */ +export const getCanonicalRepoRoot = (fromPath: string): string | null => { + try { + const commonDir = execSync('git rev-parse --path-format=absolute --git-common-dir', { + cwd: fromPath, + stdio: ['ignore', 'pipe', 'ignore'], + }) + .toString() + .trim(); + if (!commonDir) return null; + // Common dir is `/.git` for both the main checkout and all + // linked worktrees. Its parent is the canonical repo root. + return path.dirname(path.resolve(commonDir)); + } catch { + return null; + } +}; + +/** + * Resolve `fromPath` to the directory whose basename should drive the + * registry name (#1259) — the *identity root*. Three outcomes: + * + * 1. `fromPath` IS the canonical checkout root → returns it unchanged. + * 2. `fromPath` is a linked-worktree root (has its own `.git` entry, but + * `git rev-parse --git-common-dir` points at a different `.git`) → + * returns the canonical repo root. + * 3. `fromPath` is anything else — an arbitrary subdir under a git repo, + * a non-git folder, a `--skip-git` subdir of an unrelated parent + * checkout — returns `fromPath` unchanged. + * + * Why not just use `getCanonicalRepoRoot` directly? Because `git rev-parse + * --git-common-dir` resolves the same canonical root for ANY path inside + * a git repo, including unrelated subdirs. Using it for registry-name + * derivation would silently re-key a `--skip-git` subdir analyze under + * the parent git's basename, defeating the user's `--skip-git` intent + * (regressing the #1232/#1233 fix). The "is this path a tree root" + * gate confines the canonical-root collapse to exactly the cases where + * #1259 matters: main checkouts and linked worktrees. + */ +export const resolveRepoIdentityRoot = (fromPath: string): string => { + const resolved = path.resolve(fromPath); + const canonical = getCanonicalRepoRoot(resolved); + if (!canonical) return resolved; // non-git → use as-is + if (canonical === resolved) return canonical; // canonical checkout + if (hasGitDir(resolved)) return canonical; // linked worktree (has .git file) + return resolved; // arbitrary subdir under a git repo → preserve as-is +}; + /** * Find a git root by checking only `.git` entries on the ancestor chain. * diff --git a/gitnexus/src/storage/repo-manager.ts b/gitnexus/src/storage/repo-manager.ts index 9741f54b46..8c0bda95fb 100644 --- a/gitnexus/src/storage/repo-manager.ts +++ b/gitnexus/src/storage/repo-manager.ts @@ -10,7 +10,7 @@ import fs from 'fs/promises'; import { realpathSync } from 'fs'; import path from 'path'; import os from 'os'; -import { getInferredRepoName } from './git.js'; +import { getInferredRepoName, resolveRepoIdentityRoot } from './git.js'; /** * Normalise a repo path for registry comparison across platforms @@ -389,6 +389,17 @@ export class RegistryNameCollisionError extends Error { const hasCustomAlias = (entry: RegistryEntry, inferredName: string | null): boolean => { const resolved = path.resolve(entry.path); if (entry.name === path.basename(resolved)) return false; + // Canonical-root-derived names are not user aliases either (#1259): + // a worktree registered under the canonical repo's basename + // (e.g. `{name: 'repo', path: '/repo/wt-feature'}`) must re-register + // cleanly without firing the duplicate-name collision guard. Without + // this check `entry.name = 'repo'` !== `path.basename('/repo/wt-feature') = 'wt-feature'`, + // so the prior check returns true → `isPreservedAlias = true` → guard + // throws `RegistryNameCollisionError` against the also-registered + // canonical checkout entry. The Claude-Code per-task worktree workflow + // — analyze canonical, then analyze worktree, then re-analyze worktree + // — would break on the third call. + if (entry.name === path.basename(resolveRepoIdentityRoot(resolved))) return false; if (inferredName && entry.name === inferredName) return false; return true; }; @@ -470,7 +481,13 @@ export const registerRepo = async ( name = existing.name; isPreservedAlias = true; } else { - name = inferred ?? path.basename(resolved); + // Canonical-root fallback: when `resolved` is a worktree root, + // derive the registry name from the canonical repo's basename, not + // the worktree slug — see #1259. `resolveRepoIdentityRoot` confines + // the collapse to canonical checkouts and linked worktree roots only, + // so `--skip-git` subdirs of unrelated parent git repos keep using + // their own basename (preserves the #1232/#1233 fix's intent). + name = inferred ?? path.basename(resolveRepoIdentityRoot(resolved)); } } diff --git a/gitnexus/test/unit/git-utils.test.ts b/gitnexus/test/unit/git-utils.test.ts index d1fc187c4b..e8fcdf6b9f 100644 --- a/gitnexus/test/unit/git-utils.test.ts +++ b/gitnexus/test/unit/git-utils.test.ts @@ -180,3 +180,90 @@ describe('getRemoteUrl', () => { } }); }); + +// ─── getCanonicalRepoRoot (#1259) ──────────────────────────────────────── +// +// Critical for the worktree-naming bug: when `gitnexus analyze` runs from a +// linked worktree, deriving `repoName` from `path.basename(getGitRoot(cwd))` +// uses the worktree's directory slug instead of the canonical repo's +// basename. `getCanonicalRepoRoot` exists specifically to dereference +// worktrees via `git rev-parse --git-common-dir`. + +describe('getCanonicalRepoRoot', () => { + it('returns null for a plain temp directory (not a git repo)', async () => { + const { getCanonicalRepoRoot } = await import('../../src/storage/git.js'); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gitnexus-canonical-')); + try { + expect(getCanonicalRepoRoot(tmpDir)).toBeNull(); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('returns null for a non-existent path', async () => { + const { getCanonicalRepoRoot } = await import('../../src/storage/git.js'); + expect(getCanonicalRepoRoot('/tmp/__gitnexus_canonical_nonexistent__')).toBeNull(); + }); + + it('returns the repo root when called from a regular (non-worktree) checkout', async () => { + const { getCanonicalRepoRoot } = await import('../../src/storage/git.js'); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gitnexus-canonical-main-')); + try { + execSync('git init -q', { cwd: tmpDir }); + // Compare via `path.basename` instead of full-path string equality so + // the test is robust to platform path-format quirks (Windows 8.3 short + // names like `C:\Users\RUNNER~1\…` vs long form `C:\Users\runneradmin\…`, + // macOS `/var/folders/… ↔ /private/var/folders/…`). The basename is the + // only part that registry name derivation actually uses (#1259). + const result = getCanonicalRepoRoot(tmpDir); + expect(result).not.toBeNull(); + expect(path.basename(result!)).toBe(path.basename(tmpDir)); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('returns the CANONICAL repo root when called from inside a linked worktree (#1259)', async () => { + const { getCanonicalRepoRoot, getGitRoot } = await import('../../src/storage/git.js'); + const repoDir = fs.mkdtempSync(path.join(os.tmpdir(), 'gitnexus-canonical-wt-')); + try { + execSync('git init -q', { cwd: repoDir }); + // `git worktree add` requires at least one commit on a real branch. + execSync('git config user.email "test@example.com"', { cwd: repoDir }); + execSync('git config user.name "Test"', { cwd: repoDir }); + execSync('git commit --allow-empty -q -m "initial"', { cwd: repoDir }); + // Create a linked worktree on a new branch outside the main checkout. + const worktreeDir = path.join(repoDir, 'wt-feature'); + execSync(`git worktree add -q -b feature "${worktreeDir}"`, { cwd: repoDir }); + + // Both calls go through the same git executable, so their path-format + // output is guaranteed consistent — equality between them is the + // stable cross-platform assertion. (Comparing against `realpathSync` + // breaks on Windows where 8.3 short names and long names diverge.) + const fromMain = getCanonicalRepoRoot(repoDir); + const fromWorktree = getCanonicalRepoRoot(worktreeDir); + + expect(fromMain).not.toBeNull(); + // From inside the worktree: canonical points BACK to the main repo's + // shared `.git`. This is the regression-guard for #1259 — the + // registry name derivation collapses across worktrees. + expect(fromWorktree).toBe(fromMain); + // Basename matches the canonical repo dir (NOT the worktree slug). + expect(path.basename(fromWorktree!)).toBe(path.basename(repoDir)); + expect(path.basename(fromWorktree!)).not.toBe('wt-feature'); + // Sanity: getGitRoot returns the worktree-local root (existing + // behavior unchanged). Compare basenames for the same path-format + // reason as above. + expect(path.basename(getGitRoot(worktreeDir)!)).toBe('wt-feature'); + } finally { + // Best-effort cleanup; worktree teardown can leak open handles on + // Windows so use force. + try { + execSync('git worktree remove -f wt-feature', { cwd: repoDir }); + } catch { + // ignore — fall through to recursive rm + } + fs.rmSync(repoDir, { recursive: true, force: true }); + } + }); +}); diff --git a/gitnexus/test/unit/repo-manager.test.ts b/gitnexus/test/unit/repo-manager.test.ts index 7169eb8062..bbd0fd50eb 100644 --- a/gitnexus/test/unit/repo-manager.test.ts +++ b/gitnexus/test/unit/repo-manager.test.ts @@ -870,3 +870,169 @@ describe('assertSafeStoragePath (#1003)', () => { expect(() => assertSafeStoragePath(entry)).not.toThrow(); }); }); + +// ─── Worktree-aware registry-name fallback (#1259) ───────────────────── +// +// The first @claude review on PR #1296 caught a critical gap: my initial +// fix only patched the early-return path in `runFullAnalysis`, leaving +// the full-analysis path (which calls `registerRepo` directly) still +// using the worktree-slug basename when no `--name` and no remote are +// configured. This block proves `registerRepo`'s OWN basename fallback +// now uses the canonical repo root via `getCanonicalRepoRoot` — the +// regression-guard for the wiring at the registry layer, complementing +// the helper-level coverage in `git-utils.test.ts`. + +describe('registerRepo worktree-aware basename fallback (#1259)', () => { + let tmpHome: Awaited>; + let tmpRepo: Awaited>; + let savedGitnexusHome: string | undefined; + + const meta: RepoMeta = { + repoPath: '', + lastCommit: 'abc1234', + indexedAt: '2026-05-03T00:00:00.000Z', + stats: { files: 1, nodes: 1 }, + }; + + beforeEach(async () => { + tmpHome = await createTempDir('gitnexus-registry-home-'); + tmpRepo = await createTempDir('gitnexus-canonical-repo-'); + savedGitnexusHome = process.env.GITNEXUS_HOME; + process.env.GITNEXUS_HOME = tmpHome.dbPath; + }); + + afterEach(async () => { + if (savedGitnexusHome === undefined) delete process.env.GITNEXUS_HOME; + else process.env.GITNEXUS_HOME = savedGitnexusHome; + await tmpHome.cleanup(); + await tmpRepo.cleanup(); + }); + + it('registerRepo from a linked worktree uses canonical repo basename, not worktree slug', async () => { + // Set up a real git repo with at least one commit (worktree add requires + // a non-empty branch). No remote is configured — that's the trigger for + // the basename fallback this test guards. + execSync('git init -q', { cwd: tmpRepo.dbPath }); + execSync('git config user.email "test@example.com"', { cwd: tmpRepo.dbPath }); + execSync('git config user.name "Test"', { cwd: tmpRepo.dbPath }); + execSync('git commit --allow-empty -q -m "initial"', { cwd: tmpRepo.dbPath }); + + const worktreeDir = path.join(tmpRepo.dbPath, 'wt-feature'); + execSync(`git worktree add -q -b feature "${worktreeDir}"`, { cwd: tmpRepo.dbPath }); + + try { + // Call registerRepo with the WORKTREE path and NO --name. Pre-fix this + // would register under the worktree's basename ("wt-feature"). The + // canonical-root fallback in registerRepo now resolves it to the + // canonical repo's basename (whatever `tmpRepo`'s temp-dir basename + // happens to be). + await registerRepo(worktreeDir, meta); + + const entries = await listRegisteredRepos(); + expect(entries).toHaveLength(1); + // The registered name MUST NOT be the worktree slug. + expect(entries[0].name).not.toBe('wt-feature'); + // It MUST match the canonical repo dir's basename. We compare via + // basename (not full-path equality) for the same Windows 8.3 + // short-name reason as the `getCanonicalRepoRoot` helper tests: + // git and `fs.realpathSync` may resolve to different long/short + // forms of the same path on Windows runners, but both have the + // same `basename`. + expect(entries[0].name).toBe(path.basename(tmpRepo.dbPath)); + } finally { + // Best-effort worktree teardown before the temp-dir cleanup runs. + try { + execSync(`git worktree remove -f "${worktreeDir}"`, { cwd: tmpRepo.dbPath }); + } catch { + // Falls through to recursive rm in afterEach. + } + } + }); + + // Pinned by the second @claude review on PR #1296: the FIRST review-fix + // commit (`7ceb839b`) introduced a regression in `hasCustomAlias`. Once + // a worktree is registered with the canonical basename + // (`{name: 'repo', path: '/repo/wt-feature'}`), `hasCustomAlias` saw + // `'repo' !== path.basename('/repo/wt-feature') = 'wt-feature'` and + // wrongly classified the canonical-root name as a sticky user alias. + // On re-analyze the duplicate-name guard then fired against the + // canonical checkout's entry → `RegistryNameCollisionError` blocking + // the primary "per-task worktree, repeated re-analyze" workflow this + // PR is supposed to FIX. This test exercises the full sequence: + // canonical → worktree → re-worktree, with both paths registered. + it('canonical → worktree → re-worktree re-register does not throw collision (#1259 hasCustomAlias regression)', async () => { + execSync('git init -q', { cwd: tmpRepo.dbPath }); + execSync('git config user.email "test@example.com"', { cwd: tmpRepo.dbPath }); + execSync('git config user.name "Test"', { cwd: tmpRepo.dbPath }); + execSync('git commit --allow-empty -q -m "initial"', { cwd: tmpRepo.dbPath }); + + const worktreeDir = path.join(tmpRepo.dbPath, 'wt-feature'); + execSync(`git worktree add -q -b feature "${worktreeDir}"`, { cwd: tmpRepo.dbPath }); + + try { + // 1. Register the canonical checkout — gets the canonical basename. + await registerRepo(tmpRepo.dbPath, meta); + // 2. Register the worktree — gets the SAME canonical basename + // (because of the `resolveRepoIdentityRoot` fix). Two entries + // coexist with the same name but different paths; this is the + // documented "silent basename collision" behavior, not an error. + await registerRepo(worktreeDir, meta); + // 3. Re-register the worktree. Pre-`hasCustomAlias`-fix this threw + // `RegistryNameCollisionError` because the existing worktree + // entry (`{name: 'repo', path: worktreeDir}`) was misclassified + // as a custom alias by `hasCustomAlias`, fired the guard + // against the canonical entry. With the fix it must complete + // without throwing. + await expect(registerRepo(worktreeDir, meta)).resolves.toBeDefined(); + + // Both registry entries should still be present and named + // canonically. + const entries = await listRegisteredRepos(); + expect(entries).toHaveLength(2); + const canonicalBasename = path.basename(tmpRepo.dbPath); + for (const entry of entries) { + expect(entry.name).toBe(canonicalBasename); + } + } finally { + try { + execSync(`git worktree remove -f "${worktreeDir}"`, { cwd: tmpRepo.dbPath }); + } catch { + // Falls through to recursive rm in afterEach. + } + } + }); + + // Pinned by the third @claude review on PR #1296 (MEDIUM #2): the + // `hasGitDir` gate inside `resolveRepoIdentityRoot` is the safeguard + // that keeps the #1232/#1233 `--skip-git` behaviour working — an + // arbitrary subdir under a parent git repo (no `.git` of its own) + // must NOT collapse to the parent's canonical root, otherwise users + // who analyze a subdir get the parent repo's basename in the + // registry. The COOLIO `--skip-git` integration test in + // `skip-git-cli.test.ts` already proves this end-to-end, but no + // direct test sat at the `registerRepo` layer to guard the gate + // against future refactors. This is that direct test. + it('registerRepo on an arbitrary subdir under a git repo preserves the subdir basename (#1232 / #1233 gate)', async () => { + execSync('git init -q', { cwd: tmpRepo.dbPath }); + execSync('git config user.email "test@example.com"', { cwd: tmpRepo.dbPath }); + execSync('git config user.name "Test"', { cwd: tmpRepo.dbPath }); + execSync('git commit --allow-empty -q -m "initial"', { cwd: tmpRepo.dbPath }); + + // A subdir of the canonical checkout, NOT a worktree (no `.git` file + // here — `mkdirSync` only). `resolveRepoIdentityRoot` must keep + // returning this exact path (basename used for the registry name) + // rather than collapsing to the parent repo's canonical root. + const subdir = path.join(tmpRepo.dbPath, 'arbitrary-subdir'); + await fs.mkdir(subdir, { recursive: true }); + + await registerRepo(subdir, meta); + + const entries = await listRegisteredRepos(); + expect(entries).toHaveLength(1); + // Registered name must be the SUBDIR's basename, NOT the parent + // canonical repo's basename — the inverse of what worktree + // collapse does. + expect(entries[0].name).toBe('arbitrary-subdir'); + expect(entries[0].name).not.toBe(path.basename(tmpRepo.dbPath)); + }); +});