Skip to content
30 changes: 27 additions & 3 deletions gitnexus/src/core/run-analyze.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ import {
registerRepo,
cleanupOldKuzuFiles,
} from '../storage/repo-manager.js';
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
import {
getCurrentCommit,
getRemoteUrl,
hasGitDir,
getInferredRepoName,
resolveRepoIdentityRoot,
} from '../storage/git.js';
import type { CachedEmbedding } from './embeddings/types.js';
import { generateAIContextFiles } from '../cli/ai-context.js';
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
Expand Down Expand Up @@ -168,7 +174,13 @@ export async function runFullAnalysis(
if (currentCommit !== '') {
await ensureGitNexusIgnored(repoPath);
return {
repoName: options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath),
// `resolveRepoIdentityRoot` collapses worktree roots to the
// canonical repo basename (#1259) but leaves arbitrary subdirs
// and `--skip-git` paths unchanged (#1232/#1233 intent preserved).
repoName:
options.registryName ??
getInferredRepoName(repoPath) ??
path.basename(resolveRepoIdentityRoot(repoPath)),
repoPath,
stats: existingMeta.stats ?? {},
alreadyUpToDate: true,
Expand Down Expand Up @@ -345,7 +357,19 @@ export async function runFullAnalysis(
}

const { readServerMapping } = await import('./embeddings/server-mapping.js');
const projectName = path.basename(repoPath);
// Mirror the registry's name-resolution chain so the server-mapping
// lookup key stays aligned with the final registry name (#1259):
// --name → remote-derived → canonical-root basename
// (preserved-alias is intentionally NOT consulted here — server
// mappings are addressed by the operationally-meaningful name the
// user configures, not by a sticky registry-only alias they may not
// know about. The previous canonical-only logic ignored both --name
// and remote-derived names, silently breaking server-mapping for
// anyone with a `--name` alias or remote-named repo.)
const projectName =
options.registryName ??
getInferredRepoName(repoPath) ??
path.basename(resolveRepoIdentityRoot(repoPath));
const serverName = await readServerMapping(projectName);
const embeddingResult = await runEmbeddingPipeline(
executeQuery,
Expand Down
74 changes: 74 additions & 0 deletions gitnexus/src/storage/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,80 @@ export const getGitRoot = (fromPath: string): string | null => {
}
};

/**
 * Get the *canonical* repository root, dereferencing git worktrees.
 *
 * Unlike `getGitRoot` (which uses `git rev-parse --show-toplevel` and
 * returns the WORKTREE's root when called inside a linked worktree),
 * this uses `git rev-parse --git-common-dir` — the shared git
 * directory, identical for the main checkout and every linked
 * worktree — and returns its parent.
 *
 * Why it matters (#1259): when `gitnexus analyze` runs inside a
 * worktree (e.g. `/repo/wt-feature/`), deriving `repoName` from
 * `path.basename(getGitRoot(cwd))` registers the project under the
 * worktree's directory slug (`wt-feature`) instead of the canonical
 * repo's basename (`repo`).
 *
 * Returns `null` when:
 *   - the path is not inside a git repository, does not exist, or `git`
 *     is not available (the `git` invocation throws);
 *   - git reports an empty common dir;
 *   - the common dir lives *inside* another repository's `.git`
 *     directory (the submodule layout `<super>/.git/modules/<name>`).
 *     Its parent is git-internal storage, not a repo root, and using it
 *     would name the project after a directory such as `modules`.
 * Callers can therefore chain safely:
 *   `getCanonicalRepoRoot(p) ?? getGitRoot(p) ?? p`.
 *
 * @param fromPath Directory to run git in. A relative path is interpreted
 *   against the process working directory, both for the git invocation
 *   and for resolving git's answer.
 * @returns Absolute path of the directory that contains the shared git
 *   directory, or `null` as described above.
 */
export const getCanonicalRepoRoot = (fromPath: string): string | null => {
  let stdout: string;
  try {
    // `--path-format=absolute` asks git for an absolute path; by default
    // `--git-common-dir` may be relative to the cwd (e.g. `../.git`).
    stdout = execSync('git rev-parse --path-format=absolute --git-common-dir', {
      cwd: fromPath,
      stdio: ['ignore', 'pipe', 'ignore'],
    }).toString();
  } catch {
    return null;
  }

  // Git releases that predate `--path-format` do not recognise the flag
  // and echo it back on its own line ahead of the (possibly relative)
  // common dir. Drop that echoed line so it never becomes part of the path.
  const reported = stdout.trim().replace(/^--path-format=absolute\r?\n/, '');
  if (!reported) return null;

  // Anchor on `fromPath` (git's cwd), not on `process.cwd()`: an absolute
  // answer passes through unchanged, a relative one resolves correctly.
  const commonDir = path.resolve(fromPath, reported);
  const parentDir = path.dirname(commonDir);

  // A `.git` segment among the ancestors means the common dir is nested
  // in some other repository's git directory (submodule storage), so its
  // parent is not a working-tree root.
  if (parentDir.split(path.sep).includes('.git')) return null;

  // Otherwise the common dir is `<repo>/.git` for the main checkout and
  // every linked worktree; its parent is the canonical repo root.
  return parentDir;
};

/**
 * Pick the *identity root* for `fromPath`: the directory whose basename
 * should be used as the registry name (#1259).
 *
 * The absolute form of `fromPath` is compared with the canonical root
 * reported by `getCanonicalRepoRoot`:
 *
 *   - no canonical root (not a git repo)          → the absolute path itself;
 *   - the path IS the canonical root              → the canonical root;
 *   - the path differs but `hasGitDir` is true for it (a linked-worktree
 *     root) → the canonical root;
 *   - anything else (e.g. a plain subdirectory of a checkout, including a
 *     `--skip-git` subdir of an unrelated parent repo) → the absolute path
 *     itself.
 *
 * `getCanonicalRepoRoot` alone is not enough because it yields the same
 * root for every path inside a repository; the tree-root gate keeps
 * arbitrary subdirectories from being re-keyed under the parent repo's
 * basename (the #1232/#1233 behaviour).
 *
 * @param fromPath Path to resolve; may be relative.
 * @returns An absolute path — either the canonical repo root or the
 *   resolved input.
 */
export const resolveRepoIdentityRoot = (fromPath: string): string => {
  const absolutePath = path.resolve(fromPath);
  const canonicalRoot = getCanonicalRepoRoot(absolutePath);

  // Collapse to the canonical root only for tree roots: the main checkout
  // itself, or a directory carrying its own `.git` entry. The equality
  // test runs first so `hasGitDir` is consulted only when the paths differ.
  if (canonicalRoot && (canonicalRoot === absolutePath || hasGitDir(absolutePath))) {
    return canonicalRoot;
  }

  // Non-git path, or an ordinary subdirectory: keep the caller's path.
  return absolutePath;
};

/**
* Find a git root by checking only `.git` entries on the ancestor chain.
*
Expand Down
21 changes: 19 additions & 2 deletions gitnexus/src/storage/repo-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import fs from 'fs/promises';
import { realpathSync } from 'fs';
import path from 'path';
import os from 'os';
import { getInferredRepoName } from './git.js';
import { getInferredRepoName, resolveRepoIdentityRoot } from './git.js';

/**
* Normalise a repo path for registry comparison across platforms
Expand Down Expand Up @@ -389,6 +389,17 @@ export class RegistryNameCollisionError extends Error {
/**
 * Decide whether a registry entry's name is a user-chosen alias rather
 * than a name that can be derived from the entry's path.
 *
 * A name counts as derived (so the function returns `false`) when it equals
 * any of the following, checked in this order:
 *   1. the basename of the entry's resolved path;
 *   2. the basename of `resolveRepoIdentityRoot(resolvedPath)` — the
 *      canonical-root name a worktree is registered under (#1259). Without
 *      this, an entry such as `{name: 'repo', path: '/repo/wt-feature'}`
 *      would look like a custom alias and, per the original note here,
 *      trip the duplicate-name collision guard when the worktree is
 *      re-analyzed;
 *   3. `inferredName`, when one is supplied.
 *
 * @param entry Registry entry to inspect (`name` and `path` are read).
 * @param inferredName Name inferred for the repo by the caller, or `null`.
 * @returns `true` only when the name matches none of the derived names.
 */
const hasCustomAlias = (entry: RegistryEntry, inferredName: string | null): boolean => {
  const entryPath = path.resolve(entry.path);

  // `||` short-circuits, so later (costlier) derivations are evaluated
  // only when the earlier comparisons did not already match.
  const isDerivedName =
    entry.name === path.basename(entryPath) ||
    entry.name === path.basename(resolveRepoIdentityRoot(entryPath)) ||
    (Boolean(inferredName) && entry.name === inferredName);

  return !isDerivedName;
};
Expand Down Expand Up @@ -470,7 +481,13 @@ export const registerRepo = async (
name = existing.name;
isPreservedAlias = true;
} else {
name = inferred ?? path.basename(resolved);
// Canonical-root fallback: when `resolved` is a worktree root,
// derive the registry name from the canonical repo's basename, not
// the worktree slug — see #1259. `resolveRepoIdentityRoot` confines
// the collapse to canonical checkouts and linked worktree roots only,
// so `--skip-git` subdirs of unrelated parent git repos keep using
// their own basename (preserves the #1232/#1233 fix's intent).
name = inferred ?? path.basename(resolveRepoIdentityRoot(resolved));
}
}

Expand Down
87 changes: 87 additions & 0 deletions gitnexus/test/unit/git-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,90 @@ describe('getRemoteUrl', () => {
}
});
});

// ─── getCanonicalRepoRoot (#1259) ────────────────────────────────────────
//
// Regression suite for worktree naming: `getGitRoot` reports the worktree's
// own root, so a name derived from it is the worktree slug. These tests pin
// that `getCanonicalRepoRoot` (built on `git rev-parse --git-common-dir`)
// reports the shared repository root instead.

describe('getCanonicalRepoRoot', () => {
  // Load the module under test on demand inside each test case.
  const loadGitModule = () => import('../../src/storage/git.js');
  // Create a uniquely named scratch directory under the OS temp dir.
  const makeScratchDir = (prefix: string): string =>
    fs.mkdtempSync(path.join(os.tmpdir(), prefix));
  // Remove a scratch directory and everything below it.
  const removeScratchDir = (dir: string): void => {
    fs.rmSync(dir, { recursive: true, force: true });
  };

  it('returns null for a plain temp directory (not a git repo)', async () => {
    const gitModule = await loadGitModule();
    const scratch = makeScratchDir('gitnexus-canonical-');
    try {
      expect(gitModule.getCanonicalRepoRoot(scratch)).toBeNull();
    } finally {
      removeScratchDir(scratch);
    }
  });

  it('returns null for a non-existent path', async () => {
    const gitModule = await loadGitModule();
    const missing = '/tmp/__gitnexus_canonical_nonexistent__';
    expect(gitModule.getCanonicalRepoRoot(missing)).toBeNull();
  });

  it('returns the repo root when called from a regular (non-worktree) checkout', async () => {
    const gitModule = await loadGitModule();
    const checkout = makeScratchDir('gitnexus-canonical-main-');
    try {
      execSync('git init -q', { cwd: checkout });
      // Only basenames are compared: full paths can legitimately differ in
      // spelling (Windows 8.3 short names, macOS `/var` ↔ `/private/var`),
      // and the basename is what name derivation consumes (#1259).
      const root = gitModule.getCanonicalRepoRoot(checkout);
      expect(root).not.toBeNull();
      expect(path.basename(root!)).toBe(path.basename(checkout));
    } finally {
      removeScratchDir(checkout);
    }
  });

  it('returns the CANONICAL repo root when called from inside a linked worktree (#1259)', async () => {
    const gitModule = await loadGitModule();
    const repoDir = makeScratchDir('gitnexus-canonical-wt-');
    try {
      // `git worktree add` needs a commit on a real branch, so initialise
      // the repo, set an identity, and create an empty initial commit.
      const setupCommands = [
        'git init -q',
        'git config user.email "test@example.com"',
        'git config user.name "Test"',
        'git commit --allow-empty -q -m "initial"',
      ];
      for (const command of setupCommands) {
        execSync(command, { cwd: repoDir });
      }
      // Linked worktree on a new branch, placed in a `wt-feature`
      // directory directly under the main checkout.
      const worktreeDir = path.join(repoDir, 'wt-feature');
      execSync(`git worktree add -q -b feature "${worktreeDir}"`, { cwd: repoDir });

      // Both values come from the same git executable, so comparing them
      // to each other is stable across platforms (unlike comparing with
      // `realpathSync`, which diverges on Windows short/long names).
      const rootFromMain = gitModule.getCanonicalRepoRoot(repoDir);
      const rootFromWorktree = gitModule.getCanonicalRepoRoot(worktreeDir);

      expect(rootFromMain).not.toBeNull();
      // Core #1259 guard: the worktree resolves to the main repo's root.
      expect(rootFromWorktree).toBe(rootFromMain);
      // The basename is the repo directory, never the worktree slug.
      expect(path.basename(rootFromWorktree!)).toBe(path.basename(repoDir));
      expect(path.basename(rootFromWorktree!)).not.toBe('wt-feature');
      // Sanity: `getGitRoot` still reports the worktree-local root.
      expect(path.basename(gitModule.getGitRoot(worktreeDir)!)).toBe('wt-feature');
    } finally {
      // Best effort: detach the worktree first, then delete everything.
      try {
        execSync('git worktree remove -f wt-feature', { cwd: repoDir });
      } catch {
        // ignore — the recursive removal below cleans up regardless
      }
      removeScratchDir(repoDir);
    }
  });
});
Loading
Loading