Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 18 additions & 7 deletions gitnexus/src/server/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -829,52 +829,63 @@
});

// Delete a repo — removes index, clone dir (if any), and unregisters it
app.delete('/api/repo', async (req, res) => {
try {
const repoName = requestedRepo(req);
if (!repoName) {
res.status(400).json({ error: 'Missing repo name' });
return;
}
const entry = await resolveRepo(repoName);
if (!entry) {
res.status(404).json({ error: 'Repository not found' });
return;
}

// Acquire repo lock — prevents deleting while analyze/embed is in flight
const lockKey = getStoragePath(entry.path);
const lockErr = acquireRepoLock(lockKey);
if (lockErr) {
res.status(409).json({ error: lockErr });
return;
}

try {
// Close any open LadybugDB handle before deleting files
try {
await closeLbug();
} catch {}

// 1. Delete the .gitnexus index/storage directory
const storagePath = getStoragePath(entry.path);
await fs.rm(storagePath, { recursive: true, force: true }).catch(() => {});

// 2. Delete the cloned repo dir if it lives under ~/.gitnexus/repos/
const cloneDir = getCloneDir(entry.name);
// 2. Delete the cloned repo dir if it lives under ~/.gitnexus/repos/.
// getCloneDir now throws on names that are not filesystem-safe (e.g.
// local repos registered with names like "my project" or "org/repo").
// Such repos legitimately have no clone dir, so treat the rejection as
// "nothing to clean up" rather than letting it fail the delete handler.
let cloneDir: string | null = null;
try {
const stat = await fs.stat(cloneDir);
if (stat.isDirectory()) {
await fs.rm(cloneDir, { recursive: true, force: true });
}
cloneDir = getCloneDir(entry.name);
} catch {
/* clone dir may not exist (local repos) */
/* repo name not eligible for a clone dir (local repo) */
}
if (cloneDir) {
try {
const stat = await fs.stat(cloneDir);
if (stat.isDirectory()) {
await fs.rm(cloneDir, { recursive: true, force: true });
}
} catch {
/* clone dir may not exist */
}
}

// 3. Unregister from the global registry
const { unregisterRepo } = await import('../storage/repo-manager.js');
await unregisterRepo(entry.path);

Check failure

Code scanning / CodeQL

Missing rate limiting High

This route handler performs
a file system access
, but is not rate-limited.
This route handler performs
a file system access
, but is not rate-limited.
This route handler performs
a file system access
, but is not rate-limited.
// 4. Reinitialize backend to reflect the removal
await backend.init().catch(() => {});

Expand Down
223 changes: 211 additions & 12 deletions gitnexus/src/server/git-clone.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,47 @@ import os from 'os';
import fs from 'fs/promises';
import { isIP } from 'net';

/** Extract the repository name from a git URL (HTTPS or SSH). */
/** Root directory for all cloned repositories. Targets must resolve inside this. */
const CLONE_ROOT = path.resolve(path.join(os.homedir(), '.gitnexus', 'repos'));

// A valid git repository name is filesystem-safe: alphanumerics plus `. _ -`.
// Rejecting anything else (including `..`, `/`, `\`, shell metacharacters)
// guarantees getCloneDir(repoName) cannot escape CLONE_ROOT regardless of
// how the caller derived repoName.
const REPO_NAME_PATTERN = /^[a-zA-Z0-9._-]+$/;

/**
* Extract the repository name from a git URL (HTTPS or SSH).
*
* Throws if the URL does not yield a filesystem-safe last segment. A name
* like `..` or `foo/bar` would otherwise let `getCloneDir(name)` escape the
* clone root via path traversal.
*/
export function extractRepoName(url: string): string {
const cleaned = url.replace(/\/+$/, '');
const lastSegment = cleaned.split(/[/:]/).pop() || 'unknown';
return lastSegment.replace(/\.git$/, '');
// Strip trailing slashes without a regex to avoid polynomial-ReDoS on
// pathological inputs like `https://x.com/y` + '/'.repeat(1e6). CodeQL's
// js/polynomial-redos flagged `/\/+$/` here.
let end = url.length;
while (end > 0 && url.charCodeAt(end - 1) === 47 /* '/' */) end--;
const cleaned = url.slice(0, end);

const lastSegment = cleaned.split(/[/:]/).pop() || '';
const stripped = lastSegment.endsWith('.git') ? lastSegment.slice(0, -4) : lastSegment;

if (!stripped || stripped === '.' || stripped === '..' || !REPO_NAME_PATTERN.test(stripped)) {
throw new Error('Could not extract a valid repository name from URL');
}
return stripped;
}

/** Get the clone target directory for a repo name. */
export function getCloneDir(repoName: string): string {
return path.join(os.homedir(), '.gitnexus', 'repos', repoName);
// Re-validate at the boundary even though extractRepoName already checked —
// callers may pass a repoName from another source (test fixtures, scripts).
if (!repoName || repoName === '.' || repoName === '..' || !REPO_NAME_PATTERN.test(repoName)) {
throw new Error('Invalid repository name');
}
return path.join(CLONE_ROOT, repoName);
}

// Cloud metadata hostnames that must never be reachable via user-supplied URLs
Expand Down Expand Up @@ -196,32 +227,200 @@ export interface CloneProgress {
message: string;
}

/**
* Build the `git clone` argument list for a given URL and target directory.
*
* The `--` separator is non-negotiable: it stops git from parsing a URL that
* starts with `--` (e.g. `--upload-pack=evil`) as an option flag, which would
* otherwise execute an attacker-chosen subprocess (CodeQL
* js/second-order-command-line-injection, alerts #166/#167).
*
* Exported so the separator placement is testable without mocking spawn.
*/
export function buildCloneArgs(url: string, targetDir: string): string[] {
return ['clone', '--depth', '1', '--', url, targetDir];
}

/**
* Normalize a git URL into a comparable form.
*
* Two URLs are considered the same repository when their normalized forms
* are identical: lowercased hostname, no trailing `.git`, no trailing
* slashes on the path, default port stripped. Path comparison stays
* case-sensitive because that's how Git hosts treat the path component on
* the wire (case-folding GitHub's web UI is a separate convenience).
*
* Returns the original input if URL parsing fails — the caller can still
* compare with the literal string for non-URL forms (e.g. SSH `git@host:`).
*/
export function normalizeGitUrlForCompare(url: string): string {
// Strip trailing slashes and a trailing `.git` for both URL and SSH forms.
let trimmed = url;
while (trimmed.length > 0 && trimmed[trimmed.length - 1] === '/') {
trimmed = trimmed.slice(0, -1);
}
if (trimmed.endsWith('.git')) trimmed = trimmed.slice(0, -4);

try {
const parsed = new URL(trimmed);
parsed.hostname = parsed.hostname.toLowerCase();
// strip default ports
if (
(parsed.protocol === 'https:' && parsed.port === '443') ||
(parsed.protocol === 'http:' && parsed.port === '80')
) {
parsed.port = '';
}
// Strip credentials — never material to repo identity, and including
// them would let two equivalent URLs (with/without basic auth) compare
// unequal.
parsed.username = '';
parsed.password = '';
// Recompose without trailing slash on the path.
let pathname = parsed.pathname;
while (pathname.length > 1 && pathname[pathname.length - 1] === '/') {
pathname = pathname.slice(0, -1);
}
parsed.pathname = pathname;
return `${parsed.protocol}//${parsed.hostname}${parsed.port ? ':' + parsed.port : ''}${parsed.pathname}`;
} catch {
// Non-URL forms (e.g. `git@github.com:owner/repo`) — return the trimmed
// form lowercased on the hostname-ish prefix. SSH-form normalization
// is best-effort; exact-string compare is sufficient for the threat
// model (mismatched origins still differ at the literal level).
return trimmed.toLowerCase();
}
}

/**
* Read `remote.origin.url` from an existing clone using `git config --get`.
*
* Returns `null` if the config key is absent, the spawn fails, or the
* directory isn't a git repository. The caller decides what a missing
* remote means for its threat model — for cloneOrPull, a missing remote
* on an existing clone is treated as a refuse-to-pull condition.
*/
export function getRemoteOriginUrl(cwd: string): Promise<string | null> {
return new Promise((resolve) => {
const proc = spawn('git', ['config', '--get', 'remote.origin.url'], {
cwd,
stdio: ['ignore', 'pipe', 'pipe'],
env: { ...process.env, GIT_TERMINAL_PROMPT: '0' },
});
let stdout = '';
proc.stdout.on('data', (chunk: Buffer) => {
stdout += chunk;
});
proc.on('close', (code) => {
if (code === 0 && stdout.trim()) {
resolve(stdout.trim());
} else {
resolve(null);
}
});
proc.on('error', () => resolve(null));
});
}

/**
* Verify that an existing clone's `remote.origin.url` matches the requested
* URL (after normalization). Throws on mismatch or missing remote.
*
* Closes the wrong-repo silent-analysis vector that Codex's adversarial
* review on PR #1325 surfaced: clone dirs are keyed by URL basename, so a
* request for `https://gitlab.example/attacker/repo.git` would otherwise
* collide with an existing `~/.gitnexus/repos/repo` cloned from a different
* origin and `git pull --ff-only` would silently succeed against the wrong
* remote.
*
* Exported so the comparison logic is testable in isolation against any
* tmpdir-based fixture, without needing to populate CLONE_ROOT.
*/
export async function assertRemoteMatchesRequestedUrl(
targetDir: string,
requestedUrl: string,
): Promise<void> {
const remoteUrl = await getRemoteOriginUrl(targetDir);
if (remoteUrl === null) {
throw new Error(`Existing clone at ${targetDir} has no remote.origin — refusing to pull`);
}
if (normalizeGitUrlForCompare(remoteUrl) !== normalizeGitUrlForCompare(requestedUrl)) {
throw new Error(
`Existing clone at ${targetDir} has remote ${remoteUrl}, not the requested URL ${requestedUrl}`,
);
}
}

/**
* Clone or pull a git repository.
* If targetDir doesn't exist: git clone --depth 1
* If targetDir exists with .git: git pull --ff-only
* If targetDir exists with .git: git pull --ff-only (after verifying the
* existing clone's remote.origin matches the requested URL).
*
* Security:
* - targetDir must resolve inside CLONE_ROOT (~/.gitnexus/repos/). The
* path.relative containment barrier below is the inline canonical idiom
* CodeQL's js/path-injection sanitizer recognizes.
* - validateGitUrl runs unconditionally on the requested URL — both the
* clone path and the pull path. An earlier shape only validated on the
* clone branch; an existing clone with the same basename let an
* attacker's URL skip the SSRF / scheme / private-IP checks (Codex
* adversarial review on PR #1325).
* - When the target already has `.git`, the existing clone's
* remote.origin.url is fetched and compared (normalized) to the
* requested URL. Refuses to pull if they differ — this closes the
* wrong-repo silent-analysis vector where two URLs sharing a basename
* would collide on the same on-disk clone dir.
* - The git URL is passed after a `--` separator so a value beginning with
* `--` (e.g. `--upload-pack=evil`) cannot be interpreted as a git option
* (CodeQL js/second-order-command-line-injection).
*/
export async function cloneOrPull(
url: string,
targetDir: string,
onProgress?: (progress: CloneProgress) => void,
): Promise<string> {
const exists = await fs.access(path.join(targetDir, '.git')).then(
// Containment barrier — inline with the canonical path.relative idiom so
// CodeQL recognizes the sanitizer at every following filesystem and
// subprocess sink. The same `safeTarget` is used for every downstream
// path operation — no reassignment that the analyzer could lose track of.
//
// Limitation: this is a lexical containment check, not a realpath check.
// If an attacker can place a symlink under CLONE_ROOT pointing outside it,
// the lexical check passes but the clone lands at the symlink target. That
// requires pre-existing local write access to CLONE_ROOT, so the threat
// model considers it out of scope; CodeQL js/path-injection accepts the
// lexical form. Tracked as a follow-up if defense-in-depth is needed.
const safeTarget = path.resolve(targetDir);
const rel = path.relative(CLONE_ROOT, safeTarget);
if (rel === '' || rel.startsWith('..') || path.isAbsolute(rel)) {
throw new Error(`Clone target must be a subdirectory of ${CLONE_ROOT}`);
}

// Always validate the requested URL — the prior shape only ran this in
// the clone branch, leaving the pull branch as an SSRF / blocked-host
// bypass when an existing clone shared the basename of an attacker URL.
validateGitUrl(url);

const exists = await fs.access(path.join(safeTarget, '.git')).then(
() => true,
() => false,
);

if (exists) {
// Confirm the existing clone is actually the same repository the caller
// requested. Without this check, a pull would silently succeed against
// whatever remote the dir was originally cloned from.
await assertRemoteMatchesRequestedUrl(safeTarget, url);
onProgress?.({ phase: 'pulling', message: 'Pulling latest changes...' });
await runGit(['pull', '--ff-only'], targetDir);
await runGit(['pull', '--ff-only'], safeTarget);
} else {
validateGitUrl(url);
await fs.mkdir(path.dirname(targetDir), { recursive: true });
await fs.mkdir(path.dirname(safeTarget), { recursive: true });
onProgress?.({ phase: 'cloning', message: `Cloning ${url}...` });
await runGit(['clone', '--depth', '1', url, targetDir]);
await runGit(buildCloneArgs(url, safeTarget));
}

return targetDir;
return safeTarget;
}

function runGit(args: string[], cwd?: string): Promise<void> {
Expand Down
Loading
Loading