Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ export const FileRow = memo(function FileRow({
onOpenInEditor,
}: FileRowProps) {
const { dir, basename } = splitPath(file.path);
const oldBasename =
file.oldPath && (file.status === "renamed" || file.status === "copied")
? splitPath(file.oldPath).basename
: null;
const absolutePath = worktreePath
? toAbsoluteWorkspacePath(worktreePath, file.path)
: undefined;
Expand All @@ -63,6 +67,12 @@ export const FileRow = memo(function FileRow({
<FileIcon fileName={basename} className="size-3.5 shrink-0" />
<span className="flex min-w-0 flex-1 items-baseline overflow-hidden">
{dir && <span className="truncate text-muted-foreground">{dir}</span>}
{oldBasename && (
<span className="truncate text-muted-foreground">
{oldBasename}
<span className="px-1">→</span>
</span>
)}
<span className="min-w-[120px] truncate font-medium text-foreground">
{basename}
</span>
Expand Down
57 changes: 52 additions & 5 deletions packages/host-service/src/trpc/router/git/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import type {
} from "./types";
import {
buildBranch,
countUntrackedFileLines,
detectUnstagedRenames,
getChangedFilesForDiff,
mapGitStatus,
parseNumstat,
Expand Down Expand Up @@ -109,9 +111,14 @@ export const gitRouter = router({
`${baseRef}...HEAD`,
]);

// Staged — use status.files index character for correct status
// Staged — use status.files index character for correct status.
// `-M -C` lets the numstat collapse renamed/copied entries so a
// `git add` of `mv old new` yields a single 0/0 rename row
// instead of an A + D pair.
const stagedNumstat = parseNumstat(
await git.raw(["diff", "--numstat", "-z", "--cached"]).catch(() => ""),
await git
.raw(["diff", "--numstat", "-z", "-M", "-C", "--cached"])
.catch(() => ""),
);
const staged: ChangedFile[] = [];
for (const file of status.files) {
Expand All @@ -123,6 +130,8 @@ export const gitRouter = router({
};
staged.push({
path: file.path,
oldPath:
file.from && file.from !== file.path ? file.from : undefined,
status: mapGitStatus(idx),
additions: stats.additions,
deletions: stats.deletions,
Expand All @@ -135,15 +144,18 @@ export const gitRouter = router({
await git.raw(["diff", "--numstat", "-z"]).catch(() => ""),
);
const unstaged: ChangedFile[] = [];
const untrackedFiles: ChangedFile[] = [];
for (const file of status.files) {
const wd = file.working_dir;
if (file.index === "?" && wd === "?") {
unstaged.push({
const entry: ChangedFile = {
path: file.path,
status: "untracked",
additions: 0,
deletions: 0,
});
};
untrackedFiles.push(entry);
unstaged.push(entry);
} else if (wd && wd !== " ") {
const stats = unstagedNumstat.get(file.path) ?? {
additions: 0,
Expand All @@ -157,13 +169,48 @@ export const gitRouter = router({
});
}
}
await countUntrackedFileLines(worktreePath, untrackedFiles);

const hasDeletions = unstaged.some((f) => f.status === "deleted");
const renames = await detectUnstagedRenames(
git,
worktreePath,
untrackedFiles.map((f) => f.path),
hasDeletions,
);

let mergedUnstaged = unstaged;
if (renames.length > 0) {
const consumedDeleted = new Set<string>();
const consumedUntracked = new Set<string>();
for (const r of renames) {
if (r.status === "renamed") consumedDeleted.add(r.oldPath);
consumedUntracked.add(r.newPath);
}
mergedUnstaged = unstaged.filter((f) => {
if (f.status === "deleted" && consumedDeleted.has(f.path))
return false;
if (f.status === "untracked" && consumedUntracked.has(f.path))
return false;
return true;
});
for (const r of renames) {
mergedUnstaged.push({
path: r.newPath,
oldPath: r.oldPath,
status: r.status,
additions: r.additions,
deletions: r.deletions,
});
}
}

return {
currentBranch,
defaultBranch,
againstBase,
staged,
unstaged,
unstaged: mergedUnstaged,
ignoredPaths,
};
}),
Expand Down
194 changes: 193 additions & 1 deletion packages/host-service/src/trpc/router/git/utils/git-helpers.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,46 @@
import type { SimpleGit } from "simple-git";
import {
copyFile,
mkdtemp,
readFile,
realpath,
rm,
stat,
} from "node:fs/promises";
import { tmpdir } from "node:os";
import { isAbsolute, join, relative, resolve, sep } from "node:path";
import simpleGit, { type SimpleGit } from "simple-git";
import { resolveUpstream } from "../../../../runtime/git/refs";
import type { Branch, ChangedFile, FileStatus } from "../types";

// Byte-size ceiling (1 MiB) above which an untracked file is not
// line-counted. Anything over a MB of "source" is almost certainly a
// data file or accidental binary, and the LOC signal isn't useful for it.
const MAX_UNTRACKED_LINE_COUNT_SIZE = 1 * 1024 * 1024;

// Upper bound on how many untracked files are processed at once. Caps
// parallel file I/O so a workspace with thousands of untracked files
// (e.g. fresh checkout with un-gitignored build artifacts) doesn't
// exhaust the process file-descriptor limit.
const UNTRACKED_IO_CONCURRENCY = 64;

/**
 * Run `fn` once for every item, keeping at most `limit` calls in flight.
 *
 * Up to `limit` worker loops are started; each one repeatedly claims the
 * next unprocessed index from a shared cursor, so items are *started* in
 * array order but may finish in any order.
 *
 * @param items - Values to process. The array is only read, never mutated.
 * @param limit - Maximum number of concurrent `fn` calls. Values below 1
 *   (including `NaN`) are treated as 1 and fractional values are rounded
 *   down, so every item is always processed.
 * @param fn - Async callback invoked with each item.
 * @returns A promise that resolves once every item has been processed. It
 *   rejects with the first error thrown by `fn`; workers that are already
 *   running are not cancelled.
 */
async function mapWithConcurrency<T>(
  items: readonly T[],
  limit: number,
  fn: (item: T) => Promise<void>,
): Promise<void> {
  // A limit of 0, a negative number, or NaN would otherwise spawn zero
  // workers and resolve without touching a single item.
  const maxWorkers = limit >= 1 ? Math.floor(limit) : 1;
  const workerCount = Math.min(maxWorkers, items.length);

  // Shared cursor: safe without locking because each claim (`next++`)
  // happens synchronously between awaits.
  let next = 0;
  const worker = async (): Promise<void> => {
    while (true) {
      const i = next++;
      if (i >= items.length) return;
      await fn(items[i] as T);
    }
  };

  await Promise.all(Array.from({ length: workerCount }, worker));
}

/** Map git's single-letter status codes to GitHub-aligned FileStatus */
export function mapGitStatus(code: string): FileStatus {
switch (code) {
Expand Down Expand Up @@ -185,6 +224,159 @@ export async function buildBranch(
};
}

/**
 * Report whether `candidate` is `worktreePath` itself or lives somewhere
 * beneath it, judged purely from the relative path between the two.
 *
 * @param worktreePath - Directory that acts as the containment root.
 * @param candidate - Path to test against that root.
 * @returns `true` when the relative path from root to candidate does not
 *   climb out of the root.
 */
function isPathWithinWorktree(
  worktreePath: string,
  candidate: string,
): boolean {
  const fromRoot = relative(worktreePath, candidate);

  // Identical paths: the root contains itself.
  if (fromRoot === "") return true;

  // An absolute "relative" path means the two share no common base to
  // walk from, so the candidate cannot be inside.
  if (isAbsolute(fromRoot)) return false;

  // Exactly the parent directory.
  if (fromRoot === "..") return false;

  // Anything that starts by stepping up a level escapes the root. Names
  // that merely begin with two dots (e.g. "..cache") are still inside.
  return !fromRoot.startsWith(`..${sep}`);
}

/**
 * Populate `additions` on untracked entries by reading the files from disk.
 *
 * Untracked files are absent from `git diff --numstat` because they are
 * not in the index. Reading them directly is the only batch-friendly
 * option: `git diff --no-index` needs one subprocess per file, and
 * `git add -N` would mutate the index from inside a read.
 *
 * Each entry in `files` is updated in place. An entry keeps its existing
 * `additions` value when the file resolves outside the worktree, is not a
 * regular file, exceeds `MAX_UNTRACKED_LINE_COUNT_SIZE`, looks binary, or
 * cannot be read.
 *
 * @param worktreePath - Root directory the entries' `path` values are
 *   resolved against.
 * @param files - Untracked entries to annotate.
 */
export async function countUntrackedFileLines(
  worktreePath: string,
  files: ChangedFile[],
): Promise<void> {
  if (files.length === 0) return;

  let resolvedRoot: string;
  try {
    resolvedRoot = await realpath(worktreePath);
  } catch {
    // Worktree root itself is unreadable; nothing can be counted.
    return;
  }

  const countOne = async (file: ChangedFile): Promise<void> => {
    try {
      // First containment check is lexical (catches `../` in the path)...
      const lexicalPath = resolve(worktreePath, file.path);
      if (!isPathWithinWorktree(worktreePath, lexicalPath)) return;

      // ...the second is on the symlink-resolved path, so a link that
      // points outside the worktree is not followed.
      const resolvedFile = await realpath(lexicalPath);
      if (!isPathWithinWorktree(resolvedRoot, resolvedFile)) return;

      const info = await stat(resolvedFile);
      if (!info.isFile()) return;
      if (info.size > MAX_UNTRACKED_LINE_COUNT_SIZE) return;

      // Decoding binary as UTF-8 yields U+FFFDs and a bogus non-zero line
      // count, so look for a NUL byte in the first 8KB before decoding,
      // similar to git's own binary heuristic.
      const bytes = await readFile(resolvedFile);
      if (bytes.subarray(0, 8192).includes(0)) return;

      const text = bytes.toString("utf-8");
      if (text === "") {
        file.additions = 0;
        return;
      }

      // A trailing newline terminates the last line rather than starting
      // a new one, so it must not add an extra (empty) line to the count.
      const pieces = text.split(/\r?\n/).length;
      file.additions = text.endsWith("\n") ? pieces - 1 : pieces;
    } catch {
      // Best-effort: a file that vanished or is unreadable keeps the
      // additions value it already had.
    }
  };

  await mapWithConcurrency(files, UNTRACKED_IO_CONCURRENCY, countOne);
}

/**
 * One rename or copy found by `detectUnstagedRenames` between a path in
 * the diff's pre-image and an untracked file in the working tree.
 */
export interface DetectedRename {
/** Source path reported by git's name-status output. */
oldPath: string;
/** Destination path reported by git's name-status output. */
newPath: string;
/** `"renamed"` for git status code `R`, `"copied"` for `C`. */
status: "renamed" | "copied";
/** Added-line count from numstat for `newPath`; 0 when numstat has no entry. */
additions: number;
/** Deleted-line count from numstat for `newPath`; 0 when numstat has no entry. */
deletions: number;
}

/**
 * Detect renames/copies among unstaged changes using git's own similarity
 * detection, without touching the real index.
 *
 * The index file is copied into a fresh temp directory, the untracked
 * paths are marked intent-to-add against that copy (via `GIT_INDEX_FILE`),
 * and `git diff -M -C` is run against it. The temp directory is removed
 * afterwards regardless of outcome.
 *
 * Any failure yields an empty array; the caller still has the separate
 * deleted and untracked entries to display.
 *
 * @param git - Git instance used to locate the index file.
 * @param worktreePath - Worktree root; used to resolve a relative index
 *   path and as the working directory for the temp-index git instance.
 * @param untrackedPaths - Untracked file paths to consider as rename/copy
 *   destinations.
 * @param hasDeletions - Whether the unstaged set contains a deleted file.
 * @returns Detected rename/copy pairs with their numstat line counts.
 */
export async function detectUnstagedRenames(
  git: SimpleGit,
  worktreePath: string,
  untrackedPaths: string[],
  hasDeletions: boolean,
): Promise<DetectedRename[]> {
  const untrackedCount = untrackedPaths.length;
  if (untrackedCount === 0) return [];
  // Renames need a deletion; copy detection between two untracked
  // files needs at least two of them.
  // NOTE(review): git documents `-C` as finding copies only when the
  // original was modified in the same change, which suggests a copy source
  // would be a modified tracked file rather than a second untracked file —
  // confirm this guard skips/permits the intended cases.
  if (!hasDeletions && untrackedCount < 2) return [];

  let realIndexPath: string;
  try {
    const reported = (
      await git.raw(["rev-parse", "--git-path", "index"])
    ).trim();
    if (!reported) return [];
    realIndexPath = isAbsolute(reported)
      ? reported
      : resolve(worktreePath, reported);
  } catch {
    return [];
  }

  let scratchDir: string;
  try {
    scratchDir = await mkdtemp(join(tmpdir(), "superset-renames-"));
  } catch {
    return [];
  }

  try {
    const scratchIndex = join(scratchDir, "index");
    await copyFile(realIndexPath, scratchIndex);

    // Every command issued through this instance reads and writes the
    // scratch copy instead of the repository's real index.
    const scratchGit = simpleGit(worktreePath).env({
      ...process.env,
      GIT_INDEX_FILE: scratchIndex,
    });

    await scratchGit.raw(["add", "--intent-to-add", "--", ...untrackedPaths]);

    // Same detection flags for both outputs so their entries line up.
    const detectionFlags = ["-z", "-M", "-C"];
    const [nameStatusRaw, numstatRaw] = await Promise.all([
      scratchGit.raw(["diff", "--name-status", ...detectionFlags]),
      scratchGit.raw(["diff", "--numstat", ...detectionFlags]),
    ]);

    const statusEntries = parseNameStatus(nameStatusRaw);
    const lineCounts = parseNumstat(numstatRaw);

    const detected: DetectedRename[] = [];
    for (const entry of statusEntries) {
      if (!entry.oldPath) continue;

      // Only the leading letter of the status is inspected.
      const code = entry.status[0];
      const isRename = code === "R";
      if (!isRename && code !== "C") continue;

      const counts = lineCounts.get(entry.path) ?? {
        additions: 0,
        deletions: 0,
      };
      detected.push({
        oldPath: entry.oldPath,
        newPath: entry.path,
        status: isRename ? "renamed" : "copied",
        additions: counts.additions,
        deletions: counts.deletions,
      });
    }
    return detected;
  } catch {
    return [];
  } finally {
    // Cleanup failure is logged but never propagated to the caller.
    try {
      await rm(scratchDir, { recursive: true, force: true });
    } catch (error) {
      console.warn("[git-helpers] failed to remove rename-detection tempdir", {
        tempDir: scratchDir,
        error,
      });
    }
  }
}

export async function getChangedFilesForDiff(
git: SimpleGit,
diffArgs: string[],
Expand Down
Loading