diff --git a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/components/WorkspaceSidebar/hooks/useChangesTab/components/ChangesFileList/components/FileRow/FileRow.tsx b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/components/WorkspaceSidebar/hooks/useChangesTab/components/ChangesFileList/components/FileRow/FileRow.tsx index f5f7ef5fe58..b77d344d761 100644 --- a/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/components/WorkspaceSidebar/hooks/useChangesTab/components/ChangesFileList/components/FileRow/FileRow.tsx +++ b/apps/desktop/src/renderer/routes/_authenticated/_dashboard/v2-workspace/$workspaceId/components/WorkspaceSidebar/hooks/useChangesTab/components/ChangesFileList/components/FileRow/FileRow.tsx @@ -43,6 +43,10 @@ export const FileRow = memo(function FileRow({ onOpenInEditor, }: FileRowProps) { const { dir, basename } = splitPath(file.path); + const oldBasename = + file.oldPath && (file.status === "renamed" || file.status === "copied") + ? splitPath(file.oldPath).basename + : null; const absolutePath = worktreePath ? 
toAbsoluteWorkspacePath(worktreePath, file.path) : undefined; @@ -63,6 +67,12 @@ export const FileRow = memo(function FileRow({ {dir && {dir}} + {oldBasename && ( + + {oldBasename} + + + )} {basename} diff --git a/packages/host-service/src/trpc/router/git/git.ts b/packages/host-service/src/trpc/router/git/git.ts index fbb20996515..4080cf84c0d 100644 --- a/packages/host-service/src/trpc/router/git/git.ts +++ b/packages/host-service/src/trpc/router/git/git.ts @@ -18,6 +18,8 @@ import type { } from "./types"; import { buildBranch, + countUntrackedFileLines, + detectUnstagedRenames, getChangedFilesForDiff, mapGitStatus, parseNumstat, @@ -109,9 +111,14 @@ export const gitRouter = router({ `${baseRef}...HEAD`, ]); - // Staged — use status.files index character for correct status + // Staged — use status.files index character for correct status. + // `-M -C` lets the numstat collapse renamed/copied entries so a + // `git add` of `mv old new` yields a single 0/0 rename row + // instead of an A + D pair. const stagedNumstat = parseNumstat( - await git.raw(["diff", "--numstat", "-z", "--cached"]).catch(() => ""), + await git + .raw(["diff", "--numstat", "-z", "-M", "-C", "--cached"]) + .catch(() => ""), ); const staged: ChangedFile[] = []; for (const file of status.files) { @@ -123,6 +130,8 @@ export const gitRouter = router({ }; staged.push({ path: file.path, + oldPath: + file.from && file.from !== file.path ? file.from : undefined, status: mapGitStatus(idx), additions: stats.additions, deletions: stats.deletions, @@ -135,15 +144,18 @@ export const gitRouter = router({ await git.raw(["diff", "--numstat", "-z"]).catch(() => ""), ); const unstaged: ChangedFile[] = []; + const untrackedFiles: ChangedFile[] = []; for (const file of status.files) { const wd = file.working_dir; if (file.index === "?" 
&& wd === "?") { - unstaged.push({ + const entry: ChangedFile = { path: file.path, status: "untracked", additions: 0, deletions: 0, - }); + }; + untrackedFiles.push(entry); + unstaged.push(entry); } else if (wd && wd !== " ") { const stats = unstagedNumstat.get(file.path) ?? { additions: 0, @@ -157,13 +169,48 @@ export const gitRouter = router({ }); } } + await countUntrackedFileLines(worktreePath, untrackedFiles); + + const hasDeletions = unstaged.some((f) => f.status === "deleted"); + const renames = await detectUnstagedRenames( + git, + worktreePath, + untrackedFiles.map((f) => f.path), + hasDeletions, + ); + + let mergedUnstaged = unstaged; + if (renames.length > 0) { + const consumedDeleted = new Set(); + const consumedUntracked = new Set(); + for (const r of renames) { + if (r.status === "renamed") consumedDeleted.add(r.oldPath); + consumedUntracked.add(r.newPath); + } + mergedUnstaged = unstaged.filter((f) => { + if (f.status === "deleted" && consumedDeleted.has(f.path)) + return false; + if (f.status === "untracked" && consumedUntracked.has(f.path)) + return false; + return true; + }); + for (const r of renames) { + mergedUnstaged.push({ + path: r.newPath, + oldPath: r.oldPath, + status: r.status, + additions: r.additions, + deletions: r.deletions, + }); + } + } return { currentBranch, defaultBranch, againstBase, staged, - unstaged, + unstaged: mergedUnstaged, ignoredPaths, }; }), diff --git a/packages/host-service/src/trpc/router/git/utils/git-helpers.ts b/packages/host-service/src/trpc/router/git/utils/git-helpers.ts index 7fc4cbe80a1..f0b0dc833a9 100644 --- a/packages/host-service/src/trpc/router/git/utils/git-helpers.ts +++ b/packages/host-service/src/trpc/router/git/utils/git-helpers.ts @@ -1,7 +1,46 @@ -import type { SimpleGit } from "simple-git"; +import { + copyFile, + mkdtemp, + readFile, + realpath, + rm, + stat, +} from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { isAbsolute, join, relative, resolve, sep } from "node:path"; 
// Skip line counting for files larger than this — anything over a MB
// of "source" is almost certainly a data file or accidental binary,
// and the LOC signal isn't useful for it.
const MAX_UNTRACKED_LINE_COUNT_SIZE = 1 * 1024 * 1024;

// Cap parallel file I/O so a workspace with thousands of untracked
// files (e.g. fresh checkout with un-gitignored build artifacts)
// doesn't exhaust the process file-descriptor limit.
const UNTRACKED_IO_CONCURRENCY = 64;

/**
 * Invoke `fn` once for every element of `items`, keeping at most `limit`
 * invocations in flight at any moment.
 *
 * Items are handed out in array order through a shared cursor, so each
 * element is processed exactly once. The returned promise resolves when
 * every worker has drained the cursor and rejects with the first error
 * thrown by `fn`; workers that are still running are not cancelled.
 *
 * @param items - Elements to process; the array is not mutated.
 * @param limit - Maximum number of concurrent `fn` calls. Values below 1
 *   (including `NaN`) are treated as 1 so that no item is silently skipped.
 * @param fn - Async callback run for each element.
 */
async function mapWithConcurrency<T>(
	items: readonly T[],
	limit: number,
	fn: (item: T) => Promise<void>,
): Promise<void> {
	if (items.length === 0) return;

	// `Array.from({ length })` yields no workers for 0, negative or NaN
	// lengths, which would drop every item without any error. Always keep
	// at least one worker alive.
	const requested = Math.floor(limit);
	const workerCount = Math.min(items.length, requested >= 1 ? requested : 1);

	let cursor = 0;
	const worker = async (): Promise<void> => {
		// The bounds check and the increment happen without an intervening
		// await, so two workers can never claim the same index.
		while (cursor < items.length) {
			const item = items[cursor++] as T;
			await fn(item);
		}
	};

	await Promise.all(Array.from({ length: workerCount }, () => worker()));
}

/**
 * Report whether `candidate` is `worktreePath` itself or lies beneath it.
 *
 * The decision is purely lexical (based on `path.relative`); symlinks are
 * not resolved here, so callers that care must pass real paths.
 *
 * @param worktreePath - Directory that acts as the containment root.
 * @param candidate - Path to test against the root.
 * @returns `true` when the relative path from the root to the candidate
 *   does not climb out via `..` and is not absolute.
 */
function isPathWithinWorktree(
	worktreePath: string,
	candidate: string,
): boolean {
	const relativePath = relative(worktreePath, candidate);
	if (relativePath === "") return true;
	// `relative` can return an absolute path (e.g. a different drive on
	// Windows); that is never inside the worktree.
	if (isAbsolute(relativePath)) return false;
	// Compare against `..` and `../` exactly so that a file literally
	// named `..foo` is still treated as inside.
	return relativePath !== ".." && !relativePath.startsWith(`..${sep}`);
}
/**
 * Populate `additions` on untracked entries by reading the files from disk.
 *
 * Untracked files are absent from `git diff --numstat` because they are not
 * in the index. Reading them directly is the only batch-friendly option:
 * `git diff --no-index` costs one subprocess per file and `git add -N`
 * would mutate the index from inside a read.
 *
 * Each entry in `files` is updated in place. An entry keeps its existing
 * `additions` value when the file resolves outside the worktree (lexically
 * or through symlinks), is not a regular file, exceeds
 * `MAX_UNTRACKED_LINE_COUNT_SIZE`, looks binary, or cannot be read.
 *
 * @param worktreePath - Root directory the relative `file.path` values are
 *   resolved against.
 * @param files - Untracked entries to annotate; mutated in place.
 */
export async function countUntrackedFileLines(
	worktreePath: string,
	files: ChangedFile[],
): Promise<void> {
	if (files.length === 0) return;

	// Without a resolvable worktree root no containment check is possible.
	const worktreeReal = await realpath(worktreePath).catch(() => null);
	if (worktreeReal === null) return;

	const countOne = async (file: ChangedFile): Promise<void> => {
		// Lexical containment first, then again after symlink resolution.
		const absolutePath = resolve(worktreePath, file.path);
		if (!isPathWithinWorktree(worktreePath, absolutePath)) return;

		const fileReal = await realpath(absolutePath);
		if (!isPathWithinWorktree(worktreeReal, fileReal)) return;

		const stats = await stat(fileReal);
		if (!stats.isFile()) return;
		if (stats.size > MAX_UNTRACKED_LINE_COUNT_SIZE) return;

		// Decoding binary data as UTF-8 yields U+FFFD characters and a
		// bogus non-zero line count, so look for a NUL byte in the first
		// 8KB, mirroring git's own binary heuristic.
		const buf = await readFile(fileReal);
		if (buf.subarray(0, 8192).includes(0)) return;

		const content = buf.toString("utf-8");
		if (content === "") {
			file.additions = 0;
			return;
		}
		// A trailing newline produces one empty trailing segment that is
		// not a line of its own.
		const segments = content.split(/\r?\n/).length;
		file.additions = content.endsWith("\n") ? segments - 1 : segments;
	};

	await mapWithConcurrency(files, UNTRACKED_IO_CONCURRENCY, async (file) => {
		try {
			await countOne(file);
		} catch {
			// Best-effort: an unreadable file simply keeps its current count.
		}
	});
}

/** A rename or copy that git detected between the index and the worktree. */
export interface DetectedRename {
	/** Path of the source file. */
	oldPath: string;
	/** Path of the destination file. */
	newPath: string;
	/** Whether git reported the pair as a rename (`R`) or a copy (`C`). */
	status: "renamed" | "copied";
	/** Added line count taken from the numstat output for `newPath`. */
	additions: number;
	/** Deleted line count taken from the numstat output for `newPath`. */
	deletions: number;
}
// Upper bound on the combined length of the paths handed to a single
// `git add` invocation. Windows caps a whole command line at 32,767
// characters, so stay well below that to leave room for the executable,
// the fixed arguments and per-argument quoting.
const MAX_INTENT_TO_ADD_ARG_CHARS = 16 * 1024;

/**
 * Split `paths` into consecutive batches whose combined length (counting
 * one separator per path) stays within `maxChars`. A single path longer
 * than the budget still gets a batch of its own.
 */
function batchPathsByLength(
	paths: readonly string[],
	maxChars: number,
): string[][] {
	const batches: string[][] = [];
	let current: string[] = [];
	let currentChars = 0;
	for (const path of paths) {
		const cost = path.length + 1;
		if (current.length > 0 && currentChars + cost > maxChars) {
			batches.push(current);
			current = [];
			currentChars = 0;
		}
		current.push(path);
		currentChars += cost;
	}
	if (current.length > 0) batches.push(current);
	return batches;
}

/**
 * Run git's real rename/copy detection across the working tree by
 * copying the index to a temp file, marking untracked files
 * intent-to-add against that copy, and diffing with `-M -C`. The real
 * index is never mutated because every command runs with
 * `GIT_INDEX_FILE` pointing at the copy.
 *
 * Untracked paths are added in length-bounded batches so that a worktree
 * with many untracked files does not overflow the OS command-line limit
 * (which would otherwise fail the spawn and silently disable detection).
 *
 * Falls back to an empty result on any error — the caller still has the
 * unrelated deleted + untracked entries to display. The temp directory is
 * removed in all cases; a failed removal is logged, not thrown.
 *
 * @param git - Client used to locate the worktree's index file.
 * @param worktreePath - Worktree root; used to resolve a relative index
 *   path and as the working directory for the temp-index git client.
 * @param untrackedPaths - Untracked paths to mark intent-to-add.
 * @param hasDeletions - Whether the unstaged set contains a deleted file.
 * @returns One entry per `R`/`C` row reported by `git diff --name-status`.
 */
export async function detectUnstagedRenames(
	git: SimpleGit,
	worktreePath: string,
	untrackedPaths: string[],
	hasDeletions: boolean,
): Promise<DetectedRename[]> {
	if (untrackedPaths.length === 0) return [];
	// Renames need a deletion; copy detection between two untracked
	// files needs at least two of them.
	if (!hasDeletions && untrackedPaths.length < 2) return [];

	let indexPath: string;
	try {
		indexPath = (await git.raw(["rev-parse", "--git-path", "index"])).trim();
		if (!indexPath) return [];
		// `--git-path` may answer with a path relative to the worktree.
		if (!isAbsolute(indexPath)) indexPath = resolve(worktreePath, indexPath);
	} catch {
		return [];
	}

	let tempDir: string;
	try {
		tempDir = await mkdtemp(join(tmpdir(), "superset-renames-"));
	} catch {
		return [];
	}

	try {
		const tempIndex = join(tempDir, "index");
		await copyFile(indexPath, tempIndex);

		const tempGit = simpleGit(worktreePath).env({
			...process.env,
			GIT_INDEX_FILE: tempIndex,
		});

		// Sequential on purpose: every batch writes the same temp index.
		const batches = batchPathsByLength(
			untrackedPaths,
			MAX_INTENT_TO_ADD_ARG_CHARS,
		);
		for (const batch of batches) {
			await tempGit.raw(["add", "--intent-to-add", "--", ...batch]);
		}

		const [nameStatusRaw, numstatRaw] = await Promise.all([
			tempGit.raw(["diff", "--name-status", "-z", "-M", "-C"]),
			tempGit.raw(["diff", "--numstat", "-z", "-M", "-C"]),
		]);

		const nameStatus = parseNameStatus(nameStatusRaw);
		const numstat = parseNumstat(numstatRaw);

		const result: DetectedRename[] = [];
		for (const entry of nameStatus) {
			// Only rows that carry a source path can be renames or copies.
			if (!entry.oldPath) continue;
			const code = entry.status[0];
			if (code !== "R" && code !== "C") continue;
			const stats = numstat.get(entry.path) ?? { additions: 0, deletions: 0 };
			result.push({
				oldPath: entry.oldPath,
				newPath: entry.path,
				status: code === "R" ? "renamed" : "copied",
				additions: stats.additions,
				deletions: stats.deletions,
			});
		}
		return result;
	} catch {
		return [];
	} finally {
		await rm(tempDir, { recursive: true, force: true }).catch((error) => {
			console.warn("[git-helpers] failed to remove rename-detection tempdir", {
				tempDir,
				error,
			});
		});
	}
}