Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions bench/fs-cp/cp.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Recursive fs.cp / fs.cpSync benchmark.
//
// bun cp.mjs
// node cp.mjs
//
// The "regular files only" trees are eligible for the whole-tree clonefile()
// fast path on macOS; the trees containing a symlink always go through the
// node-ported walker.
import { cpSync, mkdirSync, promises, rmSync, symlinkSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { bench, run } from "../runner.mjs";

const root = join(tmpdir(), `bench-fs-cp-${process.pid}`);
rmSync(root, { recursive: true, force: true });

const DIRS = 16;
const FILES_PER_DIR = 16;
const data = Buffer.alloc(4096, "a");

function makeTree(src, { withSymlink = false } = {}) {
for (let d = 0; d < DIRS; d++) {
const dir = join(src, `dir-${d}`);
mkdirSync(dir, { recursive: true });
for (let f = 0; f < FILES_PER_DIR; f++) {
writeFileSync(join(dir, `file-${f}.txt`), data);
}
}
if (withSymlink) {
symlinkSync(join("dir-0", "file-0.txt"), join(src, "link"));
}
}

const plainSrc = join(root, "plain-src");
makeTree(plainSrc);
const symlinkSrc = join(root, "symlink-src");
makeTree(symlinkSrc, { withSymlink: true });

const destRoot = join(root, "dest");
mkdirSync(destRoot, { recursive: true });
let destCount = 0;

// Each copy goes to a brand-new destination (an existing destination switches
// fs.cp into its merge semantics, which is a different operation). The
// computed parameter clears out previously created destinations without
// counting towards the measured time.
function recursiveCopyBench(label, copyOne) {
bench(label, function* () {
yield {
[0]() {
rmSync(destRoot, { recursive: true, force: true });
mkdirSync(destRoot, { recursive: true });
return destRoot;
},
bench(base) {
return copyOne(join(base, `d${destCount++}`));
},
};
});
}

const totalFiles = DIRS * FILES_PER_DIR;
recursiveCopyBench(`cpSync recursive (${totalFiles} files, regular files only)`, dest =>
cpSync(plainSrc, dest, { recursive: true }),
);
recursiveCopyBench(`cpSync recursive (${totalFiles} files, tree contains a symlink)`, dest =>
cpSync(symlinkSrc, dest, { recursive: true }),
);
recursiveCopyBench(`fs.promises.cp recursive (${totalFiles} files, regular files only)`, dest =>
promises.cp(plainSrc, dest, { recursive: true }),
);
recursiveCopyBench(`fs.promises.cp recursive (${totalFiles} files, tree contains a symlink)`, dest =>
promises.cp(symlinkSrc, dest, { recursive: true }),
);

await run();

rmSync(root, { recursive: true, force: true });
Comment on lines +76 to +78

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial | ⚡ Quick win

Clean up the benchmark root on failed runs.

If run() rejects, the temporary source/destination tree is left behind. Wrap the run in try/finally so failed benchmark iterations still clean up.

♻️ Proposed cleanup guard
-await run();
-
-rmSync(root, { recursive: true, force: true });
+try {
+  await run();
+} finally {
+  rmSync(root, { recursive: true, force: true });
+}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@bench/fs-cp/cp.mjs` around lines 76 - 78, The temporary benchmark directory
created in the root variable is not cleaned up when the run() function fails,
leaving behind artifacts. Wrap the await run() call in a try/finally block,
moving the rmSync(root, { recursive: true, force: true }) statement into the
finally block to ensure cleanup occurs regardless of whether run() succeeds or
rejects.

47 changes: 43 additions & 4 deletions src/js/internal/fs/cp-sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,35 @@
return checkParentPathsSync(src, srcStat, destParent);
}

// The native recursive copy (a single clonefile() on macOS) copies symlinks
// verbatim and clones special files, while node rewrites relative symlink
// targets against the source tree and raises ERR_FS_CP_SOCKET /
// ERR_FS_CP_FIFO_PIPE. It is therefore only node-equivalent for trees made of
// regular files and directories; anything else — including entries whose type
// the filesystem does not report — bails to the ported walker. Scan errors
// also bail so the walker surfaces them the way node would.
function treeContainsOnlyFilesAndDirsSync(root) {
const stack = [root];
while (stack.length) {
const dir = stack.pop();
let entries;
try {
entries = readdirSync(dir, { withFileTypes: true });
} catch {
return false;
}
for (let i = 0; i < entries.length; i++) {
const entry = entries[i];
if (entry.isDirectory()) {
stack.push(join(dir, entry.name));
} else if (!entry.isFile()) {
return false;
}
}
}
return true;
}

// node-correct validation before handing off to the native fast path
// (which performs the copy but does not implement node's cp error codes).
function tryNativeFastPathSync(src, dest, opts) {
Expand All @@ -260,10 +289,20 @@
code: "EISDIR",
});
}
// The native copy is only node-equivalent for regular-file -> regular-file
// (or missing dest). Symlinks (node resolves relative link targets),
// directories (may contain symlinks), and special files (node-specific
// error codes) must go through the ported implementation.
if (srcStat.isDirectory()) {
// On macOS the native path clones the whole tree with a single
// clonefile(). Only take it when the result is indistinguishable from
// node's walker: dest must not exist (no merge semantics) and the tree
// must contain only regular files and directories.
return {
ok: process.platform === "darwin" && !destStat && treeContainsOnlyFilesAndDirsSync(src),
checked,
};
}

Check failure on line 301 in src/js/internal/fs/cp-sync.ts

View check run for this annotation

Claude / Claude Code Review

Native fallback loses directory modes when clonefile fails

When `clonefile()` fails at runtime (EXDEV cross-volume, ENOTSUP non-APFS, or ENOENT because dest's parent doesn't exist — the `ok:true` path skips `checkParentDir`), the native fallback creates every directory with `mkdir_recursive_os_path(dest, args::Mkdir::DEFAULT_MODE, …)` and never chmods it, so source directory modes are lost. Before this PR `srcStat.isDirectory()` always fell through to the JS walker (whose `mkDirAndCopy → setDestMode` preserves modes), so this is a regression for cross-v

Check warning on line 301 in src/js/internal/fs/cp-sync.ts

View check run for this annotation

Claude / Claude Code Review

Whole-tree clonefile strips setuid/setgid bits that the walker preserves

The whole-tree `clonefile()` fast path silently strips setuid/setgid bits from regular files in the cloned tree (per `clonefile(2)`: "setuid and setgid bits are turned off in the mode bits for regular files"), whereas node's walker — and the immediately-preceding Bun release — restores them via `chmod(dest, srcStat.mode)` after each file copy. So a `04755` file inside a files-and-dirs-only tree now comes out `0755` on macOS, contradicting the "indistinguishable from node's walker" invariant this
Comment on lines +292 to +301

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 When clonefile() fails at runtime (EXDEV cross-volume, ENOTSUP non-APFS, or ENOENT because dest's parent doesn't exist — the ok:true path skips checkParentDir), the native fallback creates every directory with mkdir_recursive_os_path(dest, args::Mkdir::DEFAULT_MODE, …) and never chmods it, so source directory modes are lost. Before this PR srcStat.isDirectory() always fell through to the JS walker (whose mkDirAndCopy → setDestMode preserves modes), so this is a regression for cross-volume / non-APFS / missing-parent recursive copies on macOS — exactly the cases the PR claims are "indistinguishable from node's walker". The same gap exists in the async path (src/js/internal/fs/cp.ts:166-175_cp_async_directory line 2067), and the new mode-preservation test won't catch it because tmpdir is same-volume APFS where clonefile() succeeds.

Extended reasoning...

What the bug is

tryNativeFastPathSync / tryNativeFastPath now return ok: true for a directory on macOS when dest is missing and the tree contains only regular files and directories. The dispatcher (src/js/node/fs.ts:947-949) then calls the native fs.cpSync directly, which lands in cp_sync_inner (src/runtime/node/node_fs.rs:8247). That function attempts clonefile() (line 8338-8341); on success, modes are preserved because clonefile is a metadata-faithful clone. But on any errno outside {ENAMETOOLONG, EROFS, EINVAL, EACCES, EPERM} — notably EXDEV (cross-volume), ENOTSUP (non-APFS target), ENOENT (dest parent missing), EEXIST — the _ => {} arm at line 8354 falls through to:

// node_fs.rs:8367
match self.mkdir_recursive_os_path(dest, args::Mkdir::DEFAULT_MODE, false) {

where DEFAULT_MODE is 0o777 (line 3467). The source directory's mode is never read and dest is never chmod'd. The loop at 8379-8448 then recurses into each subdirectory via cp_sync_inner (line 8413), which repeats the same mkdir-with-default-mode at every level. The async path is identical: _cp_async_directory at line 2067 calls mkdir_recursive_os_path(normdest, args::Mkdir::DEFAULT_MODE, false) after the clonefile fall-through at line 2025.

Why the gate doesn't prevent it

The new gate checks three things: process.platform === 'darwin', !destStat, and treeContainsOnlyFilesAndDirsSync(src). None of these tell you whether clonefile() will actually succeed. clonefile requires src and dest to be on the same APFS (or HFS+) volume; copying to an external USB drive, an SMB/NFS mount, or a FAT/exFAT volume fails with EXDEV or ENOTSUP regardless of the tree contents. Additionally, the ok: true branch bypasses checkParentDir (which the JS walker would run via cpSyncFn → checkParentDir → mkdirSync(destParent, { recursive: true })), so if dirname(dest) doesn't yet exist, clonefile() itself returns ENOENT and triggers the same fallback.

Step-by-step proof

On macOS, with /Volumes/USB an exFAT-formatted external drive:

  1. mkdir -m 0700 /tmp/src && mkdir -m 0750 /tmp/src/sub && touch /tmp/src/sub/f
  2. fs.cpSync('/tmp/src', '/Volumes/USB/dst', { recursive: true })
  3. tryNativeFastPathSync: darwin ✓, destStat is null ✓, tree is files+dirs only ✓ → ok: true
  4. Native cp_sync_inner runs clonefile('/tmp/src', '/Volumes/USB/dst', 0)EXDEV (cross-device)
  5. EXDEV matches _ => {} → fall through to mkdir_recursive_os_path('/Volumes/USB/dst', 0o777, false)dst created with 0o777 & ~umask (typically 0o755), not 0o700
  6. Recursion into sub: clonefile again fails EXDEV → mkdir_recursive_os_path('.../dst/sub', 0o777, false)0o755, not 0o750
  7. Result: statSync('/Volumes/USB/dst').mode & 0o777 === 0o755, statSync('/Volumes/USB/dst/sub').mode & 0o777 === 0o755

Node.js (and Bun before this PR) would have run mkDirAndCopy(srcMode, …) → setDestMode(dest, srcMode) and produced 0o700 / 0o750.

The ENOENT trigger needs no second volume: fs.cpSync('/tmp/src', '/tmp/does/not/exist/dst', { recursive: true }) on a single APFS volume hits the same path, because the dispatcher never creates /tmp/does/not/exist before calling native, clonefile() returns ENOENT, and the recursive mkdir creates both the parent chain and dst with 0o777.

Why this is a regression

Before this PR, the directory branch of tryNativeFastPathSync did not exist — srcStat.isDirectory() fell through to srcStat.isFile() && …ok: false, and the JS walker handled every directory copy with mkDirAndCopy → setDestMode(dest, srcMode). So on the immediately-preceding commit, all three scenarios above preserved directory modes. The PR description states the fast path is taken "when the result is indistinguishable from node's walker"; in these cases it observably isn't.

Why the new test doesn't catch it

The added "file and directory modes are preserved into a fresh destination" test runs entirely inside tempDirWithFiles (the process tmpdir), which on macOS CI is a single APFS volume. clonefile() succeeds there, so the fallback never runs and the test passes.

Suggested fix

Either (a) in the native fallback, stat the source directory and chmod(dest, src_mode & 0o7777) after mkdir_recursive_os_path at each recursion level (both cp_sync_inner ~line 8367 and _cp_async_directory ~line 2067), or (b) have the JS fast-path create dirname(dest) first (closes the ENOENT hole) and have the native entry point return a sentinel on top-level clonefile EXDEV/ENOTSUP so the JS layer can fall back to cpSyncFn/cpFn with the already-computed checked stats. Option (a) is simpler and also fixes Bun's pre-existing native cp for non-fs.cp callers.

Comment on lines +292 to +301

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 The whole-tree clonefile() fast path silently strips setuid/setgid bits from regular files in the cloned tree (per clonefile(2): "setuid and setgid bits are turned off in the mode bits for regular files"), whereas node's walker — and the immediately-preceding Bun release — restores them via chmod(dest, srcStat.mode) after each file copy. So a 04755 file inside a files-and-dirs-only tree now comes out 0755 on macOS, contradicting the "indistinguishable from node's walker" invariant this gate is meant to enforce. Same applies to the async gate at src/js/internal/fs/cp.ts:166-175; either bail out of the fast path when the scan sees (st_mode & (S_ISUID|S_ISGID)) on a file, or document this as an accepted divergence.

Extended reasoning...

What the bug is

Apple's clonefile(2) man page states that the cloned file's mode bits are taken from the source except that the setuid and setgid bits are turned off for regular files — this is unconditional kernel behaviour applied as a security measure regardless of caller privilege. Node's ported walker, by contrast, follows every copyFile() with setDestMode(dest, srcStat.mode)chmodSync(dest, srcStat.mode), where srcStat.mode is the full 12-bit st_mode from lstat and so includes S_ISUID/S_ISGID. The walker therefore preserves setuid/setgid bits; whole-tree clonefile() does not.

The code path

This PR's tryNativeFastPathSync (cp-sync.ts:292-301) and tryNativeFastPath (cp.ts:166-175) now return ok: true for a directory on macOS when destStat is null and treeContainsOnlyFilesAndDirsSync(src) passes. That hands the copy to the native path, which on macOS is the whole-tree clonefile() at src/runtime/node/node_fs.zig:6259 (sync cpSyncInner) / :827 (async _cpAsyncDirectory). On a zero return both sites immediately return .success / true (zig:6277, zig:844) — there is no subsequent per-file chmod walk to restore the stripped bits.

That the codebase already knows clonefile doesn't preserve full mode is corroborated by the per-file native path at node_fs.zig:6505-6507, which explicitly follows a successful single-file clonefile with Syscall.chmod(dest, stat_.mode).

Why the existing gate doesn't prevent it

treeContainsOnlyFilesAndDirsSync only inspects Dirent types from readdir(..., { withFileTypes: true }). A setuid regular file reports isFile() === true, so the scan happily approves it. The new "file and directory modes are preserved" test in cp.test.ts checks & 0o777 against 0o600/0o700 and would not detect a stripped 04xxx bit.

Step-by-step proof

  1. On macOS/APFS, create src/helper with mode 04755 inside a tree containing only regular files and directories (e.g. a vendored toolchain or an .app bundle's privileged helper).
  2. Call fs.cpSync('src', 'dest', { recursive: true }) with default options and no existing dest.
  3. tryNativeFastPathSync: srcStat.isDirectory() → true, process.platform === 'darwin' → true, !destStat → true, treeContainsOnlyFilesAndDirsSync('src') → true (helper is a regular file). Returns ok: true.
  4. Native cpSyncInner runs clonefile('src', 'dest', 0)0 on APFS → returns .success immediately.
  5. fs.statSync('dest/helper').mode & 0o7777 is 0o755, not 0o4755.
  6. Pre-PR (post-fs: port Node.js v26.3.0 fs tests and fix the gaps they surface — cp error semantics, watcher event delivery, watch ignore+AbortSignal, FileHandle pull/writer, glob port, opendir/Dir, mkdtempDisposable, rmdir-recursive end-of-life, mock.fn (+119 tests) #31830): tryNativeFastPathSync returned ok: false for all directories, the JS walker copied helper via copyFileSync then chmodSync(dest, srcStat.mode), and the result was 0o4755. So this is a behavioural regression vs. the immediately-preceding release as well as vs. Node.

Impact and relationship to other findings

This is distinct from the clonefile-fails fallback issue: that one concerns the manual recursion when clonefile() returns an error; this one fires precisely when clonefile() succeeds, which is the common APFS case the PR is optimizing for. Real-world impact is admittedly niche — setuid binaries inside fs.cp'd trees are uncommon (privileged helpers in .app bundles, sudo-like tools in vendored toolchains), and stripping setuid on copy is arguably the safer direction security-wise — but it does violate the PR's stated "indistinguishable from node's walker" invariant and is a silent divergence the readdir scan was specifically introduced to prevent.

How to fix

Either (a) have the scan lstat each regular file and bail to the walker when (st_mode & (S_ISUID | S_ISGID)) !== 0 (cheap relative to the copy itself, keeps the fast path for the overwhelmingly-common case), (b) walk the cloned tree post-success and chmod files whose source had those bits set, or (c) explicitly document this as an accepted divergence in the gate's comment so the "indistinguishable" claim isn't misleading.

// The single-file native copy is only node-equivalent for regular-file ->
// regular-file (or missing dest). Symlinks (node resolves relative link
// targets) and special files (node-specific error codes) must go through
// the ported implementation.
return { ok: srcStat.isFile() && (!destStat || destStat.isFile()), checked };
}

Expand Down
61 changes: 56 additions & 5 deletions src/js/internal/fs/cp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,19 @@ const {
isSrcSubdir,
} = require("internal/fs/cp-sync");

const { chmod, copyFile, lstat, mkdir, opendir, readlink, stat, symlink, unlink, utimes } = require("node:fs/promises");
const {
chmod,
copyFile,
lstat,
mkdir,
opendir,
readdir,
readlink,
stat,
symlink,
unlink,
utimes,
} = require("node:fs/promises");
const { dirname, isAbsolute, join, parse, resolve } = require("node:path");

const PromisePrototypeThen = $Promise.prototype.$then;
Expand Down Expand Up @@ -107,6 +119,35 @@ async function checkParentPaths(src, srcStat, dest) {
return checkParentPaths(src, srcStat, destParent);
}

// The native recursive copy (a single clonefile() on macOS) copies symlinks
// verbatim and clones special files, while node rewrites relative symlink
// targets against the source tree and raises ERR_FS_CP_SOCKET /
// ERR_FS_CP_FIFO_PIPE. It is therefore only node-equivalent for trees made of
// regular files and directories; anything else — including entries whose type
// the filesystem does not report — bails to the ported walker. Scan errors
// also bail so the walker surfaces them the way node would.
async function treeContainsOnlyFilesAndDirs(root) {
const stack = [root];
while (stack.length) {
const dir = stack.pop();
let entries;
try {
entries = await readdir(dir, { withFileTypes: true });
} catch {
return false;
}
for (let i = 0; i < entries.length; i++) {
const entry = entries[i];
if (entry.isDirectory()) {
stack.push(join(dir, entry.name));
} else if (!entry.isFile()) {
return false;
}
}
}
return true;
}

// node-correct validation before handing off to the native fast path
// (which performs the copy but does not implement node's cp error codes).
async function tryNativeFastPath(src, dest, opts) {
Expand All @@ -122,10 +163,20 @@ async function tryNativeFastPath(src, dest, opts) {
code: "EISDIR",
});
}
// The native copy is only node-equivalent for regular-file -> regular-file
// (or missing dest). Symlinks (node resolves relative link targets),
// directories (may contain symlinks), and special files (node-specific
// error codes) must go through the ported implementation.
if (srcStat.isDirectory()) {
// On macOS the native path clones the whole tree with a single
// clonefile(). Only take it when the result is indistinguishable from
// node's walker: dest must not exist (no merge semantics) and the tree
// must contain only regular files and directories.
return {
ok: process.platform === "darwin" && !destStat && (await treeContainsOnlyFilesAndDirs(src)),
checked,
};
}
// The single-file native copy is only node-equivalent for regular-file ->
// regular-file (or missing dest). Symlinks (node resolves relative link
// targets) and special files (node-specific error codes) must go through
// the ported implementation.
return { ok: srcStat.isFile() && (!destStat || destStat.isFile()), checked };
}

Expand Down
50 changes: 50 additions & 0 deletions test/js/node/fs/cp.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { describe, expect, jest, test } from "bun:test";
import fs from "fs";
import { bunEnv, bunExe, isArm64, isPosix, isWindows, tempDir, tempDirWithFiles } from "harness";
import { mkfifo } from "mkfifo";
import { join } from "path";

const impls = [
Expand Down Expand Up @@ -262,6 +263,55 @@ for (const [name, copy] of impls) {
expect(fs.readFileSync(join(basename, "to", "abs_link"), "utf8")).toBe("hello");
});

test("symlinks - relative target inside the tree is resolved against the source tree", async () => {
// node resolves a relative link target against the directory of the
// source link and writes the absolute result into the copy. A verbatim
// copy of the link (e.g. a whole-tree clonefile) would keep "../a.txt".
const basename = tempDirWithFiles("cp", {
"from/a.txt": "a",
"from/sub/keep.txt": "keep",
});
fs.symlinkSync(join("..", "a.txt"), join(basename, "from", "sub", "link"));

await copy(join(basename, "from"), join(basename, "result"), { recursive: true });

const copiedLink = join(basename, "result", "sub", "link");
expect(fs.lstatSync(copiedLink).isSymbolicLink()).toBe(true);
expect(fs.readlinkSync(copiedLink)).toBe(join(basename, "from", "a.txt"));
expect(fs.readFileSync(copiedLink, "utf8")).toBe("a");
});

test.skipIf(isWindows)("recursive - file and directory modes are preserved into a fresh destination", async () => {
const basename = tempDirWithFiles("cp", {
"from/d/f.txt": "x",
});
fs.chmodSync(join(basename, "from", "d", "f.txt"), 0o600);
fs.chmodSync(join(basename, "from", "d"), 0o700);

await copy(join(basename, "from"), join(basename, "result"), { recursive: true });

expect({
dirMode: fs.statSync(join(basename, "result", "d")).mode & 0o777,
fileMode: fs.statSync(join(basename, "result", "d", "f.txt")).mode & 0o777,
content: fs.readFileSync(join(basename, "result", "d", "f.txt"), "utf8"),
}).toEqual({
dirMode: 0o700,
fileMode: 0o600,
content: "x",
});
});

test.skipIf(isWindows)("recursive - FIFO inside the tree is rejected with ERR_FS_CP_FIFO_PIPE", async () => {
const basename = tempDirWithFiles("cp", {
"from/a.txt": "a",
});
mkfifo(join(basename, "from", "pipe"), 0o666);
expect(fs.lstatSync(join(basename, "from", "pipe")).isFIFO()).toBe(true);

const e = await copyShouldThrow(join(basename, "from"), join(basename, "result"), { recursive: true });
expect(e.code).toBe("ERR_FS_CP_FIFO_PIPE");
});

test("filter - works", async () => {
const basename = tempDirWithFiles("cp", {
"from/a.txt": "a",
Expand Down
Loading