From 1aa671d79a75e8833b293b2f5bb8a23d49e74454 Mon Sep 17 00:00:00 2001 From: Ciro Spaciari MacBook Date: Fri, 19 Jun 2026 02:14:05 +0000 Subject: [PATCH] fs: take the clonefile fast path for recursive fs.cp on macOS when it is node-equivalent Recursive fs.cp/cpSync/promises.cp always went through the JS walker, which copies every file individually. On macOS the native path can clone the whole tree with a single clonefile(), but it copies symlinks verbatim and clones special files, while node rewrites relative symlink targets against the source tree and raises ERR_FS_CP_SOCKET / ERR_FS_CP_FIFO_PIPE. Route recursive copies to the native path only when the result is indistinguishable from the walker: the destination does not exist (no merge semantics) and a metadata-only readdir scan of the source tree finds nothing but regular files and directories. Anything else - symlinks, FIFOs, sockets, devices, entries with unknown types, or a scan error - keeps using the ported walker. Non-macOS platforms are unchanged. Adds a recursive fs.cp benchmark (bench/fs-cp) and tests covering the cases the scan must bail on: relative in-tree symlink targets resolved against the source tree, mode preservation into a fresh destination, and FIFO rejection. --- bench/fs-cp/cp.mjs | 80 +++++++++++++++++++++++++++++++++++ src/js/internal/fs/cp-sync.ts | 47 ++++++++++++++++++-- src/js/internal/fs/cp.ts | 61 +++++++++++++++++++++++--- test/js/node/fs/cp.test.ts | 50 ++++++++++++++++++++++ 4 files changed, 229 insertions(+), 9 deletions(-) create mode 100644 bench/fs-cp/cp.mjs diff --git a/bench/fs-cp/cp.mjs b/bench/fs-cp/cp.mjs new file mode 100644 index 00000000000..4a9f6fd436f --- /dev/null +++ b/bench/fs-cp/cp.mjs @@ -0,0 +1,80 @@ +// Recursive fs.cp / fs.cpSync benchmark. +// +// bun cp.mjs +// node cp.mjs +// +// The "regular files only" trees are eligible for the whole-tree clonefile() +// fast path on macOS; the trees containing a symlink always go through the +// node-ported walker. +import { cpSync, mkdirSync, promises, rmSync, symlinkSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { bench, run } from "../runner.mjs"; + +const root = join(tmpdir(), `bench-fs-cp-${process.pid}`); +rmSync(root, { recursive: true, force: true }); + +const DIRS = 16; +const FILES_PER_DIR = 16; +const data = Buffer.alloc(4096, "a"); + +function makeTree(src, { withSymlink = false } = {}) { + for (let d = 0; d < DIRS; d++) { + const dir = join(src, `dir-${d}`); + mkdirSync(dir, { recursive: true }); + for (let f = 0; f < FILES_PER_DIR; f++) { + writeFileSync(join(dir, `file-${f}.txt`), data); + } + } + if (withSymlink) { + symlinkSync(join("dir-0", "file-0.txt"), join(src, "link")); + } +} + +const plainSrc = join(root, "plain-src"); +makeTree(plainSrc); +const symlinkSrc = join(root, "symlink-src"); +makeTree(symlinkSrc, { withSymlink: true }); + +const destRoot = join(root, "dest"); +mkdirSync(destRoot, { recursive: true }); +let destCount = 0; + +// Each copy goes to a brand-new destination (an existing destination switches +// fs.cp into its merge semantics, which is a different operation). The +// computed parameter clears out previously created destinations without +// counting towards the measured time. +function recursiveCopyBench(label, copyOne) { + bench(label, function* () { + yield { + [0]() { + rmSync(destRoot, { recursive: true, force: true }); + mkdirSync(destRoot, { recursive: true }); + return destRoot; + }, + bench(base) { + return copyOne(join(base, `d${destCount++}`)); + }, + }; + }); +} + +const totalFiles = DIRS * FILES_PER_DIR; +recursiveCopyBench(`cpSync recursive (${totalFiles} files, regular files only)`, dest => + cpSync(plainSrc, dest, { recursive: true }), +); +recursiveCopyBench(`cpSync recursive (${totalFiles} files, tree contains a symlink)`, dest => + cpSync(symlinkSrc, dest, { recursive: true }), +); +recursiveCopyBench(`fs.promises.cp recursive (${totalFiles} files, regular files only)`, dest => + promises.cp(plainSrc, dest, { recursive: true }), +); +recursiveCopyBench(`fs.promises.cp recursive (${totalFiles} files, tree contains a symlink)`, dest => + promises.cp(symlinkSrc, dest, { recursive: true }), +); + +try { + await run(); +} finally { + rmSync(root, { recursive: true, force: true }); +} diff --git a/src/js/internal/fs/cp-sync.ts b/src/js/internal/fs/cp-sync.ts index 731fb151328..131349cc3f2 100644 --- a/src/js/internal/fs/cp-sync.ts +++ b/src/js/internal/fs/cp-sync.ts @@ -245,6 +245,35 @@ function checkParentPathsSync(src, srcStat, dest) { return checkParentPathsSync(src, srcStat, destParent); } +// The native recursive copy (a single clonefile() on macOS) copies symlinks +// verbatim and clones special files, while node rewrites relative symlink +// targets against the source tree and raises ERR_FS_CP_SOCKET / +// ERR_FS_CP_FIFO_PIPE. It is therefore only node-equivalent for trees made of +// regular files and directories; anything else — including entries whose type +// the filesystem does not report — bails to the ported walker. Scan errors +// also bail so the walker surfaces them the way node would. +function treeContainsOnlyFilesAndDirsSync(root) { + const stack = [root]; + while (stack.length) { + const dir = stack.pop(); + let entries; + try { + entries = readdirSync(dir, { withFileTypes: true }); + } catch { + return false; + } + for (let i = 0; i < entries.length; i++) { + const entry = entries[i]; + if (entry.isDirectory()) { + stack.push(join(dir, entry.name)); + } else if (!entry.isFile()) { + return false; + } + } + } + return true; +} + // node-correct validation before handing off to the native fast path // (which performs the copy but does not implement node's cp error codes). function tryNativeFastPathSync(src, dest, opts) { @@ -260,10 +289,20 @@ function tryNativeFastPathSync(src, dest, opts) { code: "EISDIR", }); } - // The native copy is only node-equivalent for regular-file -> regular-file - // (or missing dest). Symlinks (node resolves relative link targets), - // directories (may contain symlinks), and special files (node-specific - // error codes) must go through the ported implementation. + if (srcStat.isDirectory()) { + // On macOS the native path clones the whole tree with a single + // clonefile(). Only take it when the result is indistinguishable from + // node's walker: dest must not exist (no merge semantics) and the tree + // must contain only regular files and directories. + return { + ok: process.platform === "darwin" && !destStat && treeContainsOnlyFilesAndDirsSync(src), + checked, + }; + } + // The single-file native copy is only node-equivalent for regular-file -> + // regular-file (or missing dest). Symlinks (node resolves relative link + // targets) and special files (node-specific error codes) must go through + // the ported implementation. return { ok: srcStat.isFile() && (!destStat || destStat.isFile()), checked }; } diff --git a/src/js/internal/fs/cp.ts b/src/js/internal/fs/cp.ts index b3ffa5f3f20..f7db620f191 100644 --- a/src/js/internal/fs/cp.ts +++ b/src/js/internal/fs/cp.ts @@ -14,7 +14,19 @@ const { isSrcSubdir, } = require("internal/fs/cp-sync"); -const { chmod, copyFile, lstat, mkdir, opendir, readlink, stat, symlink, unlink, utimes } = require("node:fs/promises"); +const { + chmod, + copyFile, + lstat, + mkdir, + opendir, + readdir, + readlink, + stat, + symlink, + unlink, + utimes, +} = require("node:fs/promises"); const { dirname, isAbsolute, join, parse, resolve } = require("node:path"); const PromisePrototypeThen = $Promise.prototype.$then; @@ -107,6 +119,35 @@ async function checkParentPaths(src, srcStat, dest) { return checkParentPaths(src, srcStat, destParent); } +// The native recursive copy (a single clonefile() on macOS) copies symlinks +// verbatim and clones special files, while node rewrites relative symlink +// targets against the source tree and raises ERR_FS_CP_SOCKET / +// ERR_FS_CP_FIFO_PIPE. It is therefore only node-equivalent for trees made of +// regular files and directories; anything else — including entries whose type +// the filesystem does not report — bails to the ported walker. Scan errors +// also bail so the walker surfaces them the way node would. +async function treeContainsOnlyFilesAndDirs(root) { + const stack = [root]; + while (stack.length) { + const dir = stack.pop(); + let entries; + try { + entries = await readdir(dir, { withFileTypes: true }); + } catch { + return false; + } + for (let i = 0; i < entries.length; i++) { + const entry = entries[i]; + if (entry.isDirectory()) { + stack.push(join(dir, entry.name)); + } else if (!entry.isFile()) { + return false; + } + } + } + return true; +} + // node-correct validation before handing off to the native fast path // (which performs the copy but does not implement node's cp error codes). async function tryNativeFastPath(src, dest, opts) { @@ -122,10 +163,20 @@ async function tryNativeFastPath(src, dest, opts) { code: "EISDIR", }); } - // The native copy is only node-equivalent for regular-file -> regular-file - // (or missing dest). Symlinks (node resolves relative link targets), - // directories (may contain symlinks), and special files (node-specific - // error codes) must go through the ported implementation. + if (srcStat.isDirectory()) { + // On macOS the native path clones the whole tree with a single + // clonefile(). Only take it when the result is indistinguishable from + // node's walker: dest must not exist (no merge semantics) and the tree + // must contain only regular files and directories. + return { + ok: process.platform === "darwin" && !destStat && (await treeContainsOnlyFilesAndDirs(src)), + checked, + }; + } + // The single-file native copy is only node-equivalent for regular-file -> + // regular-file (or missing dest). Symlinks (node resolves relative link + // targets) and special files (node-specific error codes) must go through + // the ported implementation. return { ok: srcStat.isFile() && (!destStat || destStat.isFile()), checked }; } diff --git a/test/js/node/fs/cp.test.ts b/test/js/node/fs/cp.test.ts index 0808cbc03c1..d7fc554fc58 100644 --- a/test/js/node/fs/cp.test.ts +++ b/test/js/node/fs/cp.test.ts @@ -1,6 +1,7 @@ import { describe, expect, jest, test } from "bun:test"; import fs from "fs"; import { bunEnv, bunExe, isArm64, isPosix, isWindows, tempDir, tempDirWithFiles } from "harness"; +import { mkfifo } from "mkfifo"; import { join } from "path"; const impls = [ @@ -262,6 +263,55 @@ for (const [name, copy] of impls) { expect(fs.readFileSync(join(basename, "to", "abs_link"), "utf8")).toBe("hello"); }); + test("symlinks - relative target inside the tree is resolved against the source tree", async () => { + // node resolves a relative link target against the directory of the + // source link and writes the absolute result into the copy. A verbatim + // copy of the link (e.g. a whole-tree clonefile) would keep "../a.txt". + const basename = tempDirWithFiles("cp", { + "from/a.txt": "a", + "from/sub/keep.txt": "keep", + }); + fs.symlinkSync(join("..", "a.txt"), join(basename, "from", "sub", "link")); + + await copy(join(basename, "from"), join(basename, "result"), { recursive: true }); + + const copiedLink = join(basename, "result", "sub", "link"); + expect(fs.lstatSync(copiedLink).isSymbolicLink()).toBe(true); + expect(fs.readlinkSync(copiedLink)).toBe(join(basename, "from", "a.txt")); + expect(fs.readFileSync(copiedLink, "utf8")).toBe("a"); + }); + + test.skipIf(isWindows)("recursive - file and directory modes are preserved into a fresh destination", async () => { + const basename = tempDirWithFiles("cp", { + "from/d/f.txt": "x", + }); + fs.chmodSync(join(basename, "from", "d", "f.txt"), 0o600); + fs.chmodSync(join(basename, "from", "d"), 0o700); + + await copy(join(basename, "from"), join(basename, "result"), { recursive: true }); + + expect({ + dirMode: fs.statSync(join(basename, "result", "d")).mode & 0o777, + fileMode: fs.statSync(join(basename, "result", "d", "f.txt")).mode & 0o777, + content: fs.readFileSync(join(basename, "result", "d", "f.txt"), "utf8"), + }).toEqual({ + dirMode: 0o700, + fileMode: 0o600, + content: "x", + }); + }); + + test.skipIf(isWindows)("recursive - FIFO inside the tree is rejected with ERR_FS_CP_FIFO_PIPE", async () => { + const basename = tempDirWithFiles("cp", { + "from/a.txt": "a", + }); + mkfifo(join(basename, "from", "pipe"), 0o666); + expect(fs.lstatSync(join(basename, "from", "pipe")).isFIFO()).toBe(true); + + const e = await copyShouldThrow(join(basename, "from"), join(basename, "result"), { recursive: true }); + expect(e.code).toBe("ERR_FS_CP_FIFO_PIPE"); + }); + test("filter - works", async () => { const basename = tempDirWithFiles("cp", { "from/a.txt": "a",