diff --git a/src/bun.js/VirtualMachine.zig b/src/bun.js/VirtualMachine.zig index ee02c636d94..a034100de18 100644 --- a/src/bun.js/VirtualMachine.zig +++ b/src/bun.js/VirtualMachine.zig @@ -2567,14 +2567,30 @@ pub fn swapGlobalForTestIsolation(this: *VirtualMachine) void { this.main_resolved_path = bun.String.empty; this.unhandled_error_counter = 0; - const new_global = JSGlobalObject.createForTestIsolation(this.global, this.console); + const old_global = this.global; + const new_global = JSGlobalObject.createForTestIsolation(old_global, this.console); this.global = new_global; VMHolder.cached_global_object = new_global; this.regular_event_loop.global = new_global; + // macro_event_loop.global is assigned once from this.global at construction + // and would otherwise keep the first file's dead global across the whole run. + this.macro_event_loop.global = new_global; this.has_loaded_constructors = true; if (this.ipc) |ipc| if (ipc == .initialized) { ipc.initialized.globalThis = new_global; }; + // NapiEnv cleanup hooks registered via napi_internal_register_cleanup_zig + // captured the old global in CleanupHook.globalThis; the C++ side has + // already retargeted env->m_globalObject to the new global, so only the + // Zig-side bookkeeping pointer is stale. Nothing currently reads it + // (execute() only calls func(ctx) and there's no per-entry removal + // path), but repoint it anyway so the field doesn't dangle at a freed + // GC cell. + if (this.rare_data) |rare| { + for (rare.cleanup_hooks.items) |*hook| { + if (hook.globalThis == old_global) hook.globalThis = new_global; + } + } // TODO(isolate): drain HTTPThread's keepalive pool. 
It lives on a separate // thread with its own uws loop; pooled sockets are JS-invisible and bounded diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp index 683e43a5218..b472c2e40c0 100644 --- a/src/bun.js/bindings/ZigGlobalObject.cpp +++ b/src/bun.js/bindings/ZigGlobalObject.cpp @@ -617,6 +617,13 @@ extern "C" JSC::JSGlobalObject* Zig__GlobalObject__createForTestIsolation(Zig::G Bun__setDefaultGlobalObject(globalObject); JSC::gcProtect(globalObject); + // NapiEnv holds a raw Zig::GlobalObject*; deferred napi finalizers for + // the old global's objects run on the next event-loop tick — after this + // function returns and the old global is collectable — and would write + // into the dead cell via NapiHandleScope::open. Point those envs at the + // new global and adopt the refs before unprotecting the old one. + globalObject->adoptNapiEnvsForTestIsolation(oldGlobal); + // Drop the permanent root on the previous global so its module registry, // require.cache, and user objects become collectable. JSC's CodeCache and // Bun's RuntimeTranspilerCache are VM/process scoped and survive. @@ -3772,6 +3779,29 @@ bool GlobalObject::hasNapiFinalizers() const return false; } +// `bun test --isolate`: the old global is about to be gcUnprotect()'d and +// collected, but its NapiEnvs may outlive it — GC-enqueued NapiFinalizerTasks +// hold Ref and run on the event loop while loading the *next* file. +// NapiEnv::m_globalObject is a raw pointer; Finalizer.run opens a +// NapiHandleScope through it, which writes m_currentNapiHandleScopeImpl on the +// dead old global and trips `ASSERT(isMarked(cell))` in +// Heap::addToRememberedSet (release: the concurrent marker later visits it and +// segfaults at offset 0x68/0xD0). 
Retarget every env to the new global and +// take ownership of the refs so ~GlobalObject on the old one doesn't drop +// them — the envs stay valid for late finalizers and for the process-exit +// cleanup hooks in rare_data.cleanup_hooks (which hold raw NapiEnv* in .ctx). +void GlobalObject::adoptNapiEnvsForTestIsolation(GlobalObject* oldGlobal) +{ + if (oldGlobal->m_napiEnvs.isEmpty()) + return; + for (auto& env : oldGlobal->m_napiEnvs) + env->retargetGlobalObject(this); + // Ref is move-only; the rvalue appendVector overload moves each + // element out, and we make the source explicitly empty afterwards so + // ~GlobalObject on the old cell is a no-op here. + m_napiEnvs.appendVector(std::exchange(oldGlobal->m_napiEnvs, {})); +} + void GlobalObject::setNodeWorkerEnvironmentData(JSMap* data) { m_nodeWorkerEnvironmentData.set(vm(), this, data); } extern "C" void Zig__GlobalObject__destructOnExit(Zig::GlobalObject* globalObject) diff --git a/src/bun.js/bindings/ZigGlobalObject.h b/src/bun.js/bindings/ZigGlobalObject.h index 8ffc8c0f179..ec74c45e6c4 100644 --- a/src/bun.js/bindings/ZigGlobalObject.h +++ b/src/bun.js/bindings/ZigGlobalObject.h @@ -781,6 +781,7 @@ class GlobalObject : public Bun::GlobalScope { Ref makeNapiEnv(const napi_module&); napi_env makeNapiEnvForFFI(); bool hasNapiFinalizers() const; + void adoptNapiEnvsForTestIsolation(GlobalObject* oldGlobal); private: DOMGuardedObjectSet m_guardedObjects WTF_GUARDED_BY_LOCK(m_gcLock); diff --git a/src/bun.js/bindings/napi.h b/src/bun.js/bindings/napi.h index 25d987b2a07..a1f54ceef48 100644 --- a/src/bun.js/bindings/napi.h +++ b/src/bun.js/bindings/napi.h @@ -380,6 +380,21 @@ struct NapiEnv : public WTF::RefCounted { } inline Zig::GlobalObject* globalObject() const { return m_globalObject; } + // `bun test --isolate` creates a fresh Zig::GlobalObject per file and + // gcUnprotect()s the previous one. 
NapiEnv outlives its owning global — + // GC-enqueued NapiFinalizerTasks hold a Ref and run on the event + // loop *after* the swap. Finalizer.run opens a NapiHandleScope via + // env->globalObject(), which would write m_currentNapiHandleScopeImpl on + // the now-dead old global and trip a write barrier on an unmarked cell + // (debug: `ASSERT(isMarked(cell))` in Heap::addToRememberedSet; release: + // segfault when the marker later walks it). The isolation swap calls this + // to point surviving envs at the new global before unprotecting the old + // one. + inline void retargetGlobalObject(Zig::GlobalObject* newGlobal) + { + ASSERT(&JSC::getVM(newGlobal) == &m_vm); + m_globalObject = newGlobal; + } inline const napi_module& napiModule() const { return m_napiModule; } inline JSC::VM& vm() const { return m_vm; } inline std::optional pendingException() const diff --git a/src/cli/test/parallel/Coordinator.zig b/src/cli/test/parallel/Coordinator.zig index 81ed53741e7..ebb9c9d3bae 100644 --- a/src/cli/test/parallel/Coordinator.zig +++ b/src/cli/test/parallel/Coordinator.zig @@ -1,7 +1,8 @@ //! Process-pool coordinator for `bun test --parallel`. Owns the worker slice, //! drives the event loop, routes IPC frames to per-test output, and handles -//! crash retry / bail / lazy scale-up. Construction and the run loop entry -//! live in `runner.zig`; this file is the per-run state and its methods. +//! crash accounting / panic-abort / bail / lazy scale-up. Construction and +//! the run loop entry live in `runner.zig`; this file is the per-run state +//! and its methods. pub const Coordinator = struct { vm: *jsc.VirtualMachine, @@ -14,10 +15,6 @@ pub const Coordinator = struct { envps: []const [:null]?[*:0]const u8, workers: []Worker, - /// retries[i] counts how many times files[i] has been re-queued after a - /// worker crashed mid-run. 
- retries: []u8, - pending_retry: []?u32, /// Temp dir for per-worker JUnit XML and LCOV coverage fragments; null /// when neither was requested. worker_tmpdir: ?[:0]const u8, @@ -295,24 +292,29 @@ pub const Coordinator = struct { // the IPC pipe has been drained and this reap actually runs. this.live_workers -= 1; this.flushCaptured(w); - var retry_idx: ?u32 = null; if (w.inflight) |idx| { this.breakDots(); this.ensureHeader(idx); - const rel = this.relPath(idx); - if (this.retries[idx] < 1) { - this.retries[idx] += 1; - retry_idx = idx; - Output.prettyError(" crashed running {s}, retrying\n", .{rel}); - } else { - this.accountCrash(idx, @tagName(status)); - } + // A worker dying mid-file is never silently retried. If a test + // intentionally exits (process.exit) that file is marked failed + // and the run continues in a fresh worker. If the worker was + // killed by a fatal signal — SIGILL/SIGTRAP from Bun's own panic + // handler, SIGSEGV/SIGBUS/SIGFPE from native code, SIGABRT from a + // JSC/WTF assertion — that's a Bun or addon bug and must not be + // masked by the rest of the suite passing: abort the whole run so + // the exit status reflects the crash. SIGKILL is treated as a + // regular failure (commonly the OOM killer or the user). + const panicked = isPanicStatus(status); + this.accountCrash(idx, status); Output.flush(); w.inflight = null; + if (panicked) { + this.abortOnWorkerPanic(idx, status); + } } var respawned = false; - if (!this.bailed and (this.hasUndispatchedFiles() or retry_idx != null)) { + if (!this.bailed and this.hasUndispatchedFiles()) { w.ipc.deinit(); w.out.deinit(); w.err.deinit(); @@ -322,23 +324,12 @@ pub const Coordinator = struct { w.process = null; if (w.start()) |_| { respawned = true; - if (retry_idx) |idx| this.pending_retry[w.idx] = idx; } else |e| { Output.err(e, "failed to respawn test worker", .{}); } } if (!respawned) { - // The worker slot is dead. 
Any retry that was queued for it (either - // from this exit or from a prior respawn that died before .ready) - // will never be picked up — count it as a crash so totals stay - // correct and drive() doesn't wait on a files_done that can't - // advance. - if (retry_idx orelse this.pending_retry[w.idx]) |orphan| { - this.pending_retry[w.idx] = null; - this.accountCrash(orphan, "retry abandoned"); - Output.flush(); - } if (!this.bailed and this.live_workers == 0) { this.abortQueuedFiles("no live workers"); } @@ -349,9 +340,13 @@ pub const Coordinator = struct { } } - fn accountCrash(this: *Coordinator, file_idx: u32, reason: []const u8) void { + fn accountCrash(this: *Coordinator, file_idx: u32, status: bun.spawn.Status) void { this.breakDots(); - Output.prettyError(" {s} (crashed: {s})\n", .{ this.relPath(file_idx), reason }); + var buf: [32]u8 = undefined; + Output.prettyError(" {s} (worker crashed: {s})\n", .{ + this.relPath(file_idx), + describeStatus(&buf, status), + }); this.reporter.summary().fail += 1; this.reporter.summary().files += 1; bun.handleOom(this.crashed_files.append(bun.default_allocator, file_idx)); @@ -359,6 +354,72 @@ pub const Coordinator = struct { if (this.bail > 0 and this.reporter.summary().fail >= this.bail) this.bailOut(); } + /// Fatal signals that indicate Bun itself (or a native addon) crashed, + /// as opposed to the test calling process.exit() or being SIGKILL'd by + /// the OOM killer. Bun's panic handler ends in @trap() → SIGILL on + /// POSIX; JSC/WTF assertion failures abort() → SIGABRT. On Windows + /// neither surfaces as a signal — abort() is exit code 3 and NTSTATUS + /// fault codes arrive as a plain exit status, both indistinguishable + /// from process.exit(N) — so this classification is effectively + /// POSIX-only and Windows worker crashes fall into the non-panic + /// per-file-failure branch. 
+ fn isPanicStatus(status: bun.spawn.Status) bool { + const sig = status.signalCode() orelse return false; + return switch (sig) { + .SIGILL, .SIGTRAP, .SIGABRT, .SIGBUS, .SIGFPE, .SIGSEGV, .SIGSYS => true, + else => false, + }; + } + + fn describeStatus(buf: []u8, status: bun.spawn.Status) []const u8 { + return switch (status) { + .exited => |e| std.fmt.bufPrint(buf, "exit code {d}", .{e.code}) catch unreachable, + // SignalCode is non-exhaustive (`_`); @tagName on an unnamed value + // (e.g. Linux RT signals 32–64) is safety-checked illegal behavior. + .signaled => |sig| sig.name() orelse + std.fmt.bufPrint(buf, "signal {d}", .{@intFromEnum(sig)}) catch unreachable, + .err => |e| @tagName(e.getErrno()), + .running => "running", + }; + } + + /// A worker was killed by a crash signal — treat this as a Bun bug, not + /// a test failure. Print the panic banner (even if --bail already set + /// `bailed`), terminate every other worker, and mark all remaining + /// files as aborted so the run ends immediately with a non-zero exit + /// and the panic's stderr (already flushed via flushCaptured) is the + /// last meaningful output, not buried under hundreds of later passes. + fn abortOnWorkerPanic(this: *Coordinator, file_idx: u32, status: bun.spawn.Status) void { + this.breakDots(); + var buf: [32]u8 = undefined; + Output.prettyError( + "\nerror: a test worker process crashed with {s} while running {s}.\n" ++ + "This indicates a bug in Bun or in a native addon, not in the test itself. Aborting.\n", + .{ describeStatus(&buf, status), this.relPath(file_idx) }, + ); + Output.flush(); + // .shutdown() only takes effect between files, so a worker that's + // mid-file would keep producing output after the panic banner. + // Terminate the whole process group (same as the SIGINT path) so the + // run ends now; reapWorker() will account each inflight file as a + // crash when the exit arrives. 
Runs even if --bail already set + // `bailed`, since bailOut() only shutdown()s idle workers and would + // leave inflight ones running past the banner. + for (this.workers[0..this.spawned_count]) |*other| { + if (!other.alive) continue; + if (other.process) |p| { + if (Environment.isPosix) { + _ = std.c.kill(-p.pid, std.posix.SIG.TERM); + } else { + _ = p.kill(1); + } + } + } + if (this.bailed) return; + this.bailed = true; + this.abortQueuedFiles("aborted: worker panicked"); + } + /// Mark every not-yet-dispatched file as failed so `drive()` can exit /// instead of spinning when no live worker remains to make progress. fn abortQueuedFiles(this: *Coordinator, reason: []const u8) void { @@ -375,13 +436,10 @@ pub const Coordinator = struct { } fn assignWorkOrRetry(this: *Coordinator, w: *Worker) void { - if (this.bailed) return w.shutdown(); - if (this.pending_retry[w.idx]) |idx| { - this.pending_retry[w.idx] = null; - w.dispatch(idx, this.files[idx].slice()); - } else { - this.assignWork(w); - } + // Kept as a separate entry point from assignWork so the .ready + // handler has one call site; retry is gone but the indirection + // costs nothing. + this.assignWork(w); } /// Coordinator-side SIGINT/SIGTERM handling. 
The signal handler only sets a diff --git a/src/cli/test/parallel/runner.zig b/src/cli/test/parallel/runner.zig index b2f53039ca5..689d12a4b0f 100644 --- a/src/cli/test/parallel/runner.zig +++ b/src/cli/test/parallel/runner.zig @@ -87,10 +87,6 @@ pub fn runAsCoordinator( } const workers = try allocator.alloc(Worker, K); - const retries = try allocator.alloc(u8, sorted.len); - @memset(retries, 0); - const pending_retry = try allocator.alloc(?u32, K); - @memset(pending_retry, null); var coord = Coordinator{ .vm = vm, @@ -100,8 +96,6 @@ pub fn runAsCoordinator( .argv = argv, .envps = envps, .workers = workers, - .retries = retries, - .pending_retry = pending_retry, .worker_tmpdir = worker_tmpdir, .parallel_limit = K, .scale_up_after_ms = if (ctx.test_options.parallel_delay_ms) |d| diff --git a/test/cli/test/parallel.test.ts b/test/cli/test/parallel.test.ts index 08e3323ace3..17fc9bc47b7 100644 --- a/test/cli/test/parallel.test.ts +++ b/test/cli/test/parallel.test.ts @@ -98,7 +98,7 @@ test("--parallel surfaces failures and exits non-zero", async () => { expect(exitCode).toBe(1); }); -test("--parallel re-queues a file when its worker crashes mid-run", async () => { +test("--parallel marks a file whose worker exits mid-run as failed (no retry)", async () => { using dir = tempDir("parallel-crash", { "a.test.js": `import {test,expect} from "bun:test"; test("a",()=>expect(1).toBe(1));`, "b.test.js": `import {test,expect} from "bun:test"; test("b",()=>expect(1).toBe(1));`, @@ -114,13 +114,14 @@ test("--parallel re-queues a file when its worker crashes mid-run", async () => }); const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); - // good files still ran and passed + // good files still ran and passed in a fresh worker expect(stderr).toContain("a.test.js"); expect(stderr).toContain("b.test.js"); - // crashed file was retried then marked failed - expect(stderr).toContain("crashed running"); + // crashed file is counted 
as a failure once — never retried, so an + // intermittent worker crash can't be masked by a passing second attempt. + expect(stderr).not.toContain("retrying"); expect(stderr).toContain("boom.test.js"); - expect(stderr).toContain("(crashed:"); + expect(stderr).toContain("(worker crashed: exit code 7)"); // summary counts the crash as one failure expect(stderr).toContain("Ran 3 tests across 3 files."); expect(exitCode).toBe(1); @@ -735,11 +736,13 @@ test("--parallel: a test writing garbage to fd 3 does not hang the coordinator", expect(result).not.toBe("TIMEOUT"); const [stdout, stderr, exitCode] = result as [string, string, number]; expect(stdout).toContain("PARALLEL"); - // ok.test.js's pass survives; bad.test.js's worker is treated as crashed once - // its IPC pipe is dropped, then retried. We don't assert exact counts (the - // retry may also corrupt fd 3) — only that the run completes deterministically. + // ok.test.js's pass survives; bad.test.js's worker is treated as crashed + // once its IPC pipe is dropped — no retry, so the run is deterministically + // 1 pass / 1 fail. The coordinator kill(9)s the hostile worker, which on + // POSIX surfaces as SIGKILL (non-panic → no whole-run abort). 
+ expect(stderr).not.toContain("retrying"); expect(stderr).toContain("Ran "); - expect([0, 1]).toContain(exitCode); + expect(exitCode).toBe(1); }); test("--parallel --randomize without --seed is reproducible via the printed seed", async () => { diff --git a/test/napi/napi-app/binding.gyp b/test/napi/napi-app/binding.gyp index 7d59a40c7d6..536ba07b135 100644 --- a/test/napi/napi-app/binding.gyp +++ b/test/napi/napi-app/binding.gyp @@ -89,6 +89,16 @@ "NAPI_DISABLE_CPP_EXCEPTIONS", ], }, + { + "target_name": "isolate_finalizer_addon", + "sources": ["isolate_finalizer_addon.c"], + "include_dirs": [" +#include +#include + +#define CALL(env, call) \ + do { \ + if ((call) != napi_ok) { \ + napi_throw_error((env), NULL, "napi call failed: " #call); \ + return NULL; \ + } \ + } while (0) + +static void finalize(napi_env env, void *data, void *hint) { + (void)env; + (void)hint; + free(data); +} + +// wrap(obj) -> obj — attaches a deferred finalizer and returns the same +// object so the caller can root it (e.g. on globalThis). 
+static napi_value wrap(napi_env env, napi_callback_info info) { + size_t argc = 1; + napi_value argv[1]; + CALL(env, napi_get_cb_info(env, info, &argc, argv, NULL, NULL)); + if (argc < 1) { + napi_throw_type_error(env, NULL, "wrap: expected one argument"); + return NULL; + } + int *data = (int *)malloc(sizeof *data); + // malloc can fail; surface it as a JS error instead of dereferencing NULL. + if (data == NULL) { + napi_throw_error(env, NULL, "wrap: out of memory"); + return NULL; + } + *data = 1; + // Not routed through CALL(): if napi_wrap fails the finalizer is never + // registered, so `data` must be freed here or it leaks. + if (napi_wrap(env, argv[0], data, finalize, NULL, NULL) != napi_ok) { + free(data); + napi_throw_error(env, NULL, "napi call failed: napi_wrap(env, argv[0], data, finalize, NULL, NULL)"); + return NULL; + } + return argv[0]; +} + +NAPI_MODULE_INIT(/* napi_env env, napi_value exports */) { + napi_value fn; + CALL(env, napi_create_function(env, "wrap", NAPI_AUTO_LENGTH, wrap, NULL, &fn)); + CALL(env, napi_set_named_property(env, exports, "wrap", fn)); + return exports; +} diff --git a/test/no-validate-exceptions.txt b/test/no-validate-exceptions.txt index 5936bf7ec47..5bc48a2b9b3 100644 --- a/test/no-validate-exceptions.txt +++ b/test/no-validate-exceptions.txt @@ -66,6 +66,7 @@ test/bundler/bundler_compile.test.ts test/js/node/test/parallel/test-worker-nested-uncaught.js # 3rd party napi +test/regression/issue/30205.test.ts test/integration/sharp/sharp.test.ts test/js/third_party/@duckdb/node-api/duckdb.test.ts test/js/third_party/@napi-rs/canvas/napi-rs-canvas.test.ts diff --git a/test/regression/issue/30205.test.ts b/test/regression/issue/30205.test.ts new file mode 100644 index 00000000000..9bd23e922fb --- /dev/null +++ b/test/regression/issue/30205.test.ts @@ -0,0 +1,200 @@ +// https://github.com/oven-sh/bun/issues/30205 +// +// `bun test --isolate` / `--parallel` creates a fresh Zig::GlobalObject per +// file and gcUnprotect()s the previous one. NapiEnv holds a raw +// `Zig::GlobalObject*` in m_globalObject; for non-experimental addons +// (nm_version != NAPI_VERSION_EXPERIMENTAL), napi finalizers are deferred to +// the event loop as NapiFinalizerTask. Objects rooted on the old global only +// become collectable when the swap unprotects it, so their finalizers run +// while loading the *next* file. 
Finalizer.run → NapiHandleScope::open then +// reads and writes the dead old global (NapiHandleScopeImplStructure(), +// m_currentNapiHandleScopeImpl.set()). Debug builds hit +// ASSERTION FAILED: isMarked(cell) (Heap::addToRememberedSet) +// ASSERTION FAILED: m_cellState == DefinitelyWhite (JSCell::JSCell) +// release builds segfault at 0x68 / 0xD0 in visitChildren. +// +// Also covers the coordinator behaviour: --parallel used to silently retry a +// file once after its worker crashed, which hid exactly this panic and made +// the run exit 0. A fatal-signal crash now aborts the whole run. + +import { spawnSync } from "bun"; +import { beforeAll, describe, expect, test } from "bun:test"; +import { bunEnv, bunExe, isWindows, tempDir } from "harness"; +import { existsSync } from "node:fs"; +import { join } from "node:path"; + +const napiAppDir = join(import.meta.dir, "..", "..", "napi", "napi-app"); +const addon = join(napiAppDir, "build", "Debug", "isolate_finalizer_addon.node"); + +describe("#30205", () => { + beforeAll(() => { + if (existsSync(addon)) return; + // Same one-shot build pattern as test/napi/napi.test.ts; the addon is + // tiny but node-gyp's toolchain detection is the slow part. + const install = spawnSync({ + cmd: [bunExe(), "install", "--verbose"], + cwd: napiAppDir, + env: bunEnv, + stderr: "inherit", + stdout: "inherit", + stdin: "inherit", + }); + if (!install.success) throw new Error("node-gyp build failed"); + }, 120_000); + + // CI's ASAN lane runs this file with BUN_JSC_validateExceptionChecks=1, + // which leaks into the spawned subprocesses via bunEnv → process.env. + // The napi layer has a known unchecked ThrowScope between + // napi_create_function and napi_set_named_property (see the "3rd party + // napi" section in test/no-validate-exceptions.txt — every napi test is + // excluded); under collectContinuously the simulated-throw counter lands + // on the addon's init path and the subprocess aborts before the fixture + // even runs. 
That's orthogonal to the GC UAF this test covers, so strip + // the validator from the child env only. + const env = { + ...bunEnv, + BUN_JSC_collectContinuously: "1", + BUN_JSC_validateExceptionChecks: undefined, + BUN_JSC_dumpSimulatedThrows: undefined, + }; + + // The crash is a GC-timing race; collectContinuously + per-file + // `Bun.gc(true)` before loading the addon makes the previous global's napi + // objects collect *before* any event-loop tick has drained their + // finalizers, so it reproduces deterministically on Linux x64 ASAN too. + // On unpatched main this hits the JSCell cellState assertion on file 2. + // `await 0` at module scope makes loadEntryPointForTestRunner go through + // waitForPromise → event_loop.tick(), which is where the enqueued + // NapiFinalizerTask actually runs. + function makeFixtures(n: number): Record<string, string> { + const files: Record<string, string> = {}; + for (let i = 0; i < n; i++) { + files[`f${i}.test.js`] = ` + import { test, expect } from "bun:test"; + Bun.gc(true); + const addon = require(${JSON.stringify(addon)}); + globalThis.__wrapped = []; + for (let j = 0; j < 1000; j++) + globalThis.__wrapped.push(addon.wrap({ j, pad: new Array(100).fill(j) })); + Bun.gc(true); + await 0; + test("f${i}", () => { expect(globalThis.__wrapped.length).toBe(1000); }); + `; + } + return files; + } + + // collectContinuously is prohibitively slow under Windows CI (same as the + // 29519 regression test); the swap/finalizer path being exercised is + // platform-agnostic, so POSIX coverage is sufficient. 
+ test.skipIf(isWindows).concurrent( + "--isolate: deferred napi finalizers from the previous global don't write to its dead cell", + async () => { + using dir = tempDir("isolate-napi-uaf", makeFixtures(8)); + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--isolate", "."], + env, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + const [, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + // On crash the summary line is never reached; assert on it (and the pass + // count) first so the diff is the actual crash output, not just "expected + // 0, got 134". + expect(stderr).toContain("8 pass"); + expect(stderr).toContain("0 fail"); + expect(stderr).toContain("Ran 8 tests across 8 files."); + expect(exitCode).toBe(0); + }, + 120_000, + ); + + test.skipIf(isWindows).concurrent( + "--parallel: same scenario via the worker path", + async () => { + using dir = tempDir("parallel-napi-uaf", makeFixtures(8)); + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--parallel=2", "."], + env: { ...env, BUN_TEST_PARALLEL_SCALE_MS: "0" }, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + const [, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toContain("8 pass"); + expect(stderr).toContain("0 fail"); + expect(stderr).toContain("Ran 8 tests across 8 files."); + expect(exitCode).toBe(0); + }, + 120_000, + ); + + // Bun's panic handler ends in @trap(), so a real worker panic surfaces + // as a fatal signal (SIGILL/SIGTRAP). Previously the coordinator printed + // "⟳ crashed running …, retrying" and re-ran the file in a fresh worker; + // if the retry happened to pass the whole run exited 0 and the panic was + // invisible. Now a fatal signal aborts the entire run. 
SIGABRT is used + // here rather than inducing a real @panic so the test doesn't depend on + // JIT fault-handler behaviour; from the coordinator's point of view + // SIGABRT is indistinguishable from a JSC assertion failure. Windows has + // no process.kill() signals, and the panic-signal classification is + // POSIX-specific anyway (Windows abort() surfaces as exit code 3 and + // falls into the non-panic branch below). + test.skipIf(isWindows)( + "--parallel: worker killed by a fatal signal aborts the run instead of retrying", + async () => { + using dir = tempDir("parallel-panic-no-retry", { + "ok.test.js": `import {test,expect} from "bun:test"; test("ok",()=>expect(1).toBe(1));`, + "boom.test.js": `import {test} from "bun:test"; test("boom",()=>process.kill(process.pid, "SIGABRT"));`, + }); + // CI lanes with coredump-upload flag any new core file in coresDir as a + // test failure — including the one the worker deliberately produces + // here. ulimit -c 0 on the coordinator is inherited by the workers; + // the test is POSIX-only so /bin/sh is available. Same reasoning as + // the setrlimit(RLIMIT_CORE, {0,0}) in BunProcess.cpp's execve path. + await using proc = Bun.spawn({ + cmd: ["/bin/sh", "-c", `ulimit -c 0 && exec "$@"`, "--", bunExe(), "test", "--parallel=2", "."], + env: { ...bunEnv, BUN_TEST_PARALLEL_SCALE_MS: "0" }, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + const [, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + // No "⟳ … retrying" line; instead the coordinator reports the crash, + // names the signal, and aborts remaining work. 
+ expect(stderr).not.toContain("retrying"); + expect(stderr).toContain("worker crashed: SIGABRT"); + expect(stderr).toMatch(/a test worker process crashed with SIGABRT while running .*boom\.test\.js/); + expect(stderr).toContain("Aborting"); + expect(exitCode).not.toBe(0); + }, + 60_000, + ); + + // process.exit() is a deliberate user action, not a Bun bug. The file is + // marked failed (not retried) and the run continues so the other files' + // results are still reported. + test("--parallel: worker process.exit() is a non-retried failure, not a panic-abort", async () => { + using dir = tempDir("parallel-exit-no-retry", { + "a.test.js": `import {test,expect} from "bun:test"; test("a",()=>expect(1).toBe(1));`, + "b.test.js": `import {test,expect} from "bun:test"; test("b",()=>expect(1).toBe(1));`, + "boom.test.js": `import {test} from "bun:test"; test("boom",()=>process.exit(7));`, + }); + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--parallel=2", "."], + env: { ...bunEnv, BUN_TEST_PARALLEL_SCALE_MS: "0" }, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + const [, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).not.toContain("retrying"); + expect(stderr).toContain("(worker crashed: exit code 7)"); + // Not a panic → no whole-run abort; the other two files still ran. + expect(stderr).not.toContain("Aborting"); + expect(stderr).toContain("Ran 3 tests across 3 files."); + expect(stderr).toMatch(/\b1 fail\b/); + expect(exitCode).toBe(1); + }, 60_000); +});