Skip to content
Merged
14 changes: 13 additions & 1 deletion src/bun.js/VirtualMachine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2567,14 +2567,26 @@ pub fn swapGlobalForTestIsolation(this: *VirtualMachine) void {
this.main_resolved_path = bun.String.empty;
this.unhandled_error_counter = 0;

const new_global = JSGlobalObject.createForTestIsolation(this.global, this.console);
const old_global = this.global;
const new_global = JSGlobalObject.createForTestIsolation(old_global, this.console);
this.global = new_global;
VMHolder.cached_global_object = new_global;
this.regular_event_loop.global = new_global;
Comment thread
coderabbitai[bot] marked this conversation as resolved.
this.has_loaded_constructors = true;
if (this.ipc) |ipc| if (ipc == .initialized) {
ipc.initialized.globalThis = new_global;
};
// NapiEnv cleanup hooks registered via napi_internal_register_cleanup_zig
// captured the old global in CleanupHook.globalThis; the C++ side has
// already retargeted env->m_globalObject to the new global, so only the
// Zig-side bookkeeping pointer is stale. It isn't dereferenced (execute
// only calls func(ctx)), but eql() compares on it — keep it accurate so
// a later remove_env_cleanup_hook from the same addon matches.
if (this.rare_data) |rare| {
for (rare.cleanup_hooks.items) |*hook| {
if (hook.globalThis == old_global) hook.globalThis = new_global;
}
}
Comment thread
robobun marked this conversation as resolved.

// TODO(isolate): drain HTTPThread's keepalive pool. It lives on a separate
// thread with its own uws loop; pooled sockets are JS-invisible and bounded
Expand Down
30 changes: 30 additions & 0 deletions src/bun.js/bindings/ZigGlobalObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,13 @@ extern "C" JSC::JSGlobalObject* Zig__GlobalObject__createForTestIsolation(Zig::G
Bun__setDefaultGlobalObject(globalObject);
JSC::gcProtect(globalObject);

// NapiEnv holds a raw Zig::GlobalObject*; deferred napi finalizers for
// the old global's objects run on the next event-loop tick — after this
// function returns and the old global is collectable — and would write
// into the dead cell via NapiHandleScope::open. Point those envs at the
// new global and adopt the refs before unprotecting the old one.
globalObject->adoptNapiEnvsForTestIsolation(oldGlobal);

// Drop the permanent root on the previous global so its module registry,
// require.cache, and user objects become collectable. JSC's CodeCache and
// Bun's RuntimeTranspilerCache are VM/process scoped and survive.
Expand Down Expand Up @@ -3772,6 +3779,29 @@ bool GlobalObject::hasNapiFinalizers() const
return false;
}

// `bun test --isolate`: the old global is about to be gcUnprotect()'d and
// collected, but its NapiEnvs may outlive it — GC-enqueued NapiFinalizerTasks
// hold Ref<NapiEnv> and run on the event loop while loading the *next* file.
// NapiEnv::m_globalObject is a raw pointer; Finalizer.run opens a
// NapiHandleScope through it, which writes m_currentNapiHandleScopeImpl on the
// dead old global and trips `ASSERT(isMarked(cell))` in
// Heap::addToRememberedSet (release: the concurrent marker later visits it and
// segfaults at offset 0x68/0xD0). Retarget every env to the new global and
// take ownership of the refs so ~GlobalObject on the old one doesn't drop
// them — the envs stay valid for late finalizers and for the process-exit
// cleanup hooks in rare_data.cleanup_hooks (which hold raw NapiEnv* in .ctx).
void GlobalObject::adoptNapiEnvsForTestIsolation(GlobalObject* oldGlobal)
{
if (oldGlobal->m_napiEnvs.isEmpty())
return;
for (auto& env : oldGlobal->m_napiEnvs)
env->retargetGlobalObject(this);
// Ref<NapiEnv> is move-only; the rvalue appendVector overload moves each
// element out, and we make the source explicitly empty afterwards so
// ~GlobalObject on the old cell is a no-op here.
m_napiEnvs.appendVector(std::exchange(oldGlobal->m_napiEnvs, {}));
}

void GlobalObject::setNodeWorkerEnvironmentData(JSMap* data) { m_nodeWorkerEnvironmentData.set(vm(), this, data); }

extern "C" void Zig__GlobalObject__destructOnExit(Zig::GlobalObject* globalObject)
Expand Down
1 change: 1 addition & 0 deletions src/bun.js/bindings/ZigGlobalObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ class GlobalObject : public Bun::GlobalScope {
Ref<NapiEnv> makeNapiEnv(const napi_module&);
napi_env makeNapiEnvForFFI();
bool hasNapiFinalizers() const;
void adoptNapiEnvsForTestIsolation(GlobalObject* oldGlobal);

private:
DOMGuardedObjectSet m_guardedObjects WTF_GUARDED_BY_LOCK(m_gcLock);
Expand Down
11 changes: 11 additions & 0 deletions src/bun.js/bindings/napi.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,17 @@ struct NapiEnv : public WTF::RefCounted<NapiEnv> {
}

inline Zig::GlobalObject* globalObject() const { return m_globalObject; }
// `bun test --isolate` creates a fresh Zig::GlobalObject per file and
// gcUnprotect()s the previous one. NapiEnv outlives its owning global —
// GC-enqueued NapiFinalizerTasks hold a Ref<NapiEnv> and run on the event
// loop *after* the swap. Finalizer.run opens a NapiHandleScope via
// env->globalObject(), which would write m_currentNapiHandleScopeImpl on
// the now-dead old global and trip a write barrier on an unmarked cell
// (debug: `ASSERT(isMarked(cell))` in Heap::addToRememberedSet; release:
// segfault when the marker later walks it). The isolation swap calls this
// to point surviving envs at the new global before unprotecting the old
// one.
inline void retargetGlobalObject(Zig::GlobalObject* newGlobal) { m_globalObject = newGlobal; }
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
inline const napi_module& napiModule() const { return m_napiModule; }
inline JSC::VM& vm() const { return m_vm; }
inline std::optional<JSC::JSValue> pendingException() const
Expand Down
102 changes: 68 additions & 34 deletions src/cli/test/parallel/Coordinator.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,6 @@
envps: []const [:null]?[*:0]const u8,

workers: []Worker,
/// retries[i] counts how many times files[i] has been re-queued after a
/// worker crashed mid-run.
retries: []u8,
pending_retry: []?u32,
/// Temp dir for per-worker JUnit XML and LCOV coverage fragments; null
Comment thread
robobun marked this conversation as resolved.
/// when neither was requested.
worker_tmpdir: ?[:0]const u8,
Expand Down Expand Up @@ -295,24 +291,29 @@
// the IPC pipe has been drained and this reap actually runs.
this.live_workers -= 1;
this.flushCaptured(w);
var retry_idx: ?u32 = null;
if (w.inflight) |idx| {
this.breakDots();
this.ensureHeader(idx);
const rel = this.relPath(idx);
if (this.retries[idx] < 1) {
this.retries[idx] += 1;
retry_idx = idx;
Output.prettyError("<r><yellow>⟳<r> crashed running <b>{s}<r>, retrying\n", .{rel});
} else {
this.accountCrash(idx, @tagName(status));
}
// A worker dying mid-file is never silently retried. If a test
// intentionally exits (process.exit) that file is marked failed
// and the run continues in a fresh worker. If the worker was
// killed by a fatal signal — SIGILL/SIGTRAP from Bun's own panic
// handler, SIGSEGV/SIGBUS/SIGFPE from native code, SIGABRT from a
// JSC/WTF assertion — that's a Bun or addon bug and must not be
// masked by the rest of the suite passing: abort the whole run so
// the exit status reflects the crash. SIGKILL is treated as a
// regular failure (commonly the OOM killer or the user).
const panicked = isPanicStatus(status);
this.accountCrash(idx, status);
Output.flush();
w.inflight = null;
if (panicked) {
this.abortOnWorkerPanic(idx, status);
}
}

var respawned = false;
if (!this.bailed and (this.hasUndispatchedFiles() or retry_idx != null)) {
if (!this.bailed and this.hasUndispatchedFiles()) {
w.ipc.deinit();
w.out.deinit();
w.err.deinit();
Expand All @@ -322,23 +323,12 @@
w.process = null;
if (w.start()) |_| {
respawned = true;
if (retry_idx) |idx| this.pending_retry[w.idx] = idx;
} else |e| {
Output.err(e, "failed to respawn test worker", .{});
}
}

if (!respawned) {
// The worker slot is dead. Any retry that was queued for it (either
// from this exit or from a prior respawn that died before .ready)
// will never be picked up — count it as a crash so totals stay
// correct and drive() doesn't wait on a files_done that can't
// advance.
if (retry_idx orelse this.pending_retry[w.idx]) |orphan| {
this.pending_retry[w.idx] = null;
this.accountCrash(orphan, "retry abandoned");
Output.flush();
}
if (!this.bailed and this.live_workers == 0) {
this.abortQueuedFiles("no live workers");
}
Expand All @@ -349,16 +339,63 @@
}
}

fn accountCrash(this: *Coordinator, file_idx: u32, reason: []const u8) void {
fn accountCrash(this: *Coordinator, file_idx: u32, status: bun.spawn.Status) void {
this.breakDots();
Output.prettyError("<r><red>✗<r> <b>{s}<r> <d>(crashed: {s})<r>\n", .{ this.relPath(file_idx), reason });
Output.prettyError("<r><red>✗<r> <b>{s}<r> <d>(worker crashed: {s})<r>\n", .{
this.relPath(file_idx),
describeStatus(status),
});
this.reporter.summary().fail += 1;
this.reporter.summary().files += 1;
bun.handleOom(this.crashed_files.append(bun.default_allocator, file_idx));
this.files_done += 1;
if (this.bail > 0 and this.reporter.summary().fail >= this.bail) this.bailOut();
}

/// Fatal signals that indicate Bun itself (or a native addon) crashed,
/// as opposed to the test calling process.exit() or being SIGKILL'd by
/// the OOM killer. Bun's panic handler ends in @trap() → SIGILL on
/// POSIX; JSC/WTF assertion failures abort() → SIGABRT. On Windows
/// abort() surfaces as exit code 3 and STATUS_* fault codes map through
/// uv to the POSIX signal equivalents, so the same check covers both.
fn isPanicStatus(status: bun.spawn.Status) bool {
const sig = status.signalCode() orelse return false;
return switch (sig) {
.SIGILL, .SIGTRAP, .SIGABRT, .SIGBUS, .SIGFPE, .SIGSEGV, .SIGSYS => true,
else => false,
};
}
Comment thread
robobun marked this conversation as resolved.

fn describeStatus(status: bun.spawn.Status) []const u8 {
return switch (status) {
.exited => |e| bun.handleOom(std.fmt.allocPrint(bun.default_allocator, "exit code {d}", .{e.code})),
.signaled => |sig| @tagName(sig),
.err => |e| @tagName(e.getErrno()),
.running => "running",

Check warning on line 374 in src/cli/test/parallel/Coordinator.zig

View check run for this annotation

Claude / Claude Code Review

describeStatus leaks allocPrint result for .exited status

Nit: the `.exited` arm here heap-allocates via `std.fmt.allocPrint(bun.default_allocator, ...)` and the returned slice is never freed by `accountCrash`, leaking ~15 bytes per worker that `process.exit()`s mid-file. It's bounded and the coordinator is short-lived so impact is negligible, but since `bun.spawn.Status` already implements `format()` you could just pass `status` directly with `{}` in the `prettyError` format string (or use a stack buffer) and drop `describeStatus` entirely.
Comment thread
robobun marked this conversation as resolved.
};
Comment thread
robobun marked this conversation as resolved.
}

/// A worker was killed by a crash signal — treat this as a Bun bug, not
/// a test failure. Shut down every other worker and mark all remaining
/// files as aborted so the run ends immediately with a non-zero exit
/// and the panic's stderr (already flushed via flushCaptured) is the
/// last meaningful output, not buried under hundreds of later passes.
fn abortOnWorkerPanic(this: *Coordinator, file_idx: u32, status: bun.spawn.Status) void {
if (this.bailed) return;
this.bailed = true;
this.breakDots();
Output.prettyError(
"\n<red>error<r>: a test worker process crashed with <b>{s}<r> while running <b>{s}<r>.\n" ++
"This indicates a bug in Bun or in a native addon, not in the test itself. Aborting.\n",
.{ describeStatus(status), this.relPath(file_idx) },
);
Output.flush();
for (this.workers[0..this.spawned_count]) |*other| {
if (other.alive and other.inflight == null) other.shutdown();
}
Comment thread
claude[bot] marked this conversation as resolved.
this.abortQueuedFiles("aborted: worker panicked");
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

/// Mark every not-yet-dispatched file as failed so `drive()` can exit
/// instead of spinning when no live worker remains to make progress.
fn abortQueuedFiles(this: *Coordinator, reason: []const u8) void {
Expand All @@ -375,13 +412,10 @@
}

fn assignWorkOrRetry(this: *Coordinator, w: *Worker) void {
if (this.bailed) return w.shutdown();
if (this.pending_retry[w.idx]) |idx| {
this.pending_retry[w.idx] = null;
w.dispatch(idx, this.files[idx].slice());
} else {
this.assignWork(w);
}
// Kept as a separate entry point from assignWork so the .ready
// handler has one call site; retry is gone but the indirection
// costs nothing.
this.assignWork(w);
}

/// Coordinator-side SIGINT/SIGTERM handling. The signal handler only sets a
Expand Down
6 changes: 0 additions & 6 deletions src/cli/test/parallel/runner.zig
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ pub fn runAsCoordinator(
}

const workers = try allocator.alloc(Worker, K);
const retries = try allocator.alloc(u8, sorted.len);
@memset(retries, 0);
const pending_retry = try allocator.alloc(?u32, K);
@memset(pending_retry, null);

var coord = Coordinator{
.vm = vm,
Expand All @@ -100,8 +96,6 @@ pub fn runAsCoordinator(
.argv = argv,
.envps = envps,
.workers = workers,
.retries = retries,
.pending_retry = pending_retry,
.worker_tmpdir = worker_tmpdir,
.parallel_limit = K,
.scale_up_after_ms = if (ctx.test_options.parallel_delay_ms) |d|
Expand Down
21 changes: 12 additions & 9 deletions test/cli/test/parallel.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ test("--parallel surfaces failures and exits non-zero", async () => {
expect(exitCode).toBe(1);
});

test("--parallel re-queues a file when its worker crashes mid-run", async () => {
test("--parallel marks a file whose worker exits mid-run as failed (no retry)", async () => {
using dir = tempDir("parallel-crash", {
"a.test.js": `import {test,expect} from "bun:test"; test("a",()=>expect(1).toBe(1));`,
"b.test.js": `import {test,expect} from "bun:test"; test("b",()=>expect(1).toBe(1));`,
Expand All @@ -114,13 +114,14 @@ test("--parallel re-queues a file when its worker crashes mid-run", async () =>
});
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);

// good files still ran and passed
// good files still ran and passed in a fresh worker
expect(stderr).toContain("a.test.js");
expect(stderr).toContain("b.test.js");
// crashed file was retried then marked failed
expect(stderr).toContain("crashed running");
// crashed file is counted as a failure once — never retried, so an
// intermittent worker crash can't be masked by a passing second attempt.
expect(stderr).not.toContain("retrying");
expect(stderr).toContain("boom.test.js");
expect(stderr).toContain("(crashed:");
expect(stderr).toContain("(worker crashed: exit code 7)");
// summary counts the crash as one failure
expect(stderr).toContain("Ran 3 tests across 3 files.");
expect(exitCode).toBe(1);
Expand Down Expand Up @@ -735,11 +736,13 @@ test("--parallel: a test writing garbage to fd 3 does not hang the coordinator",
expect(result).not.toBe("TIMEOUT");
const [stdout, stderr, exitCode] = result as [string, string, number];
expect(stdout).toContain("PARALLEL");
// ok.test.js's pass survives; bad.test.js's worker is treated as crashed once
// its IPC pipe is dropped, then retried. We don't assert exact counts (the
// retry may also corrupt fd 3) — only that the run completes deterministically.
// ok.test.js's pass survives; bad.test.js's worker is treated as crashed
// once its IPC pipe is dropped — no retry, so the run is deterministically
// 1 pass / 1 fail. The coordinator kill(9)s the hostile worker, which on
// POSIX surfaces as SIGKILL (non-panic → no whole-run abort).
expect(stderr).not.toContain("retrying");
expect(stderr).toContain("Ran ");
expect([0, 1]).toContain(exitCode);
expect(exitCode).toBe(1);
});

test("--parallel --randomize without --seed is reproducible via the printed seed", async () => {
Expand Down
10 changes: 10 additions & 0 deletions test/napi/napi-app/binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@
"NAPI_DISABLE_CPP_EXCEPTIONS",
],
},
{
"target_name": "isolate_finalizer_addon",
"sources": ["isolate_finalizer_addon.c"],
"include_dirs": ["<!@(node -p \"require('node-addon-api').include\")"],
"libraries": [],
"dependencies": ["<!(node -p \"require('node-addon-api').gyp\")"],
"defines": [
"NAPI_DISABLE_CPP_EXCEPTIONS",
],
},
{
"target_name": "ffi_addon_1",
"sources": ["ffi_addon_1.c"],
Expand Down
48 changes: 48 additions & 0 deletions test/napi/napi-app/isolate_finalizer_addon.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Non-experimental (NAPI_VERSION 8) addon that napi_wrap()s each object it's
// handed. Because nm_version != NAPI_VERSION_EXPERIMENTAL, the finalizer is
// deferred to a NapiFinalizerTask on the event loop rather than run inside GC
// sweep. Under `bun test --isolate`, objects rooted on the old global only
// become collectable once the swap gcUnprotect()s it; their finalizers then
// run while the *next* file is loading, with a NapiEnv whose m_globalObject
// used to point at the now-dead old global. See test/regression/issue/30205.

#include <js_native_api.h>
#include <node_api.h>
#include <stdlib.h>

#define CALL(env, call) \
do { \
if ((call) != napi_ok) { \
napi_throw_error((env), NULL, "napi call failed: " #call); \
return NULL; \
} \
} while (0)

static void finalize(napi_env env, void *data, void *hint) {
(void)env;
(void)hint;
free(data);
}

// wrap(obj) -> obj — attaches a deferred finalizer and returns the same
// object so the caller can root it (e.g. on globalThis).
static napi_value wrap(napi_env env, napi_callback_info info) {
size_t argc = 1;
napi_value argv[1];
CALL(env, napi_get_cb_info(env, info, &argc, argv, NULL, NULL));
if (argc < 1) {
napi_throw_type_error(env, NULL, "wrap: expected one argument");
return NULL;
}
int *data = (int *)malloc(sizeof *data);
*data = 1;
CALL(env, napi_wrap(env, argv[0], data, finalize, NULL, NULL));
return argv[0];
}

NAPI_MODULE_INIT(/* napi_env env, napi_value exports */) {
napi_value fn;
CALL(env, napi_create_function(env, "wrap", NAPI_AUTO_LENGTH, wrap, NULL, &fn));
CALL(env, napi_set_named_property(env, exports, "wrap", fn));
return exports;
}
Loading
Loading