Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/jsc/VirtualMachine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,22 @@ impl VirtualMachine {
// JSC `Strong`/`Weak` handles against a live HandleSet.
self.event_loop_mut().release_queued_tasks_for_shutdown();

// Release RareData's and RuntimeState's JSC GC handles (the
// default S3 client Strong, SQL context Strongs, per-VM DNS data)
// while the HandleSet is still alive — `destroy()` below drops
// both after the VM deref inside `destructOnExit`, and a `Strong`
// dropped then would unlink a HandleNode from freed memory.
if let Some(rare) = self.rare_data.as_deref_mut() {
rare.s3_default_client.deinit();
}
if let Some(hooks) = runtime_hooks() {
// SAFETY: live per-thread VM on the JS thread; JSC teardown
// is `destructOnExit` below.
unsafe {
(hooks.release_runtime_state_js_handles)(core::ptr::from_mut(self));
}
}

Zig__GlobalObject__destructOnExit(self.global());

// lastChanceToFinalize() above runs Listener/Server finalize →
Expand Down Expand Up @@ -1806,6 +1822,20 @@ pub struct RuntimeHooks {
/// `vm` is the live per-thread VM; `runtime_state` must still be installed
/// and the JSC heap must not have been swept yet.
pub cancel_all_timers: unsafe fn(vm: *mut VirtualMachine),
/// Release the JSC GC handles owned by the high-tier `RuntimeState` (the
/// SQL contexts' `Strong` callbacks, the per-VM DNS data with its
/// pending-query promises) while the JSC VM is still alive.
/// `RuntimeState` itself drops in `deinit_runtime_state`, which runs
/// after `~VM` (`WebWorker__teardownJSCVM` /
/// `Zig__GlobalObject__destructOnExit`) — a `Strong` dropped there
/// unlinks a `HandleNode` from the already-freed `HandleSet`.
///
/// # Safety
/// `vm` is the live per-thread VM on the JS thread; `runtime_state` must
/// still be installed and the JSC VM must not have been torn down yet.
/// Call after the last JS that can repopulate these handles
/// (socket-group close callbacks).
pub release_runtime_state_js_handles: unsafe fn(vm: *mut VirtualMachine),
}

/// Canonical `EventLoopCtx` vtable for a `*mut VirtualMachine` owner — the JS
Expand Down
15 changes: 15 additions & 0 deletions src/jsc/web_worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,21 @@ impl WebWorker {
// is step 3 below).
rare.close_all_socket_groups(unsafe { &*vm_ptr });
}
// Release RareData's and RuntimeState's JSC GC handles (the
// default S3 client Strong, SQL context Strongs, per-VM DNS data)
// while the HandleSet is still alive — `destroy()` in step 5
// drops both after teardownJSCVM freed it, and a `Strong` dropped
// then would unlink a HandleNode from freed memory. Runs after
// `close_all_socket_groups` so SQL on_close callbacks can still
// dispatch through the contexts.
if let Some(rare) = vm.rare_data.as_deref_mut() {
rare.s3_default_client.deinit();
}
if let Some(hooks) = runtime_hooks() {
// SAFETY: `vm_ptr` unpublished above (sole owner);
// `runtime_state` still installed; JSC teardown is step 3.
unsafe { (hooks.release_runtime_state_js_handles)(vm_ptr) };
}
exit_code = i32::from(vm.exit_handler.exit_code);
global_object = Some(vm.global);
}
Expand Down
9 changes: 9 additions & 0 deletions src/runtime/dns_jsc/cares_jsc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,15 @@ impl ErrorDeferred {
}

pub(crate) fn reject_later(self: Box<Self>, global_this: &JSGlobalObject) {
// VM teardown (`ares_destroy` firing EDESTRUCTION callbacks from
// `release_runtime_state_js_handles`): the event loop never ticks
// again, so an enqueued task would strand the deferred — its promise
// `Strong` must drop now, while the JSC heap is still alive. The
// promise is unobservable after exit handlers, so dropping it
// unsettled is fine.
if global_this.bun_vm().is_shutting_down() {
return;
}
struct Context {
deferred: Box<ErrorDeferred>,
// LIFETIMES.tsv row 1403: JSC_BORROW — the global outlives the
Expand Down
14 changes: 13 additions & 1 deletion src/runtime/dns_jsc/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2130,11 +2130,23 @@ impl Drop for GlobalData {
fn drop(&mut self) {
// `Resolver::deinit` ends with `heap::take(this)`, which is wrong for a
// value field — open-code the channel teardown so the c-ares state
// frees when this box drops in `deinit_runtime_state`.
// frees when this box drops in `release_runtime_state_js_handles`.
if let Some(channel) = self.resolver.channel.take() {
// SAFETY: `channel` is the live handle from `ares_init_options`, owned by this resolver.
unsafe { c_ares::Channel::destroy(channel) };
}
// With queries pending, `resolver.event_loop_timer` is linked into the
// per-thread timer heap, which outlives this box (`WTFTimer::update`
// still walks it until teardown finishes) — unlink before the node's
// memory frees. The EDESTRUCTION callbacks fired by the channel
// destroy above drop their deferreds without the `request_completed`
// bookkeeping that normally disarms the timer. Guarded because
// `remove_timer` derefs the thread-local `RuntimeState`, which
// `deinit_runtime_state` clears before its (fallback) drop of this
// box; the heap dies with us there, so the stale link is moot.
if !crate::jsc_hooks::runtime_state().is_null() {
self.resolver.remove_timer();
}
}
}

Expand Down
29 changes: 29 additions & 0 deletions src/runtime/jsc_hooks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1476,6 +1476,7 @@ pub(crate) static __BUN_RUNTIME_HOOKS: RuntimeHooks = RuntimeHooks {
terminate_all_workers_and_wait,
retroactively_report_discovered_tests,
cancel_all_timers,
release_runtime_state_js_handles,
};

// ════════════════════════════════════════════════════════════════════════════
Expand Down Expand Up @@ -1609,6 +1610,34 @@ unsafe fn cancel_all_timers(vm: *mut VirtualMachine) {
}
}

/// `RuntimeHooks::release_runtime_state_js_handles` — release the JSC GC
/// handles owned by [`RuntimeState`] while the JSC VM is still alive: the SQL
/// contexts' `Strong` callbacks, and the per-VM DNS data (dropping it runs
/// `ares_destroy`, which fires pending-query callbacks that reject JS
/// promises and drop `JSPromiseStrong` handles). [`RuntimeState`] itself
/// drops in [`deinit_runtime_state`], which runs after `~VM` — a `Strong`
/// dropped there unlinks a `HandleNode` from the already-freed `HandleSet`.
///
/// # Safety
/// Must run on the JS thread with `runtime_state` still installed, after the
/// last JS that can repopulate these handles (socket-group close callbacks)
/// and before JSC teardown (`WebWorker__teardownJSCVM` /
/// `Zig__GlobalObject__destructOnExit`).
unsafe fn release_runtime_state_js_handles(_vm: *mut VirtualMachine) {
let state = runtime_state();
if state.is_null() {
return;
}
// SAFETY: `state` is the live boxed per-thread `RuntimeState`; each
// `&mut` field borrow ends before the next statement. The c-ares
// destruction callbacks fired by the `GlobalData` drop enqueue event-loop
// tasks and deref the resolver, but never re-enter `runtime_state()`.
unsafe {
(*state).sql_rare.deinit_js_handles();
drop((*state).global_dns_data.take());
}
}

pub(crate) fn close_isolation_handles(vm: &mut VirtualMachine) {
let state = runtime_state();
if state.is_null() {
Expand Down
11 changes: 11 additions & 0 deletions src/sql_jsc/jsc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,17 @@ pub struct RareData {
pub postgresql_context: crate::postgres::PostgresSQLContext,
}

impl RareData {
/// Release the JSC `Strong` handles owned by the SQL contexts while the
/// VM is still alive. This struct drops with `RuntimeState` after `~VM`;
/// a `Strong` dropped there unlinks a `HandleNode` from the already-freed
/// `HandleSet`.
pub fn deinit_js_handles(&mut self) {
self.mysql_context.deinit();
self.postgresql_context.deinit();
}
}

/// SQL-specific accessors on [VirtualMachine] for state owned by the
/// higher-tier bun_runtime::jsc_hooks::RuntimeState.
pub(crate) trait VirtualMachineSqlExt {
Expand Down
11 changes: 11 additions & 0 deletions src/sql_jsc/mysql/MySQLContext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ pub struct MySQLContext {
pub on_query_reject_fn: StrongOptional,
}

impl MySQLContext {
/// Release the JSC `Strong` handles while the VM is still alive. This
/// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after
/// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the
/// already-freed `HandleSet`.
pub fn deinit(&mut self) {
self.on_query_resolve_fn.deinit();
self.on_query_reject_fn.deinit();
}
}

// The binding object is built in Rust (`mysql.rs` registers this fn through
// `put_host_functions!`/`IntoJSHostFn`), so no C symbol is needed.
pub(crate) fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue {
Expand Down
9 changes: 9 additions & 0 deletions src/sql_jsc/postgres/PostgresSQLContext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@ pub struct PostgresSQLContext {
}

impl PostgresSQLContext {
/// Release the JSC `Strong` handles while the VM is still alive. This
/// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after
/// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the
/// already-freed `HandleSet`.
pub fn deinit(&mut self) {
self.on_query_resolve_fn.deinit();
self.on_query_reject_fn.deinit();
}

// Registered directly as `init` via `put_host_functions!` in
// `postgres.rs`, so no exported symbol is needed.
pub fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue {
Expand Down
127 changes: 126 additions & 1 deletion test/js/web/workers/worker-terminate-lifetime.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { expect, test } from "bun:test";
import { bunEnv, bunExe, isASAN, isDebug } from "harness";
import { bunEnv, bunExe, isASAN, isDebug, isWindows } from "harness";

// Worker VM startup/teardown is much slower under debug and/or ASAN; these
// tests spawn many workers, so scale iteration counts and timeouts down.
Expand All @@ -10,6 +10,26 @@ const rounds = slow ? 4 : 8;
const perRound = slow ? 12 : 32;
const timeout = slow ? 60_000 : 20_000;

// Env for the freed-JSC-handle regression tests below. Malloc=1 routes
// WebKit's fastMalloc through the system allocator (bmalloc DebugHeap) so
// ASAN builds poison freed HandleSet/HandleBlock memory and report the
// use-after-free deterministically; with libpas the freed pages stay mapped
// and the bug only crashes when the pool reuses them. That same routing
// exposes every deliberately-unreclaimed exit-time WebKit allocation to
// LeakSanitizer, whose sweep (enabled by ASAN CI lanes via detect_leaks=1)
// then takes minutes — disable it in the child; the use-after-free detection
// these tests exist for is AddressSanitizer proper and unaffected.
//
// Not on Windows: bmalloc's system-heap fallback is unsupported there, so
// Malloc=1 aborts the child at startup, and no Windows lane runs ASAN — the
// tests still cover the plain clean-shutdown contract.
const debugHeapEnv = {
...bunEnv,
// `undefined` also clears a Malloc inherited from the parent environment.
Malloc: isWindows ? undefined : "1",
ASAN_OPTIONS: [bunEnv.ASAN_OPTIONS, "detect_leaks=0"].filter(Boolean).join(":"),
Comment thread
coderabbitai[bot] marked this conversation as resolved.
};

// Regression: `new Worker(url, { ref: false })` was silently ignored — the
// Zig-side `user_keep_alive` field was set from it but never read, and the
// parent keep-alive was taken unconditionally in `create()`. `.unref()` after
Expand Down Expand Up @@ -82,6 +102,111 @@ test(
timeout,
);

// Regression: worker shutdown tore down the JSC VM (freeing its HandleSet)
// before dropping the per-VM RareData/RuntimeState, so the SQL contexts'
// Strong handles — populated at module load by internal/sql/{postgres,mysql}'s
// top-level init — and the default S3 client Strong were released against
// freed memory (segfault in Bun__StrongRef__delete →
// JSC::HandleSet::deallocate → WTF::SentinelLinkedList::remove during
// WebWorker::shutdown).
test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC handles", async () => {
await using proc = Bun.spawn({
cmd: [
bunExe(),
"-e",
`
// Touching Bun.SQL requires the bun:sql internal module, whose
// top-level init() stores Strong refs in the worker's per-VM SQL
// contexts; touching Bun.s3 caches the default S3 client in a
// RareData Strong. The worker then drains and exits naturally,
// running the full shutdown sequence.
const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; Bun.s3; postMessage('loaded');"));
const closed = new Promise(r => w.addEventListener("close", r, { once: true }));
const loaded = new Promise((resolve, reject) => {
w.onmessage = resolve;
w.onerror = reject;
// A close before "loaded" means the worker died early; fail fast
// instead of hanging on a promise that can never settle.
closed.then(() => reject(new Error("worker closed before posting 'loaded'")));
});
await loaded;
await closed;
console.log("worker closed");
`,
],
env: debugHeapEnv,
stdout: "pipe",
stderr: "pipe",
});

const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stderr).toBe("");
expect(stdout).toBe("worker closed\n");
expect(exitCode).toBe(0);
Comment thread
robobun marked this conversation as resolved.
});

// Terminating a worker with an in-flight dns.resolve* query tears down the
// per-VM c-ares channel during shutdown: the EDESTRUCTION callbacks must drop
// their promise Strongs against a live JSC heap, and the resolver's timeout
// timer must be unlinked from the per-thread timer heap before its memory
// frees (WTFTimer::update still walks that heap during teardown).
test("worker terminated with an in-flight DNS query shuts down cleanly", async () => {
await using proc = Bun.spawn({
cmd: [
bunExe(),
"-e",
`
// Local UDP socket that never replies: the worker's c-ares query
// stays in flight until terminate() without touching the network.
const udp = await Bun.udpSocket({ socket: { data() {} } });
const code =
'const dns = require("node:dns");' +
'dns.setServers(["127.0.0.1:' + udp.port + '"]);' +
'dns.promises.resolve4("inflight.example").catch(() => {});' +
'postMessage("inflight");' +
'setInterval(() => {}, 1000);'; // keep the worker alive until terminate()
const w = new Worker("data:text/javascript," + encodeURIComponent(code));
await new Promise((res, rej) => {
w.onmessage = res;
w.onerror = rej;
// A close before "inflight" means the worker died early; fail fast
// instead of hanging on a promise that can never settle.
w.addEventListener("close", () => rej(new Error("worker closed before posting 'inflight'")), { once: true });
});
await w.terminate();
udp.close();
console.log("terminated ok");
`,
],
env: debugHeapEnv,
stdout: "pipe",
stderr: "pipe",
});

const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stderr).toBe("");
expect(stdout).toBe("terminated ok\n");
expect(exitCode).toBe(0);
});

// Main-thread variant of the same teardown ordering: with
// BUN_DESTRUCT_VM_ON_EXIT=1 (set by ASAN CI lanes), global_exit derefs the
// JSC VM in Zig__GlobalObject__destructOnExit before destroy() drops
// RuntimeState, hitting the identical freed-HandleSet release.
test("main thread that loaded Bun.SQL and Bun.s3 destructs on exit without touching freed JSC handles", async () => {
await using proc = Bun.spawn({
cmd: [bunExe(), "-e", `Bun.SQL; Bun.s3; console.log("loaded");`],
env: { ...debugHeapEnv, BUN_DESTRUCT_VM_ON_EXIT: "1" },
stdout: "pipe",
stderr: "pipe",
});

const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
expect(stderr).toBe("");
expect(stdout).toBe("loaded\n");
expect(exitCode).toBe(0);
});

// Regression: WebWorker__dispatchExit deref'd the C++ Worker on the worker
// thread; if that was the last ref, ~Worker → ~EventTarget ran there and
// EventListenerMap::releaseAssertOrSetThreadUID tripped because the listener
Expand Down
Loading