diff --git a/src/jsc/VirtualMachine.rs b/src/jsc/VirtualMachine.rs index e847b6a6627..aeaa1b6f85d 100644 --- a/src/jsc/VirtualMachine.rs +++ b/src/jsc/VirtualMachine.rs @@ -1578,6 +1578,22 @@ impl VirtualMachine { // JSC `Strong`/`Weak` handles against a live HandleSet. self.event_loop_mut().release_queued_tasks_for_shutdown(); + // Release RareData's and RuntimeState's JSC GC handles (the + // default S3 client Strong, SQL context Strongs, per-VM DNS data) + // while the HandleSet is still alive — `destroy()` below drops + // both after the VM deref inside `destructOnExit`, and a `Strong` + // dropped then would unlink a HandleNode from freed memory. + if let Some(rare) = self.rare_data.as_deref_mut() { + rare.s3_default_client.deinit(); + } + if let Some(hooks) = runtime_hooks() { + // SAFETY: live per-thread VM on the JS thread; JSC teardown + // is `destructOnExit` below. + unsafe { + (hooks.release_runtime_state_js_handles)(core::ptr::from_mut(self)); + } + } + Zig__GlobalObject__destructOnExit(self.global()); // lastChanceToFinalize() above runs Listener/Server finalize → @@ -1806,6 +1822,20 @@ pub struct RuntimeHooks { /// `vm` is the live per-thread VM; `runtime_state` must still be installed /// and the JSC heap must not have been swept yet. pub cancel_all_timers: unsafe fn(vm: *mut VirtualMachine), + /// Release the JSC GC handles owned by the high-tier `RuntimeState` (the + /// SQL contexts' `Strong` callbacks, the per-VM DNS data with its + /// pending-query promises) while the JSC VM is still alive. + /// `RuntimeState` itself drops in `deinit_runtime_state`, which runs + /// after `~VM` (`WebWorker__teardownJSCVM` / + /// `Zig__GlobalObject__destructOnExit`) — a `Strong` dropped there + /// unlinks a `HandleNode` from the already-freed `HandleSet`. + /// + /// # Safety + /// `vm` is the live per-thread VM on the JS thread; `runtime_state` must + /// still be installed and the JSC VM must not have been torn down yet. + /// Call after the last JS that can repopulate these handles + /// (socket-group close callbacks). + pub release_runtime_state_js_handles: unsafe fn(vm: *mut VirtualMachine), } /// Canonical `EventLoopCtx` vtable for a `*mut VirtualMachine` owner — the JS diff --git a/src/jsc/web_worker.rs b/src/jsc/web_worker.rs index ceb045b8f5e..b2d5685f6f0 100644 --- a/src/jsc/web_worker.rs +++ b/src/jsc/web_worker.rs @@ -1250,6 +1250,21 @@ impl WebWorker { // is step 3 below). rare.close_all_socket_groups(unsafe { &*vm_ptr }); } + // Release RareData's and RuntimeState's JSC GC handles (the + // default S3 client Strong, SQL context Strongs, per-VM DNS data) + // while the HandleSet is still alive — `destroy()` in step 5 + // drops both after teardownJSCVM freed it, and a `Strong` dropped + // then would unlink a HandleNode from freed memory. Runs after + // `close_all_socket_groups` so SQL on_close callbacks can still + // dispatch through the contexts. + if let Some(rare) = vm.rare_data.as_deref_mut() { + rare.s3_default_client.deinit(); + } + if let Some(hooks) = runtime_hooks() { + // SAFETY: `vm_ptr` unpublished above (sole owner); + // `runtime_state` still installed; JSC teardown is step 3. + unsafe { (hooks.release_runtime_state_js_handles)(vm_ptr) }; + } exit_code = i32::from(vm.exit_handler.exit_code); global_object = Some(vm.global); } diff --git a/src/runtime/dns_jsc/cares_jsc.rs b/src/runtime/dns_jsc/cares_jsc.rs index 5d54b8cf802..f6e547d3fcf 100644 --- a/src/runtime/dns_jsc/cares_jsc.rs +++ b/src/runtime/dns_jsc/cares_jsc.rs @@ -700,6 +700,15 @@ impl ErrorDeferred { } pub(crate) fn reject_later(self: Box, global_this: &JSGlobalObject) { + // VM teardown (`ares_destroy` firing EDESTRUCTION callbacks from + // `release_runtime_state_js_handles`): the event loop never ticks + // again, so an enqueued task would strand the deferred — its promise + // `Strong` must drop now, while the JSC heap is still alive. The + // promise is unobservable after exit handlers, so dropping it + // unsettled is fine. + if global_this.bun_vm().is_shutting_down() { + return; + } struct Context { deferred: Box, // LIFETIMES.tsv row 1403: JSC_BORROW — the global outlives the diff --git a/src/runtime/dns_jsc/dns.rs b/src/runtime/dns_jsc/dns.rs index 145e2a5aaa7..a8bab25b321 100644 --- a/src/runtime/dns_jsc/dns.rs +++ b/src/runtime/dns_jsc/dns.rs @@ -2130,11 +2130,23 @@ impl Drop for GlobalData { fn drop(&mut self) { // `Resolver::deinit` ends with `heap::take(this)`, which is wrong for a // value field — open-code the channel teardown so the c-ares state - // frees when this box drops in `deinit_runtime_state`. + // frees when this box drops in `release_runtime_state_js_handles`. if let Some(channel) = self.resolver.channel.take() { // SAFETY: `channel` is the live handle from `ares_init_options`, owned by this resolver. unsafe { c_ares::Channel::destroy(channel) }; } + // With queries pending, `resolver.event_loop_timer` is linked into the + // per-thread timer heap, which outlives this box (`WTFTimer::update` + // still walks it until teardown finishes) — unlink before the node's + // memory frees. The EDESTRUCTION callbacks fired by the channel + // destroy above drop their deferreds without the `request_completed` + // bookkeeping that normally disarms the timer. Guarded because + // `remove_timer` derefs the thread-local `RuntimeState`, which + // `deinit_runtime_state` clears before its (fallback) drop of this + // box; the heap dies with us there, so the stale link is moot. + if !crate::jsc_hooks::runtime_state().is_null() { + self.resolver.remove_timer(); + } } } diff --git a/src/runtime/jsc_hooks.rs b/src/runtime/jsc_hooks.rs index e7eac1d2afa..de4f006c6da 100644 --- a/src/runtime/jsc_hooks.rs +++ b/src/runtime/jsc_hooks.rs @@ -1476,6 +1476,7 @@ pub(crate) static __BUN_RUNTIME_HOOKS: RuntimeHooks = RuntimeHooks { terminate_all_workers_and_wait, retroactively_report_discovered_tests, cancel_all_timers, + release_runtime_state_js_handles, }; // ════════════════════════════════════════════════════════════════════════════ @@ -1609,6 +1610,34 @@ unsafe fn cancel_all_timers(vm: *mut VirtualMachine) { } } +/// `RuntimeHooks::release_runtime_state_js_handles` — release the JSC GC +/// handles owned by [`RuntimeState`] while the JSC VM is still alive: the SQL +/// contexts' `Strong` callbacks, and the per-VM DNS data (dropping it runs +/// `ares_destroy`, which fires pending-query callbacks that reject JS +/// promises and drop `JSPromiseStrong` handles). [`RuntimeState`] itself +/// drops in [`deinit_runtime_state`], which runs after `~VM` — a `Strong` +/// dropped there unlinks a `HandleNode` from the already-freed `HandleSet`. +/// +/// # Safety +/// Must run on the JS thread with `runtime_state` still installed, after the +/// last JS that can repopulate these handles (socket-group close callbacks) +/// and before JSC teardown (`WebWorker__teardownJSCVM` / +/// `Zig__GlobalObject__destructOnExit`). +unsafe fn release_runtime_state_js_handles(_vm: *mut VirtualMachine) { + let state = runtime_state(); + if state.is_null() { + return; + } + // SAFETY: `state` is the live boxed per-thread `RuntimeState`; each + // `&mut` field borrow ends before the next statement. The c-ares + // destruction callbacks fired by the `GlobalData` drop enqueue event-loop + // tasks and deref the resolver, but never re-enter `runtime_state()`. + unsafe { + (*state).sql_rare.deinit_js_handles(); + drop((*state).global_dns_data.take()); + } +} + pub(crate) fn close_isolation_handles(vm: &mut VirtualMachine) { let state = runtime_state(); if state.is_null() { diff --git a/src/sql_jsc/jsc.rs b/src/sql_jsc/jsc.rs index 820dc183384..4c8f6c49f57 100644 --- a/src/sql_jsc/jsc.rs +++ b/src/sql_jsc/jsc.rs @@ -272,6 +272,17 @@ pub struct RareData { pub postgresql_context: crate::postgres::PostgresSQLContext, } +impl RareData { + /// Release the JSC `Strong` handles owned by the SQL contexts while the + /// VM is still alive. This struct drops with `RuntimeState` after `~VM`; + /// a `Strong` dropped there unlinks a `HandleNode` from the already-freed + /// `HandleSet`. + pub fn deinit_js_handles(&mut self) { + self.mysql_context.deinit(); + self.postgresql_context.deinit(); + } +} + /// SQL-specific accessors on [VirtualMachine] for state owned by the /// higher-tier bun_runtime::jsc_hooks::RuntimeState. pub(crate) trait VirtualMachineSqlExt { diff --git a/src/sql_jsc/mysql/MySQLContext.rs b/src/sql_jsc/mysql/MySQLContext.rs index ae43273b45a..7cab603a8f7 100644 --- a/src/sql_jsc/mysql/MySQLContext.rs +++ b/src/sql_jsc/mysql/MySQLContext.rs @@ -7,6 +7,17 @@ pub struct MySQLContext { pub on_query_reject_fn: StrongOptional, } +impl MySQLContext { + /// Release the JSC `Strong` handles while the VM is still alive. This + /// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after + /// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the + /// already-freed `HandleSet`. + pub fn deinit(&mut self) { + self.on_query_resolve_fn.deinit(); + self.on_query_reject_fn.deinit(); + } +} + // The binding object is built in Rust (`mysql.rs` registers this fn through // `put_host_functions!`/`IntoJSHostFn`), so no C symbol is needed. pub(crate) fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue { diff --git a/src/sql_jsc/postgres/PostgresSQLContext.rs b/src/sql_jsc/postgres/PostgresSQLContext.rs index 896cc050b1f..42d828846be 100644 --- a/src/sql_jsc/postgres/PostgresSQLContext.rs +++ b/src/sql_jsc/postgres/PostgresSQLContext.rs @@ -12,6 +12,15 @@ pub struct PostgresSQLContext { } impl PostgresSQLContext { + /// Release the JSC `Strong` handles while the VM is still alive. This + /// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after + /// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the + /// already-freed `HandleSet`. + pub fn deinit(&mut self) { + self.on_query_resolve_fn.deinit(); + self.on_query_reject_fn.deinit(); + } + // Registered directly as `init` via `put_host_functions!` in // `postgres.rs`, so no exported symbol is needed. pub fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue { diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index b938d02fc47..fc76aef162c 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "bun:test"; -import { bunEnv, bunExe, isASAN, isDebug } from "harness"; +import { bunEnv, bunExe, isASAN, isDebug, isWindows } from "harness"; // Worker VM startup/teardown is much slower under debug and/or ASAN; these // tests spawn many workers, so scale iteration counts and timeouts down. @@ -10,6 +10,26 @@ const rounds = slow ? 4 : 8; const perRound = slow ? 12 : 32; const timeout = slow ? 60_000 : 20_000; +// Env for the freed-JSC-handle regression tests below. Malloc=1 routes +// WebKit's fastMalloc through the system allocator (bmalloc DebugHeap) so +// ASAN builds poison freed HandleSet/HandleBlock memory and report the +// use-after-free deterministically; with libpas the freed pages stay mapped +// and the bug only crashes when the pool reuses them. That same routing +// exposes every deliberately-unreclaimed exit-time WebKit allocation to +// LeakSanitizer, whose sweep (enabled by ASAN CI lanes via detect_leaks=1) +// then takes minutes — disable it in the child; the use-after-free detection +// these tests exist for is AddressSanitizer proper and unaffected. +// +// Not on Windows: bmalloc's system-heap fallback is unsupported there, so +// Malloc=1 aborts the child at startup, and no Windows lane runs ASAN — the +// tests still cover the plain clean-shutdown contract. +const debugHeapEnv = { + ...bunEnv, + // `undefined` also clears a Malloc inherited from the parent environment. + Malloc: isWindows ? undefined : "1", + ASAN_OPTIONS: [bunEnv.ASAN_OPTIONS, "detect_leaks=0"].filter(Boolean).join(":"), +}; + // Regression: `new Worker(url, { ref: false })` was silently ignored — the // Zig-side `user_keep_alive` field was set from it but never read, and the // parent keep-alive was taken unconditionally in `create()`. `.unref()` after @@ -82,6 +102,111 @@ test( timeout, ); +// Regression: worker shutdown tore down the JSC VM (freeing its HandleSet) +// before dropping the per-VM RareData/RuntimeState, so the SQL contexts' +// Strong handles — populated at module load by internal/sql/{postgres,mysql}'s +// top-level init — and the default S3 client Strong were released against +// freed memory (segfault in Bun__StrongRef__delete → +// JSC::HandleSet::deallocate → WTF::SentinelLinkedList::remove during +// WebWorker::shutdown). +test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC handles", async () => { + await using proc = Bun.spawn({ + cmd: [ + bunExe(), + "-e", + ` + // Touching Bun.SQL requires the bun:sql internal module, whose + // top-level init() stores Strong refs in the worker's per-VM SQL + // contexts; touching Bun.s3 caches the default S3 client in a + // RareData Strong. The worker then drains and exits naturally, + // running the full shutdown sequence. + const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; Bun.s3; postMessage('loaded');")); + const closed = new Promise(r => w.addEventListener("close", r, { once: true })); + const loaded = new Promise((resolve, reject) => { + w.onmessage = resolve; + w.onerror = reject; + // A close before "loaded" means the worker died early; fail fast + // instead of hanging on a promise that can never settle. + closed.then(() => reject(new Error("worker closed before posting 'loaded'"))); + }); + await loaded; + await closed; + console.log("worker closed"); + `, + ], + env: debugHeapEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("worker closed\n"); + expect(exitCode).toBe(0); +}); + +// Terminating a worker with an in-flight dns.resolve* query tears down the +// per-VM c-ares channel during shutdown: the EDESTRUCTION callbacks must drop +// their promise Strongs against a live JSC heap, and the resolver's timeout +// timer must be unlinked from the per-thread timer heap before its memory +// frees (WTFTimer::update still walks that heap during teardown). +test("worker terminated with an in-flight DNS query shuts down cleanly", async () => { + await using proc = Bun.spawn({ + cmd: [ + bunExe(), + "-e", + ` + // Local UDP socket that never replies: the worker's c-ares query + // stays in flight until terminate() without touching the network. + const udp = await Bun.udpSocket({ socket: { data() {} } }); + const code = + 'const dns = require("node:dns");' + + 'dns.setServers(["127.0.0.1:' + udp.port + '"]);' + + 'dns.promises.resolve4("inflight.example").catch(() => {});' + + 'postMessage("inflight");' + + 'setInterval(() => {}, 1000);'; // keep the worker alive until terminate() + const w = new Worker("data:text/javascript," + encodeURIComponent(code)); + await new Promise((res, rej) => { + w.onmessage = res; + w.onerror = rej; + // A close before "inflight" means the worker died early; fail fast + // instead of hanging on a promise that can never settle. + w.addEventListener("close", () => rej(new Error("worker closed before posting 'inflight'")), { once: true }); + }); + await w.terminate(); + udp.close(); + console.log("terminated ok"); + `, + ], + env: debugHeapEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("terminated ok\n"); + expect(exitCode).toBe(0); +}); + +// Main-thread variant of the same teardown ordering: with +// BUN_DESTRUCT_VM_ON_EXIT=1 (set by ASAN CI lanes), global_exit derefs the +// JSC VM in Zig__GlobalObject__destructOnExit before destroy() drops +// RuntimeState, hitting the identical freed-HandleSet release. +test("main thread that loaded Bun.SQL and Bun.s3 destructs on exit without touching freed JSC handles", async () => { + await using proc = Bun.spawn({ + cmd: [bunExe(), "-e", `Bun.SQL; Bun.s3; console.log("loaded");`], + env: { ...debugHeapEnv, BUN_DESTRUCT_VM_ON_EXIT: "1" }, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("loaded\n"); + expect(exitCode).toBe(0); +}); + // Regression: WebWorker__dispatchExit deref'd the C++ Worker on the worker // thread; if that was the last ref, ~Worker → ~EventTarget ran there and // EventListenerMap::releaseAssertOrSetThreadUID tripped because the listener