From 214dbc9cc017fddfda8538b46fb3990c43a4f80c Mon Sep 17 00:00:00 2001 From: robobun Date: Mon, 8 Jun 2026 16:40:29 +0000 Subject: [PATCH 1/6] Release RuntimeState's JSC handles before tearing down the VM WebWorker::shutdown (and global_exit under BUN_DESTRUCT_VM_ON_EXIT) tore down the JSC VM, then VirtualMachine::destroy dropped the per-VM RuntimeState. The SQL contexts' Strong handles and the per-VM DNS data dropped in that window, so Bun__StrongRef__delete unlinked HandleNodes from the already-freed HandleSet (segfault in WTF::SentinelLinkedList::remove). Add a release_runtime_state_js_handles runtime hook, called from both teardown paths after the last JS runs (socket-group close callbacks) and before the VM deref. It releases the MySQL/Postgres context Strongs and drops the DNS GlobalData while the JSC heap is alive. Dropping GlobalData pre-teardown surfaced two follow-on ordering issues in the same class, fixed here too: - reject_later now drops the deferred synchronously when the VM is shutting down instead of enqueueing a task that can never run (the promise Strong must drop while the heap is alive, and the stranded box leaked). - GlobalData::drop unlinks the resolver's pending-query timeout timer from the per-thread timer heap, which now outlives the box and is still walked by WTFTimer::update during teardown. 
--- src/jsc/VirtualMachine.rs | 27 ++++++ src/jsc/web_worker.rs | 11 +++ src/runtime/dns_jsc/cares_jsc.rs | 9 ++ src/runtime/dns_jsc/dns.rs | 14 ++- src/runtime/jsc_hooks.rs | 29 ++++++ src/sql_jsc/jsc.rs | 11 +++ src/sql_jsc/mysql/MySQLContext.rs | 11 +++ src/sql_jsc/postgres/PostgresSQLContext.rs | 9 ++ .../workers/worker-terminate-lifetime.test.ts | 96 +++++++++++++++++++ 9 files changed, 216 insertions(+), 1 deletion(-) diff --git a/src/jsc/VirtualMachine.rs b/src/jsc/VirtualMachine.rs index e847b6a6627..ec1a7c8b674 100644 --- a/src/jsc/VirtualMachine.rs +++ b/src/jsc/VirtualMachine.rs @@ -1578,6 +1578,19 @@ impl VirtualMachine { // JSC `Strong`/`Weak` handles against a live HandleSet. self.event_loop_mut().release_queued_tasks_for_shutdown(); + // Release RuntimeState's JSC GC handles (SQL context Strongs, + // per-VM DNS data) while the HandleSet is still alive — + // `destroy()` below drops RuntimeState after the VM deref inside + // `destructOnExit`, and a `Strong` dropped then would unlink a + // HandleNode from freed memory. + if let Some(hooks) = runtime_hooks() { + // SAFETY: live per-thread VM on the JS thread; JSC teardown + // is `destructOnExit` below. + unsafe { + (hooks.release_runtime_state_js_handles)(core::ptr::from_mut(self)); + } + } + Zig__GlobalObject__destructOnExit(self.global()); // lastChanceToFinalize() above runs Listener/Server finalize → @@ -1806,6 +1819,20 @@ pub struct RuntimeHooks { /// `vm` is the live per-thread VM; `runtime_state` must still be installed /// and the JSC heap must not have been swept yet. pub cancel_all_timers: unsafe fn(vm: *mut VirtualMachine), + /// Release the JSC GC handles owned by the high-tier `RuntimeState` (the + /// SQL contexts' `Strong` callbacks, the per-VM DNS data with its + /// pending-query promises) while the JSC VM is still alive. 
+ /// `RuntimeState` itself drops in `deinit_runtime_state`, which runs + /// after `~VM` (`WebWorker__teardownJSCVM` / + /// `Zig__GlobalObject__destructOnExit`) — a `Strong` dropped there + /// unlinks a `HandleNode` from the already-freed `HandleSet`. + /// + /// # Safety + /// `vm` is the live per-thread VM on the JS thread; `runtime_state` must + /// still be installed and the JSC VM must not have been torn down yet. + /// Call after the last JS that can repopulate these handles + /// (socket-group close callbacks). + pub release_runtime_state_js_handles: unsafe fn(vm: *mut VirtualMachine), } /// Canonical `EventLoopCtx` vtable for a `*mut VirtualMachine` owner — the JS diff --git a/src/jsc/web_worker.rs b/src/jsc/web_worker.rs index ceb045b8f5e..13b2cf0fb63 100644 --- a/src/jsc/web_worker.rs +++ b/src/jsc/web_worker.rs @@ -1250,6 +1250,17 @@ impl WebWorker { // is step 3 below). rare.close_all_socket_groups(unsafe { &*vm_ptr }); } + // Release RuntimeState's JSC GC handles (SQL context Strongs, + // per-VM DNS data) while the HandleSet is still alive — + // `destroy()` in step 5 drops RuntimeState after teardownJSCVM + // freed it, and a `Strong` dropped then would unlink a HandleNode + // from freed memory. Runs after `close_all_socket_groups` so SQL + // on_close callbacks can still dispatch through the contexts. + if let Some(hooks) = runtime_hooks() { + // SAFETY: `vm_ptr` unpublished above (sole owner); + // `runtime_state` still installed; JSC teardown is step 3. 
+ unsafe { (hooks.release_runtime_state_js_handles)(vm_ptr) }; + } exit_code = i32::from(vm.exit_handler.exit_code); global_object = Some(vm.global); } diff --git a/src/runtime/dns_jsc/cares_jsc.rs b/src/runtime/dns_jsc/cares_jsc.rs index 5d54b8cf802..f6e547d3fcf 100644 --- a/src/runtime/dns_jsc/cares_jsc.rs +++ b/src/runtime/dns_jsc/cares_jsc.rs @@ -700,6 +700,15 @@ impl ErrorDeferred { } pub(crate) fn reject_later(self: Box, global_this: &JSGlobalObject) { + // VM teardown (`ares_destroy` firing EDESTRUCTION callbacks from + // `release_runtime_state_js_handles`): the event loop never ticks + // again, so an enqueued task would strand the deferred — its promise + // `Strong` must drop now, while the JSC heap is still alive. The + // promise is unobservable after exit handlers, so dropping it + // unsettled is fine. + if global_this.bun_vm().is_shutting_down() { + return; + } struct Context { deferred: Box, // LIFETIMES.tsv row 1403: JSC_BORROW — the global outlives the diff --git a/src/runtime/dns_jsc/dns.rs b/src/runtime/dns_jsc/dns.rs index 145e2a5aaa7..a8bab25b321 100644 --- a/src/runtime/dns_jsc/dns.rs +++ b/src/runtime/dns_jsc/dns.rs @@ -2130,11 +2130,23 @@ impl Drop for GlobalData { fn drop(&mut self) { // `Resolver::deinit` ends with `heap::take(this)`, which is wrong for a // value field — open-code the channel teardown so the c-ares state - // frees when this box drops in `deinit_runtime_state`. + // frees when this box drops in `release_runtime_state_js_handles`. if let Some(channel) = self.resolver.channel.take() { // SAFETY: `channel` is the live handle from `ares_init_options`, owned by this resolver. unsafe { c_ares::Channel::destroy(channel) }; } + // With queries pending, `resolver.event_loop_timer` is linked into the + // per-thread timer heap, which outlives this box (`WTFTimer::update` + // still walks it until teardown finishes) — unlink before the node's + // memory frees. 
The EDESTRUCTION callbacks fired by the channel + // destroy above drop their deferreds without the `request_completed` + // bookkeeping that normally disarms the timer. Guarded because + // `remove_timer` derefs the thread-local `RuntimeState`, which + // `deinit_runtime_state` clears before its (fallback) drop of this + // box; the heap dies with us there, so the stale link is moot. + if !crate::jsc_hooks::runtime_state().is_null() { + self.resolver.remove_timer(); + } } } diff --git a/src/runtime/jsc_hooks.rs b/src/runtime/jsc_hooks.rs index e7eac1d2afa..de4f006c6da 100644 --- a/src/runtime/jsc_hooks.rs +++ b/src/runtime/jsc_hooks.rs @@ -1476,6 +1476,7 @@ pub(crate) static __BUN_RUNTIME_HOOKS: RuntimeHooks = RuntimeHooks { terminate_all_workers_and_wait, retroactively_report_discovered_tests, cancel_all_timers, + release_runtime_state_js_handles, }; // ════════════════════════════════════════════════════════════════════════════ @@ -1609,6 +1610,34 @@ unsafe fn cancel_all_timers(vm: *mut VirtualMachine) { } } +/// `RuntimeHooks::release_runtime_state_js_handles` — release the JSC GC +/// handles owned by [`RuntimeState`] while the JSC VM is still alive: the SQL +/// contexts' `Strong` callbacks, and the per-VM DNS data (dropping it runs +/// `ares_destroy`, which fires pending-query callbacks that reject JS +/// promises and drop `JSPromiseStrong` handles). [`RuntimeState`] itself +/// drops in [`deinit_runtime_state`], which runs after `~VM` — a `Strong` +/// dropped there unlinks a `HandleNode` from the already-freed `HandleSet`. +/// +/// # Safety +/// Must run on the JS thread with `runtime_state` still installed, after the +/// last JS that can repopulate these handles (socket-group close callbacks) +/// and before JSC teardown (`WebWorker__teardownJSCVM` / +/// `Zig__GlobalObject__destructOnExit`). 
+unsafe fn release_runtime_state_js_handles(_vm: *mut VirtualMachine) { + let state = runtime_state(); + if state.is_null() { + return; + } + // SAFETY: `state` is the live boxed per-thread `RuntimeState`; each + // `&mut` field borrow ends before the next statement. `GlobalData::drop` + // runs only after `take()` has returned, so its `runtime_state()` read + // and `remove_timer` call overlap no live `&mut` borrow taken here. + unsafe { + (*state).sql_rare.deinit_js_handles(); + drop((*state).global_dns_data.take()); + } +} + pub(crate) fn close_isolation_handles(vm: &mut VirtualMachine) { let state = runtime_state(); if state.is_null() { diff --git a/src/sql_jsc/jsc.rs b/src/sql_jsc/jsc.rs index 820dc183384..4c8f6c49f57 100644 --- a/src/sql_jsc/jsc.rs +++ b/src/sql_jsc/jsc.rs @@ -272,6 +272,17 @@ pub struct RareData { pub postgresql_context: crate::postgres::PostgresSQLContext, } +impl RareData { + /// Release the JSC `Strong` handles owned by the SQL contexts while the + /// VM is still alive. This struct drops with `RuntimeState` after `~VM`; + /// a `Strong` dropped there unlinks a `HandleNode` from the already-freed + /// `HandleSet`. + pub fn deinit_js_handles(&mut self) { + self.mysql_context.deinit(); + self.postgresql_context.deinit(); + } +} + /// SQL-specific accessors on [VirtualMachine] for state owned by the /// higher-tier bun_runtime::jsc_hooks::RuntimeState. pub(crate) trait VirtualMachineSqlExt { diff --git a/src/sql_jsc/mysql/MySQLContext.rs b/src/sql_jsc/mysql/MySQLContext.rs index ae43273b45a..7cab603a8f7 100644 --- a/src/sql_jsc/mysql/MySQLContext.rs +++ b/src/sql_jsc/mysql/MySQLContext.rs @@ -7,6 +7,17 @@ pub struct MySQLContext { pub on_query_reject_fn: StrongOptional, } +impl MySQLContext { + /// Release the JSC `Strong` handles while the VM is still alive. This + /// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after + /// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the + /// already-freed `HandleSet`.
+ pub fn deinit(&mut self) { + self.on_query_resolve_fn.deinit(); + self.on_query_reject_fn.deinit(); + } +} + // The binding object is built in Rust (`mysql.rs` registers this fn through // `put_host_functions!`/`IntoJSHostFn`), so no C symbol is needed. pub(crate) fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue { diff --git a/src/sql_jsc/postgres/PostgresSQLContext.rs b/src/sql_jsc/postgres/PostgresSQLContext.rs index 896cc050b1f..42d828846be 100644 --- a/src/sql_jsc/postgres/PostgresSQLContext.rs +++ b/src/sql_jsc/postgres/PostgresSQLContext.rs @@ -12,6 +12,15 @@ pub struct PostgresSQLContext { } impl PostgresSQLContext { + /// Release the JSC `Strong` handles while the VM is still alive. This + /// struct is owned by `bun_runtime`'s `RuntimeState`, which drops after + /// `~VM` — a `Strong` dropped there unlinks a `HandleNode` from the + /// already-freed `HandleSet`. + pub fn deinit(&mut self) { + self.on_query_resolve_fn.deinit(); + self.on_query_reject_fn.deinit(); + } + // Registered directly as `init` via `put_host_functions!` in // `postgres.rs`, so no exported symbol is needed. pub fn init(global: &JSGlobalObject, frame: &CallFrame) -> JSValue { diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index b938d02fc47..e7c74af9d98 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -82,6 +82,102 @@ test( timeout, ); +// Regression: worker shutdown tore down the JSC VM (freeing its HandleSet) +// before dropping bun_runtime's RuntimeState, so the SQL contexts' Strong +// handles — populated at module load by internal/sql/{postgres,mysql}'s +// top-level init — were released against freed memory (segfault in +// Bun__StrongRef__delete → JSC::HandleSet::deallocate → +// WTF::SentinelLinkedList::remove during WebWorker::shutdown). 
+// +// Malloc=1 makes WebKit's fastMalloc use the system allocator (bmalloc +// DebugHeap) so ASAN builds poison the freed HandleSet/HandleBlock memory +// and report the use-after-free deterministically; with libpas the freed +// pages stay mapped and the bug only crashes when the pool reuses them. +test("worker that loaded Bun.SQL exits without touching freed JSC handles", async () => { + await using proc = Bun.spawn({ + cmd: [ + bunExe(), + "-e", + ` + // Touching Bun.SQL requires the bun:sql internal module, whose + // top-level init() stores Strong refs in the worker's per-VM SQL + // contexts. The worker then drains and exits naturally, running the + // full shutdown sequence. + const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; postMessage('loaded');")); + const loaded = new Promise((resolve, reject) => { + w.onmessage = resolve; + w.onerror = reject; + }); + const closed = new Promise(r => w.addEventListener("close", r, { once: true })); + await loaded; + await closed; + console.log("worker closed"); + `, + ], + env: { ...bunEnv, Malloc: "1" }, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("worker closed\n"); + expect(exitCode).toBe(0); +}); + +// Terminating a worker with an in-flight dns.resolve* query tears down the +// per-VM c-ares channel during shutdown: the EDESTRUCTION callbacks must drop +// their promise Strongs against a live JSC heap, and the resolver's timeout +// timer must be unlinked from the per-thread timer heap before its memory +// frees (WTFTimer::update still walks that heap during teardown). 
+test("worker terminated with an in-flight DNS query shuts down cleanly", async () => { + await using proc = Bun.spawn({ + cmd: [ + bunExe(), + "-e", + ` + const code = \` + const dns = require("node:dns"); + dns.setServers(["192.0.2.1"]); // TEST-NET blackhole: the query stays in flight + dns.promises.resolve4("inflight.example").catch(() => {}); + postMessage("inflight"); + setInterval(() => {}, 1000); // keep the worker alive until terminate() + \`; + const w = new Worker("data:text/javascript," + encodeURIComponent(code)); + await new Promise((res, rej) => { w.onmessage = res; w.onerror = rej; }); + await w.terminate(); + console.log("terminated ok"); + `, + ], + env: { ...bunEnv, Malloc: "1" }, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("terminated ok\n"); + expect(exitCode).toBe(0); +}); + +// Main-thread variant of the same teardown ordering: with +// BUN_DESTRUCT_VM_ON_EXIT=1 (set by ASAN CI lanes), global_exit derefs the +// JSC VM in Zig__GlobalObject__destructOnExit before destroy() drops +// RuntimeState, hitting the identical freed-HandleSet release. 
+test("main thread that loaded Bun.SQL destructs on exit without touching freed JSC handles", async () => { + await using proc = Bun.spawn({ + cmd: [bunExe(), "-e", `Bun.SQL; console.log("loaded");`], + env: { ...bunEnv, BUN_DESTRUCT_VM_ON_EXIT: "1", Malloc: "1" }, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + expect(stderr).toBe(""); + expect(stdout).toBe("loaded\n"); + expect(exitCode).toBe(0); +}); + // Regression: WebWorker__dispatchExit deref'd the C++ Worker on the worker // thread; if that was the last ref, ~Worker → ~EventTarget ran there and // EventListenerMap::releaseAssertOrSetThreadUID tripped because the listener From ceb930e352a8eb6b7f1a4cea506667afe9bd2277 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:01:18 +0000 Subject: [PATCH 2/6] Release the default S3 client Strong before VM teardown too RareData.s3_default_client (cached by the Bun.s3 getter) is dropped in destroy() after the HandleSet is freed, on both the worker shutdown and the BUN_DESTRUCT_VM_ON_EXIT main-thread paths: same class as the SQL context Strongs. Release it in the same pre-teardown step. --- src/jsc/VirtualMachine.rs | 13 ++++++---- src/jsc/web_worker.rs | 16 ++++++++----- .../workers/worker-terminate-lifetime.test.ts | 24 ++++++++++--------- 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/jsc/VirtualMachine.rs b/src/jsc/VirtualMachine.rs index ec1a7c8b674..aeaa1b6f85d 100644 --- a/src/jsc/VirtualMachine.rs +++ b/src/jsc/VirtualMachine.rs @@ -1578,11 +1578,14 @@ impl VirtualMachine { // JSC `Strong`/`Weak` handles against a live HandleSet. 
self.event_loop_mut().release_queued_tasks_for_shutdown(); - // Release RuntimeState's JSC GC handles (SQL context Strongs, - // per-VM DNS data) while the HandleSet is still alive — - // `destroy()` below drops RuntimeState after the VM deref inside - // `destructOnExit`, and a `Strong` dropped then would unlink a - // HandleNode from freed memory. + // Release RareData's and RuntimeState's JSC GC handles (the + // default S3 client Strong, SQL context Strongs, per-VM DNS data) + // while the HandleSet is still alive — `destroy()` below drops + // both after the VM deref inside `destructOnExit`, and a `Strong` + // dropped then would unlink a HandleNode from freed memory. + if let Some(rare) = self.rare_data.as_deref_mut() { + rare.s3_default_client.deinit(); + } if let Some(hooks) = runtime_hooks() { // SAFETY: live per-thread VM on the JS thread; JSC teardown // is `destructOnExit` below. diff --git a/src/jsc/web_worker.rs b/src/jsc/web_worker.rs index 13b2cf0fb63..b2d5685f6f0 100644 --- a/src/jsc/web_worker.rs +++ b/src/jsc/web_worker.rs @@ -1250,12 +1250,16 @@ impl WebWorker { // is step 3 below). rare.close_all_socket_groups(unsafe { &*vm_ptr }); } - // Release RuntimeState's JSC GC handles (SQL context Strongs, - // per-VM DNS data) while the HandleSet is still alive — - // `destroy()` in step 5 drops RuntimeState after teardownJSCVM - // freed it, and a `Strong` dropped then would unlink a HandleNode - // from freed memory. Runs after `close_all_socket_groups` so SQL - // on_close callbacks can still dispatch through the contexts. + // Release RareData's and RuntimeState's JSC GC handles (the + // default S3 client Strong, SQL context Strongs, per-VM DNS data) + // while the HandleSet is still alive — `destroy()` in step 5 + // drops both after teardownJSCVM freed it, and a `Strong` dropped + // then would unlink a HandleNode from freed memory. Runs after + // `close_all_socket_groups` so SQL on_close callbacks can still + // dispatch through the contexts. 
+ if let Some(rare) = vm.rare_data.as_deref_mut() { + rare.s3_default_client.deinit(); + } if let Some(hooks) = runtime_hooks() { // SAFETY: `vm_ptr` unpublished above (sole owner); // `runtime_state` still installed; JSC teardown is step 3. diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index e7c74af9d98..fde0a2745f7 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -83,17 +83,18 @@ test( ); // Regression: worker shutdown tore down the JSC VM (freeing its HandleSet) -// before dropping bun_runtime's RuntimeState, so the SQL contexts' Strong -// handles — populated at module load by internal/sql/{postgres,mysql}'s -// top-level init — were released against freed memory (segfault in -// Bun__StrongRef__delete → JSC::HandleSet::deallocate → -// WTF::SentinelLinkedList::remove during WebWorker::shutdown). +// before dropping the per-VM RareData/RuntimeState, so the SQL contexts' +// Strong handles — populated at module load by internal/sql/{postgres,mysql}'s +// top-level init — and the default S3 client Strong were released against +// freed memory (segfault in Bun__StrongRef__delete → +// JSC::HandleSet::deallocate → WTF::SentinelLinkedList::remove during +// WebWorker::shutdown). // // Malloc=1 makes WebKit's fastMalloc use the system allocator (bmalloc // DebugHeap) so ASAN builds poison the freed HandleSet/HandleBlock memory // and report the use-after-free deterministically; with libpas the freed // pages stay mapped and the bug only crashes when the pool reuses them. 
-test("worker that loaded Bun.SQL exits without touching freed JSC handles", async () => { +test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC handles", async () => { await using proc = Bun.spawn({ cmd: [ bunExe(), @@ -101,9 +102,10 @@ test("worker that loaded Bun.SQL exits without touching freed JSC handles", asyn ` // Touching Bun.SQL requires the bun:sql internal module, whose // top-level init() stores Strong refs in the worker's per-VM SQL - // contexts. The worker then drains and exits naturally, running the - // full shutdown sequence. - const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; postMessage('loaded');")); + // contexts; touching Bun.s3 caches the default S3 client in a + // RareData Strong. The worker then drains and exits naturally, + // running the full shutdown sequence. + const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; Bun.s3; postMessage('loaded');")); const loaded = new Promise((resolve, reject) => { w.onmessage = resolve; w.onerror = reject; @@ -164,9 +166,9 @@ test("worker terminated with an in-flight DNS query shuts down cleanly", async ( // BUN_DESTRUCT_VM_ON_EXIT=1 (set by ASAN CI lanes), global_exit derefs the // JSC VM in Zig__GlobalObject__destructOnExit before destroy() drops // RuntimeState, hitting the identical freed-HandleSet release. 
-test("main thread that loaded Bun.SQL destructs on exit without touching freed JSC handles", async () => { +test("main thread that loaded Bun.SQL and Bun.s3 destructs on exit without touching freed JSC handles", async () => { await using proc = Bun.spawn({ - cmd: [bunExe(), "-e", `Bun.SQL; console.log("loaded");`], + cmd: [bunExe(), "-e", `Bun.SQL; Bun.s3; console.log("loaded");`], env: { ...bunEnv, BUN_DESTRUCT_VM_ON_EXIT: "1", Malloc: "1" }, stdout: "pipe", stderr: "pipe", From f5caab2fdd6923f408bacf756829d1d16c0a5fcd Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:33:20 +0000 Subject: [PATCH 3/6] Disable LSAN in the teardown regression tests' child processes Malloc=1 exposes every deliberately-unreclaimed exit-time WebKit allocation to LeakSanitizer, so the leak sweep enabled by ASAN CI lanes (detect_leaks=1) took minutes in the spawned children and timed the tests out. The use-after-free detection the tests exist for is AddressSanitizer proper and unaffected by detect_leaks=0. --- .../workers/worker-terminate-lifetime.test.ts | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index fde0a2745f7..566016c3469 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -10,6 +10,21 @@ const rounds = slow ? 4 : 8; const perRound = slow ? 12 : 32; const timeout = slow ? 60_000 : 20_000; +// Env for the freed-JSC-handle regression tests below. Malloc=1 routes +// WebKit's fastMalloc through the system allocator (bmalloc DebugHeap) so +// ASAN builds poison freed HandleSet/HandleBlock memory and report the +// use-after-free deterministically; with libpas the freed pages stay mapped +// and the bug only crashes when the pool reuses them. 
That same routing +// exposes every deliberately-unreclaimed exit-time WebKit allocation to +// LeakSanitizer, whose sweep (enabled by ASAN CI lanes via detect_leaks=1) +// then takes minutes — disable it in the child; the use-after-free detection +// these tests exist for is AddressSanitizer proper and unaffected. +const debugHeapEnv = { + ...bunEnv, + Malloc: "1", + ASAN_OPTIONS: [bunEnv.ASAN_OPTIONS, "detect_leaks=0"].filter(Boolean).join(":"), +}; + // Regression: `new Worker(url, { ref: false })` was silently ignored — the // Zig-side `user_keep_alive` field was set from it but never read, and the // parent keep-alive was taken unconditionally in `create()`. `.unref()` after @@ -89,11 +104,6 @@ test( // freed memory (segfault in Bun__StrongRef__delete → // JSC::HandleSet::deallocate → WTF::SentinelLinkedList::remove during // WebWorker::shutdown). -// -// Malloc=1 makes WebKit's fastMalloc use the system allocator (bmalloc -// DebugHeap) so ASAN builds poison the freed HandleSet/HandleBlock memory -// and report the use-after-free deterministically; with libpas the freed -// pages stay mapped and the bug only crashes when the pool reuses them. 
test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC handles", async () => { await using proc = Bun.spawn({ cmd: [ @@ -116,7 +126,7 @@ test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC han console.log("worker closed"); `, ], - env: { ...bunEnv, Malloc: "1" }, + env: debugHeapEnv, stdout: "pipe", stderr: "pipe", }); @@ -151,7 +161,7 @@ test("worker terminated with an in-flight DNS query shuts down cleanly", async ( console.log("terminated ok"); `, ], - env: { ...bunEnv, Malloc: "1" }, + env: debugHeapEnv, stdout: "pipe", stderr: "pipe", }); @@ -169,7 +179,7 @@ test("worker terminated with an in-flight DNS query shuts down cleanly", async ( test("main thread that loaded Bun.SQL and Bun.s3 destructs on exit without touching freed JSC handles", async () => { await using proc = Bun.spawn({ cmd: [bunExe(), "-e", `Bun.SQL; Bun.s3; console.log("loaded");`], - env: { ...bunEnv, BUN_DESTRUCT_VM_ON_EXIT: "1", Malloc: "1" }, + env: { ...debugHeapEnv, BUN_DESTRUCT_VM_ON_EXIT: "1" }, stdout: "pipe", stderr: "pipe", }); From 497f4080148a5daf9bed9e6a31d32ed4e59bc295 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 8 Jun 2026 17:52:58 +0000 Subject: [PATCH 4/6] ci: retrigger From e7c70607c538e8d55b3127da45153e0a0627415f Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Mon, 8 Jun 2026 18:18:58 +0000 Subject: [PATCH 5/6] Skip Malloc=1 in the teardown tests on Windows bmalloc has no system-heap fallback on Windows, so the spawned child aborts at startup before running any JS. No Windows lane runs ASAN, so the env var only served the Linux/macOS ASAN lanes; on Windows the tests still cover the plain clean-shutdown contract. 
--- test/js/web/workers/worker-terminate-lifetime.test.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index 566016c3469..688ffa68e15 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "bun:test"; -import { bunEnv, bunExe, isASAN, isDebug } from "harness"; +import { bunEnv, bunExe, isASAN, isDebug, isWindows } from "harness"; // Worker VM startup/teardown is much slower under debug and/or ASAN; these // tests spawn many workers, so scale iteration counts and timeouts down. @@ -19,9 +19,13 @@ const timeout = slow ? 60_000 : 20_000; // LeakSanitizer, whose sweep (enabled by ASAN CI lanes via detect_leaks=1) // then takes minutes — disable it in the child; the use-after-free detection // these tests exist for is AddressSanitizer proper and unaffected. +// +// Not on Windows: bmalloc's system-heap fallback is unsupported there, so +// Malloc=1 aborts the child at startup, and no Windows lane runs ASAN — the +// tests still cover the plain clean-shutdown contract. const debugHeapEnv = { ...bunEnv, - Malloc: "1", + ...(isWindows ? {} : { Malloc: "1" }), ASAN_OPTIONS: [bunEnv.ASAN_OPTIONS, "detect_leaks=0"].filter(Boolean).join(":"), }; From d5f126fd24bab17f4e76530a00ca7f49b429bab8 Mon Sep 17 00:00:00 2001 From: robobun <117481402+robobun@users.noreply.github.com> Date: Wed, 10 Jun 2026 01:47:30 +0000 Subject: [PATCH 6/6] Address review: hermetic DNS blackhole, close-reject wiring, explicit Malloc clear - The in-flight DNS test now points c-ares at a local UDP socket that never replies (port 0), instead of TEST-NET 192.0.2.1; this also makes the pending-query precondition deterministic (no ICMP fast-fail). 
- The worker readiness waits reject on close so an early worker death fails with a message instead of hanging to the test timeout. - debugHeapEnv explicitly clears an inherited Malloc on Windows. --- .../workers/worker-terminate-lifetime.test.ts | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/test/js/web/workers/worker-terminate-lifetime.test.ts b/test/js/web/workers/worker-terminate-lifetime.test.ts index 688ffa68e15..fc76aef162c 100644 --- a/test/js/web/workers/worker-terminate-lifetime.test.ts +++ b/test/js/web/workers/worker-terminate-lifetime.test.ts @@ -25,7 +25,8 @@ const timeout = slow ? 60_000 : 20_000; // tests still cover the plain clean-shutdown contract. const debugHeapEnv = { ...bunEnv, - ...(isWindows ? {} : { Malloc: "1" }), + // `undefined` also clears a Malloc inherited from the parent environment. + Malloc: isWindows ? undefined : "1", ASAN_OPTIONS: [bunEnv.ASAN_OPTIONS, "detect_leaks=0"].filter(Boolean).join(":"), }; @@ -120,11 +121,14 @@ test("worker that loaded Bun.SQL and Bun.s3 exits without touching freed JSC han // RareData Strong. The worker then drains and exits naturally, // running the full shutdown sequence. const w = new Worker("data:text/javascript," + encodeURIComponent("Bun.SQL; Bun.s3; postMessage('loaded');")); + const closed = new Promise(r => w.addEventListener("close", r, { once: true })); const loaded = new Promise((resolve, reject) => { w.onmessage = resolve; w.onerror = reject; + // A close before "loaded" means the worker died early; fail fast + // instead of hanging on a promise that can never settle. 
+ closed.then(() => reject(new Error("worker closed before posting 'loaded'"))); }); - const closed = new Promise(r => w.addEventListener("close", r, { once: true })); await loaded; await closed; console.log("worker closed"); @@ -152,16 +156,25 @@ test("worker terminated with an in-flight DNS query shuts down cleanly", async ( bunExe(), "-e", ` - const code = \` - const dns = require("node:dns"); - dns.setServers(["192.0.2.1"]); // TEST-NET blackhole: the query stays in flight - dns.promises.resolve4("inflight.example").catch(() => {}); - postMessage("inflight"); - setInterval(() => {}, 1000); // keep the worker alive until terminate() - \`; + // Local UDP socket that never replies: the worker's c-ares query + // stays in flight until terminate() without touching the network. + const udp = await Bun.udpSocket({ socket: { data() {} } }); + const code = + 'const dns = require("node:dns");' + + 'dns.setServers(["127.0.0.1:' + udp.port + '"]);' + + 'dns.promises.resolve4("inflight.example").catch(() => {});' + + 'postMessage("inflight");' + + 'setInterval(() => {}, 1000);'; // keep the worker alive until terminate() const w = new Worker("data:text/javascript," + encodeURIComponent(code)); - await new Promise((res, rej) => { w.onmessage = res; w.onerror = rej; }); + await new Promise((res, rej) => { + w.onmessage = res; + w.onerror = rej; + // A close before "inflight" means the worker died early; fail fast + // instead of hanging on a promise that can never settle. + w.addEventListener("close", () => rej(new Error("worker closed before posting 'inflight'")), { once: true }); + }); await w.terminate(); + udp.close(); console.log("terminated ok"); `, ],