diff --git a/src/jsc/bindings/BunClientData.cpp b/src/jsc/bindings/BunClientData.cpp index a80a81f2e9d..031428729e7 100644 --- a/src/jsc/bindings/BunClientData.cpp +++ b/src/jsc/bindings/BunClientData.cpp @@ -125,11 +125,4 @@ void JSVMClientData::create(VM* vm, void* bunVM) clientData->builtinFunctions().exportNames(); } -WebCore::HTTPHeaderIdentifiers& JSVMClientData::httpHeaderIdentifiers() -{ - if (!m_httpHeaderIdentifiers) - m_httpHeaderIdentifiers.emplace(); - return *m_httpHeaderIdentifiers; -} - } // namespace WebCore diff --git a/src/jsc/bindings/BunClientData.h b/src/jsc/bindings/BunClientData.h index 39830bb5384..dfa5cbc2325 100644 --- a/src/jsc/bindings/BunClientData.h +++ b/src/jsc/bindings/BunClientData.h @@ -111,7 +111,12 @@ class JSVMClientData : public JSC::VM::ClientData { JSC::GCClient::IsoSubspace& domBuiltinConstructorSpace() { return m_domBuiltinConstructorSpace; } - WebCore::HTTPHeaderIdentifiers& httpHeaderIdentifiers(); + // Constructed eagerly so the concurrent GC marker + // (Zig::GlobalObject::visitChildrenImpl) never races the mutator on a + // lazy std::optional::emplace(). The ctor only calls + // LazyProperty::initLater ~90 times (stores a tagged function pointer), + // so there is no startup cost worth deferring. + WebCore::HTTPHeaderIdentifiers& httpHeaderIdentifiers() { return m_httpHeaderIdentifiers; } template void forEachOutputConstraintSpace(const Func& func) { @@ -163,7 +168,7 @@ class JSVMClientData : public JSC::VM::ClientData { std::unique_ptr m_clientSubspaces; Vector m_outputConstraintSpaces; - std::optional m_httpHeaderIdentifiers; + WebCore::HTTPHeaderIdentifiers m_httpHeaderIdentifiers; WeakHashSet m_clients; }; diff --git a/src/jsc/bindings/ZigGlobalObject.cpp b/src/jsc/bindings/ZigGlobalObject.cpp index 456898e72aa..9870ff38142 100644 --- a/src/jsc/bindings/ZigGlobalObject.cpp +++ b/src/jsc/bindings/ZigGlobalObject.cpp @@ -3165,7 +3165,15 @@ void GlobalObject::visitChildrenImpl(JSCell* cell, Visitor& visitor) FOR_EACH_GLOBALOBJECT_GC_MEMBER(VISIT_GLOBALOBJECT_GC_MEMBER) #undef VISIT_GLOBALOBJECT_GC_MEMBER - WebCore::clientData(thisObject->vm())->httpHeaderIdentifiers().visit(visitor); + // This runs on a concurrent GC helper thread. Fetch the VM through the + // visitor (AbstractSlotVisitor::vm() returns m_heap.vm(), guaranteed alive + // for the duration of marking) rather than thisObject->vm() which + // dereferences JSGlobalObject::m_vm and can read stale bytes if the cell + // was picked up via conservative scan mid-recycle (see the + // visitGlobalObjectMember(unique_ptr) guard above for the same window). + // A stale m_vm surfaces as a SEGV in TypeCastTraits::isType + // when downcast<> calls the virtual isWebCoreJSClientData() on garbage. + WebCore::clientData(visitor.vm())->httpHeaderIdentifiers().template visit(visitor); thisObject->visitGeneratedLazyClasses(thisObject, visitor); thisObject->visitAdditionalChildrenInGCThread(visitor); diff --git a/test/js/web/broadcastchannel/broadcast-channel-worker-gc.test.ts b/test/js/web/broadcastchannel/broadcast-channel-worker-gc.test.ts index f4c4bc693a6..01e7c8d5b6e 100644 --- a/test/js/web/broadcastchannel/broadcast-channel-worker-gc.test.ts +++ b/test/js/web/broadcastchannel/broadcast-channel-worker-gc.test.ts @@ -1,5 +1,5 @@ import { expect, test } from "bun:test"; -import { bunEnv, bunExe, isASAN, isDebug } from "harness"; +import { bunEnv, bunExe, isASAN, isDebug, isWindows } from "harness"; // Debug/ASAN builds are much slower at spawning workers. const timeout = isDebug || isASAN ? 60_000 : 10_000; @@ -185,6 +185,108 @@ test( timeout, ); +// SEGV in TypeCastTraits::isType reached from +// Zig::GlobalObject::visitChildrenImpl on a concurrent GC helper thread. The +// visit used `clientData(thisObject->vm())` (raw JSGlobalObject::m_vm deref) +// and `httpHeaderIdentifiers()` did an unsynchronized std::optional::emplace() +// that both the mutator and parallel marker threads could enter. Observed in +// production crash reports almost exclusively on Windows x64, strongly +// correlated with worker spawn+terminate. +// +// This stress maximises the race surface: +// - extra ShadowRealm globals so multiple parallel marker helpers each visit +// a distinct Zig::GlobalObject and all dereference vm.clientData +// - continuous allocation so the concurrent collector is always active +// - worker spawn/terminate churn for the reported correlation +// +// The race window is too narrow to trip deterministically on Linux even under +// ASAN + Malloc=1 + collectContinuously, so this test is a guard rather than +// a fail-before proof; it is the Windows CI lane that intermittently crashed +// with this signature on the unfixed build. +test( + "GlobalObject::visitChildren does not dereference stale vm.clientData under parallel marking", + async () => { + const script = /* js */ ` + const workerCode = \` + // Extra Zig::GlobalObject cells in this VM so parallel GC helper + // threads each get one to visit and all call clientData(vm). + const realms = []; + for (let i = 0; i < 6; i++) { try { realms.push(new ShadowRealm()); } catch {} } + + const bc = new BroadcastChannel("clientdata-visit"); + bc.onmessage = () => {}; + + // Keep the concurrent marker busy so visitChildrenImpl fires + // repeatedly on helper threads while the mutator runs. + const junk = []; + for (let i = 0; i < 2000; i++) junk.push({ i, a: [i, i] }); + Bun.gc(true); + postMessage("ready"); + for (let i = 0; i < 2000; i++) junk.push({ i }); + \`; + const blobUrl = URL.createObjectURL(new Blob([workerCode], { type: "application/javascript" })); + + const mainChannel = new BroadcastChannel("clientdata-visit"); + mainChannel.onmessage = () => {}; + + // Extra globals on the main VM too. + const realms = []; + for (let i = 0; i < 6; i++) { try { realms.push(new ShadowRealm()); } catch {} } + + for (let round = 0; round < 6; round++) { + const workers = []; + const ready = []; + for (let i = 0; i < 4; i++) { + const w = new Worker(blobUrl); + const { promise, resolve, reject } = Promise.withResolvers(); + w.onmessage = () => resolve(); + w.onerror = (e) => reject(e.message ?? String(e)); + workers.push(w); + ready.push(promise); + } + await Promise.all(ready); + + // Allocation pressure on main so its parallel markers are live while + // worker VMs tear down. + const junk = []; + for (let i = 0; i < 3000; i++) junk.push({ a: i, b: [i] }); + + for (const w of workers) { + mainChannel.postMessage("x"); + w.terminate(); + } + for (let i = 0; i < 3; i++) Bun.gc(true); + junk.length = 0; + } + + mainChannel.close(); + console.log("OK"); + `; + + await using proc = Bun.spawn({ + cmd: [bunExe(), "-e", script], + env: { + ...bunEnv, + BUN_JSC_numberOfGCMarkers: "8", + // Route bmalloc/libpas to the system allocator so ASAN can see a UAF + // on JSVMClientData instead of it being masked by the pool. Not set + // on Windows: bmalloc's SystemHeap is unimplemented there and would + // RELEASE_BASSERT, and Windows builds have no ASAN lane anyway. + ...(isWindows ? {} : { Malloc: "1" }), + }, + stderr: "pipe", + stdout: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(filterStderr(stderr)).toBe(""); + expect(stdout.trim()).toBe("OK"); + expect(exitCode).toBe(0); + }, + timeout, +); + test( "BroadcastChannel: repeated worker create/terminate stress", async () => {