diff --git a/src/jsc/bindings/NodeVMModule.cpp b/src/jsc/bindings/NodeVMModule.cpp index 23292142dc3..c587d37a848 100644 --- a/src/jsc/bindings/NodeVMModule.cpp +++ b/src/jsc/bindings/NodeVMModule.cpp @@ -106,14 +106,20 @@ JSValue NodeVMModule::evaluate(JSGlobalObject* globalObject, uint32_t timeout, b std::ignore = scope.exception(); if (vm.hasTerminationRequest() || vm.hasPendingTerminationException()) { vm.drainMicrotasksForGlobalObject(nodeVmGlobalObject); - DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); - vm.clearHasTerminationRequest(); if (getSigintReceived()) { + DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); + vm.clearHasTerminationRequest(); setSigintReceived(false); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_INTERRUPTED, "Script execution was interrupted by `SIGINT`"_s); - } else { + } else if (timeout != 0) { + DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); + vm.clearHasTerminationRequest(); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_TIMEOUT, makeString("Script execution timed out after "_s, timeout, "ms"_s)); } + // else: termination came from outside this module evaluation + // (the bun:test watchdog around the test body, + // Worker.terminate(), etc.). Leave the TerminationException + // pending so it propagates to whoever armed it. return {}; } } @@ -246,16 +252,20 @@ JSValue NodeVMModule::evaluate(JSGlobalObject* globalObject, uint32_t timeout, b std::ignore = scope.exception(); if (vm.hasTerminationRequest() || vm.hasPendingTerminationException()) { vm.drainMicrotasksForGlobalObject(nodeVmGlobalObject); - DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); - vm.clearHasTerminationRequest(); if (getSigintReceived()) { + DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); + vm.clearHasTerminationRequest(); setSigintReceived(false); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_INTERRUPTED, "Script execution was interrupted by `SIGINT`"_s); } else if (timeout != 0) { + DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); + vm.clearHasTerminationRequest(); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_TIMEOUT, makeString("Script execution timed out after "_s, timeout, "ms"_s)); - } else { - RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("vm.SourceTextModule evaluation terminated due neither to SIGINT nor to timeout"); } + // else: termination came from outside this module evaluation (the + // bun:test watchdog around the test body, Worker.terminate(), etc.). + // Leave the TerminationException pending so it propagates to + // whoever armed it; VM_RETURN_IF_EXCEPTION below bails out. } else { setSigintReceived(false); } diff --git a/src/jsc/bindings/NodeVMScript.cpp b/src/jsc/bindings/NodeVMScript.cpp index 90d1df51b52..edbf1aec920 100644 --- a/src/jsc/bindings/NodeVMScript.cpp +++ b/src/jsc/bindings/NodeVMScript.cpp @@ -288,17 +288,23 @@ static bool checkForTermination(JSC::VM& vm, JSC::JSGlobalObject* globalObject, // The termination may have fired inside an afterEvaluate microtask // checkpoint, leaving the termination exception pending; clear it so // the ERR_SCRIPT_EXECUTION_* error below replaces it. - if (vm.hasPendingTerminationException()) - DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); - vm.clearHasTerminationRequest(); + auto clearPendingTermination = [&] { + if (vm.hasPendingTerminationException()) + DECLARE_TOP_EXCEPTION_SCOPE(vm).clearException(); + vm.clearHasTerminationRequest(); + }; if (script->getSigintReceived()) { + clearPendingTermination(); script->setSigintReceived(false); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_INTERRUPTED, "Script execution was interrupted by `SIGINT`"_s); } else if (timeout) { + clearPendingTermination(); throwError(globalObject, scope, ErrorCode::ERR_SCRIPT_EXECUTION_TIMEOUT, makeString("Script execution timed out after "_s, *timeout, "ms"_s)); - } else { - RELEASE_ASSERT_NOT_REACHED_WITH_MESSAGE("vm.Script terminated due neither to SIGINT nor to timeout"); } + // else: termination came from outside this Script (the bun:test + // watchdog around the test body, Worker.terminate(), etc.). Leave + // the request set and the TerminationException in the scope so it + // propagates to whoever armed it. return true; } diff --git a/src/jsc/bindings/bindings.cpp b/src/jsc/bindings/bindings.cpp index 51044d19fe4..9452b428f6f 100644 --- a/src/jsc/bindings/bindings.cpp +++ b/src/jsc/bindings/bindings.cpp @@ -2870,8 +2870,15 @@ void JSC__VM__collectAsync(JSC::VM* vm) extern "C" bool JSC__VM__hasExecutionTimeLimit(JSC::VM* vm) { JSC::JSLockHolder locker(vm); - if (vm->watchdog()) { - return vm->watchdog()->hasTimeLimit(); + if (auto* watchdog = vm->watchdog()) { + // The bun:test runner relaxes the watchdog to INT32_MAX seconds + // between test callbacks (see run_test_callback in bun_test.rs) + // instead of clearing it, to keep Watchdog's internal state + // consistent across stale dispatchAfter callbacks. Treat that idle + // sentinel as "no limit" so it doesn't permanently opt spawnSync + // out of its blocking fast path; no real caller sets a limit in + // this range. + return watchdog->hasTimeLimit() && watchdog->getTimeLimit() < WTF::Seconds { static_cast(INT32_MAX) }; } return false; @@ -4922,6 +4929,21 @@ void JSC__VM__setExecutionTimeLimit(JSC::VM* vm, double limit) { JSC::JSLockHolder locker(vm); JSC::Watchdog& watchdog = vm->ensureWatchdog(); + // When called from inside an existing VMEntryScope, start the watchdog + // timer now: the scope's own Watchdog::enteredVM() only runs on the + // outermost entry, so if the watchdog is being created for the first + // time here, m_hasEnteredVM would otherwise stay false and the timer + // would never arm. Mirrors setupWatchdog() in NodeVMScript.cpp. + // + // When called from *outside* any VMEntryScope (the test runner arming + // before callback.call()), leave m_hasEnteredVM alone — the next + // VMEntryScope's setUpSlow() will call enteredVM() and start the timer. + // Forcing it true here with no scope active would make + // Watchdog::isActive() lie, and VMTraps::handleTraps() would then + // dereference the null vm.entryScope when servicing a stale + // NeedWatchdogCheck trap from native code. + if (vm->entryScope) + watchdog.enteredVM(); watchdog.setTimeLimit(WTF::Seconds { limit }); } diff --git a/src/runtime/test_runner/Execution.rs b/src/runtime/test_runner/Execution.rs index a803a5a1fc8..d5f5deb278a 100644 --- a/src/runtime/test_runner/Execution.rs +++ b/src/runtime/test_runner/Execution.rs @@ -301,9 +301,6 @@ impl Execution { pub fn handle_timeout(&mut self, global_this: &JSGlobalObject) -> JsResult<()> { let _g = group_begin!(); - // if the concurrent group has one sequence and the sequence has an active entry that has timed out, - // kill any dangling processes - // when using test.concurrent(), we can't do this because it could kill multiple tests at once. if let Some(current_group) = self.active_group() { // reshaped for borrowck — capture range, drop &mut group, re-borrow sequences let (start, end) = (current_group.sequence_start, current_group.sequence_end); @@ -315,16 +312,7 @@ impl Execution { let entry = unsafe { entry.as_ref() }; let now = Timespec::now_force_real_time(); if entry.timespec.order(&now) == core::cmp::Ordering::Less { - // SAFETY: bun_vm() returns the live per-thread VM. - let kill_count = global_this.bun_vm().as_mut().auto_killer.kill(); - if kill_count.processes > 0 { - bun_core::pretty_errorln!( - "killed {} dangling process{}", - kill_count.processes, - if kill_count.processes != 1 { "es" } else { "" }, - ); - bun_core::Output::flush(); - } + kill_dangling_processes(end - start, global_this); } } } @@ -1029,7 +1017,11 @@ fn step_sequence_one( // SAFETY: re-deref after run_test_callback; sequence_ptr still valid (sequences is a // Box<[ExecutionSequence]>, never reallocated during execution). let sequence = unsafe { &mut *sequence_ptr.as_ptr() }; - let _ = next_item.evaluate_timeout(sequence, now); + if next_item.evaluate_timeout(sequence, now) { + // SAFETY: group points into this.groups; read-only. + let g = unsafe { group.as_ref() }; + kill_dangling_processes(g.sequence_end - g.sequence_start, global_this); + } // the result is available immediately; advance the sequence and run again. Execution::advance_sequence(buntest_ptr, sequence_ptr, group); @@ -1070,3 +1062,26 @@ fn step_sequence_one( return Ok(None); // run again } } + +/// Kill child processes spawned by the timed-out test so they don't outlive +/// it. Skipped under test.concurrent() because the auto-killer tracks +/// processes globally and we'd take out other still-running tests' children. +/// Called from both the event-loop timer path (handle_timeout) and the +/// synchronous-return path in step_sequence_one when the JSC watchdog has +/// interrupted a busy-looping callback — in that case the event-loop timer +/// never fires, so this is the only chance to clean up. +fn kill_dangling_processes(group_sequence_count: usize, global_this: &JSGlobalObject) { + if group_sequence_count != 1 { + return; + } + // SAFETY: bun_vm() returns the live per-thread VM. + let kill_count = global_this.bun_vm().as_mut().auto_killer.kill(); + if kill_count.processes > 0 { + bun_core::pretty_errorln!( + "killed {} dangling process{}", + kill_count.processes, + if kill_count.processes != 1 { "es" } else { "" }, + ); + bun_core::Output::flush(); + } +} diff --git a/src/runtime/test_runner/bun_test.rs b/src/runtime/test_runner/bun_test.rs index a59d41f484d..ccf069636f4 100644 --- a/src/runtime/test_runner/bun_test.rs +++ b/src/runtime/test_runner/bun_test.rs @@ -1150,6 +1150,48 @@ impl BunTest { // SAFETY: `UnsafeCell`-derived; sole `&mut` at this point (before JS re-entry). unsafe { (*this).update_min_timeout(global_this, timeout) }; + + // Arm the JSC watchdog so synchronous infinite loops (e.g. + // `while (true);`) in the test body are interrupted. The event-loop + // timer above can only fire once control returns to the event loop, + // which never happens if the callback never yields. JSC's Watchdog + // schedules on a separate VMTraps queue thread and raises a + // TerminationException at the next safepoint; the Err arm below + // clears it so subsequent tests can run, and evaluate_timeout() in + // step_sequence_one() reports FailBecauseTimeout. + // + // The watchdog is a hang detector, not the precise timer — that's the + // event-loop timer's job for callbacks that yield. A one-second grace + // over the test deadline avoids interrupting a synchronous prologue + // (spawning a child, building fixtures) that would have yielded in + // time for the event-loop timer to handle the timeout on the next + // tick; without it, very short per-test timeouts would be cut off + // before they reach their first await. + // + // On return, the limit is relaxed to a large finite sentinel rather + // than cleared to noTimeLimit. Watchdog::startTimer()'s dispatchAfter + // can't be cancelled; if m_timeLimit were infinity then the next + // VMEntryScope's enteredVM() would skip startTimer (no hasTimeLimit), + // leaving m_cpuDeadline at the infinity exitedVM() parked it at, and + // when the stale dispatch fires shouldTerminate() would call + // startTimer(∞) and trip ASSERT(hasTimeLimit()). Keeping a finite + // limit makes every enteredVM() refresh m_cpuDeadline so the stale + // timer resolves to a harmless early return. + const WATCHDOG_GRACE_SECONDS: f64 = 1.0; + const WATCHDOG_IDLE_SECONDS: f64 = i32::MAX as f64; + let watchdog_armed = !timeout.eql(&Timespec::EPOCH); + if watchdog_armed { + let now = Timespec::now_force_real_time(); + let remaining_ns: u64 = if timeout.order(&now).is_gt() { timeout.duration(&now).ns() } else { 0 }; + let remaining_seconds = remaining_ns as f64 / bun_core::time::NS_PER_S as f64; + vm.jsc_vm().set_execution_time_limit(remaining_seconds + WATCHDOG_GRACE_SECONDS); + } + let _watchdog_relax = scopeguard::guard(watchdog_armed, |armed| { + if armed { + vm.jsc_vm().set_execution_time_limit(WATCHDOG_IDLE_SECONDS); + } + }); + let args_slice: &[JSValue] = if !done_arg.is_empty() { core::slice::from_ref(&done_arg) } else { &[] }; let result: JSValue = match vm.event_loop_mut().run_callback_with_result_and_forcefully_drain_microtasks( cfg_callback, diff --git a/test/cli/test/test-timeout-behavior.test.ts b/test/cli/test/test-timeout-behavior.test.ts index 07e02e3f039..64452fc33db 100644 --- a/test/cli/test/test-timeout-behavior.test.ts +++ b/test/cli/test/test-timeout-behavior.test.ts @@ -1,7 +1,102 @@ import { expect, test } from "bun:test"; -import { bunEnv, bunExe, isFlaky, isLinux } from "harness"; +import { bunEnv, bunExe, isFlaky, isLinux, tempDir } from "harness"; import path from "path"; +// https://github.com/oven-sh/bun/issues/21277 +// A synchronous infinite loop in a test body must be interrupted by the +// per-test timeout. The event-loop timer alone cannot fire while JS is +// running, so the JSC watchdog is armed around the callback to raise a +// TerminationException at the next safepoint. +test.concurrent("synchronous infinite loop is interrupted by --timeout", async () => { + using dir = tempDir("timeout-sync-loop", { + "loop.test.ts": ` + import { test } from "bun:test"; + test("spins forever", () => { + while (true); + }); + test("runs after the timed-out test", () => {}); + `, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--timeout=500", "loop.test.ts"], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + const combined = stdout + stderr; + + // The spinning test is reported as a timeout (not a generic failure), + // and the next test in the file still runs. + expect(combined).toContain("(fail) spins forever"); + expect(combined).toContain("timed out after 500ms"); + expect(combined).toContain("(pass) runs after the timed-out test"); + expect(exitCode).toBe(1); +}); + +test.concurrent("synchronous infinite loop after awaited microtask is interrupted by --timeout", async () => { + using dir = tempDir("timeout-sync-loop-microtask", { + "loop.test.ts": ` + import { test } from "bun:test"; + test("spins after await", async () => { + await Promise.resolve(); + while (true); + }); + `, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--timeout=500", "loop.test.ts"], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + const combined = stdout + stderr; + + expect(combined).toContain("(fail) spins after await"); + expect(combined).toContain("timed out after 500ms"); + expect(exitCode).toBe(1); +}); + +// The outer watchdog's TerminationException must propagate through +// node:vm's Script/Module evaluation when the user didn't pass a +// {timeout} option — NodeVMScript::checkForTermination previously +// RELEASE_ASSERT'd that the termination came from its own watchdog. +test.concurrent("synchronous infinite loop inside node:vm without {timeout} is interrupted", async () => { + using dir = tempDir("timeout-sync-loop-nodevm", { + "loop.test.ts": ` + import { test } from "bun:test"; + import vm from "node:vm"; + test("spins inside runInThisContext", () => { + vm.runInThisContext("while (true);"); + }); + test("runs after the timed-out test", () => {}); + `, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test", "--timeout=500", "loop.test.ts"], + env: bunEnv, + cwd: String(dir), + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + const combined = stdout + stderr; + + expect(combined).toContain("(fail) spins inside runInThisContext"); + expect(combined).toContain("timed out after 500ms"); + expect(combined).toContain("(pass) runs after the timed-out test"); + expect(exitCode).toBe(1); +}); + if (isFlaky && isLinux) { test.todo("processes get killed"); } else {