From d275a271301ad8d3cd7e6d8396c8a6a89525a758 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Mon, 27 Apr 2026 23:30:17 +0100 Subject: [PATCH 1/3] fix(execute): run backend under Node so isolated-vm loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bun uses JavaScriptCore and exposes a partial V8 ABI shim — enough that node-gyp-build picks the abi137 prebuild for isolated-vm at install time, but not enough that the addon dlopens at runtime. Loading throws 'undefined symbol: v8::ValueSerializer::Delegate::HasCustomHostObject'. The previous fix (#427) caught this and returned RuntimeUnavailable instead of crashing — but the execute MCP tool still silently failed in prod. Fix the underlying mismatch by running the backend under Node: the source layout, dependencies, and Hono server entry are already Node-style ('Node.js Server Entry Point' is in the docblock); the only Bun-specific thing was 'exec bun src/server.ts' in start.sh. Switch start.sh to 'exec node --import tsx src/server.ts'. tsx is already installed (devDep, copied into the runtime image via the existing node_modules pipeline) and handles ESM + TypeScript loading without a precompile step. Verified end-to-end inside the live prod image: node --import tsx loads isolated-vm, runs runScript() and returns the expected value. The regression silently shipped because the existing sandbox tests ran under bun test with a 'skipIfRuntimeUnavailable' helper that turned the load failure into a no-op pass. Replace with a vitest integration test that fails loudly when isolated-vm can't load, and wire it into CI invoking vitest under node directly. SKIP_TEST_DB_SETUP=1 keeps it fast — the test uses stub SDKs and doesn't need Postgres. The broader vitest suite (~38 integration files) is not yet wired into CI; many are stale after #348's MCP tool consolidation. Tracked separately. 
--- .github/workflows/ci.yml | 16 +++++ docker/app/start.sh | 11 +++- packages/owletto-backend/package.json | 1 + .../sandbox/run-script-runtime.test.ts} | 63 +++++++++++-------- 4 files changed, 63 insertions(+), 28 deletions(-) rename packages/owletto-backend/src/__tests__/{unit/sandbox/run-script.test.ts => integration/sandbox/run-script-runtime.test.ts} (69%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2409cdef..a1fb75dac 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,6 +41,22 @@ jobs: # Worker tests that don't transitively load pi-coding-agent runtime (WASM unavailable on CI) bun test packages/worker/src/__tests__/embedded-tools.test.ts packages/worker/src/__tests__/model-resolver.test.ts packages/worker/src/__tests__/tool-policy.test.ts packages/worker/src/__tests__/processor.test.ts packages/worker/src/__tests__/audio-provider-suggestions.test.ts packages/worker/src/__tests__/generated-media.test.ts packages/worker/src/__tests__/tool-implementations.test.ts packages/worker/src/__tests__/instructions.test.ts packages/worker/src/__tests__/custom-tools.test.ts + # The execute MCP tool runs scripts in isolated-vm — a V8 native addon. + # Bun (JavaScriptCore) cannot link the V8 ABI, so this test must run + # under Node, the production runtime. Invoking vitest via `node` (not + # `bun run`) guarantees the runtime even though the binary's shebang + # already points at node. SKIP_TEST_DB_SETUP=1 keeps this fast — the + # test uses a stub SDK and doesn't need Postgres. + # + # The broader vitest suite (~38 integration files) is not yet wired + # into CI; many are stale after the manage_* → execute/search MCP + # consolidation in #348. Tracked separately. 
+ - name: Sandbox runtime test (Node + isolated-vm) + working-directory: packages/owletto-backend + env: + SKIP_TEST_DB_SETUP: '1' + run: node ../../node_modules/.bin/vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts + - name: Upload coverage if: always() uses: codecov/codecov-action@v4 diff --git a/docker/app/start.sh b/docker/app/start.sh index e30959b0b..df300bb9d 100644 --- a/docker/app/start.sh +++ b/docker/app/start.sh @@ -3,7 +3,7 @@ set -e MODE="${1:-server}" -echo "Starting Owletto backend (Bun)" +echo "Starting Owletto backend (Node + tsx)" echo "================================" echo "Environment:" @@ -41,4 +41,11 @@ else run_migrations fi -exec bun /app/packages/owletto-backend/src/server.ts +# Run under Node so V8 native addons (isolated-vm) load. +# Bun uses JavaScriptCore and cannot link the V8 ABI surface that +# isolated-vm requires; the execute MCP tool silently degrades to +# RuntimeUnavailable under bun. tsx provides the TS loader so the +# source layout stays uncompiled. cwd needs to be the package so +# `--import tsx` resolves from owletto-backend's node_modules. 
+cd /app/packages/owletto-backend +exec node --import tsx src/server.ts diff --git a/packages/owletto-backend/package.json b/packages/owletto-backend/package.json index 67d6d613c..da3d8ef22 100644 --- a/packages/owletto-backend/package.json +++ b/packages/owletto-backend/package.json @@ -10,6 +10,7 @@ "start": "tsx src/server.ts", "test": "vitest", "test:pglite": "OWLETTO_TEST_BACKEND=pglite vitest", + "test:sandbox-runtime": "SKIP_TEST_DB_SETUP=1 vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts", "typecheck": "tsc --noEmit", "lint": "biome lint src", "lint:fix": "biome lint --write src", diff --git a/packages/owletto-backend/src/__tests__/unit/sandbox/run-script.test.ts b/packages/owletto-backend/src/__tests__/integration/sandbox/run-script-runtime.test.ts similarity index 69% rename from packages/owletto-backend/src/__tests__/unit/sandbox/run-script.test.ts rename to packages/owletto-backend/src/__tests__/integration/sandbox/run-script-runtime.test.ts index b9c91c859..493640518 100644 --- a/packages/owletto-backend/src/__tests__/unit/sandbox/run-script.test.ts +++ b/packages/owletto-backend/src/__tests__/integration/sandbox/run-script-runtime.test.ts @@ -1,16 +1,44 @@ -import { describe, expect, it } from "bun:test"; +/** + * Sandbox runtime integration test. + * + * Asserts that the host runtime can actually load `isolated-vm` and run a + * script end-to-end. Lives under integration/; run it locally with the + * `test:sandbox-runtime` package script. CI invokes vitest on this file + * directly under Node, the production runtime. + * + * Background: `isolated-vm` is a V8 native addon. Bun (which uses + * JavaScriptCore with a partial V8 ABI shim) cannot load it; the addon + * throws at dlopen. The previous bun:test version of this suite hid that + * gap by skipping when the runner reported `RuntimeUnavailable`. The + * production app image silently regressed for months as a result.
+ * + * This file deliberately fails (not skips) when the runtime can't load + * `isolated-vm` so the regression cannot ship again. + */ + +import { describe, expect, it } from "vitest"; import type { ClientSDK } from "../../../sandbox/client-sdk"; import { getDefaultLimits, runScript } from "../../../sandbox/run-script"; -function skipIfRuntimeUnavailable( - result: Awaited<ReturnType<typeof runScript>>, -): boolean { - if (result.error?.name !== "RuntimeUnavailable") return false; - expect(result.success).toBe(false); - return true; -} +describe("sandbox runtime", () => { + it("loads isolated-vm and runs a trivial script", async () => { + const stubSdk = { log: () => undefined } as unknown as ClientSDK; + const result = await runScript({ + source: "export default async () => 1 + 2;", + sdk: stubSdk, + }); + if (result.error?.name === "RuntimeUnavailable") { + throw new Error( + "isolated-vm failed to load under the test runtime. " + + "Production runs the backend under Node; this test must too. " + + `Detail: ${result.error.message}`, + ); + } + expect(result.success).toBe(true); + expect(result.returnValue).toBe(3); + expect(result.sdkCalls).toBe(0); + }); -describe("runScript", () => { it("exposes default resource limits", () => { const limits = getDefaultLimits(); expect(limits.memoryMb).toBe(64); @@ -32,21 +60,6 @@ describe("runScript", () => { expect(result.durationMs).toBeGreaterThanOrEqual(0); }); - it("runs a default-export script and returns its value", async () => { - const stubSdk = { log: () => undefined } as unknown as ClientSDK; - const result = await runScript({ - source: "export default async () => 1 + 2;", - sdk: stubSdk, - }); - // Skip on environments where the optional native module is unavailable - // (the runner reports RuntimeUnavailable). Otherwise the bridge must - // succeed and forward the return value.
- if (skipIfRuntimeUnavailable(result)) return; - expect(result.success).toBe(true); - expect(result.returnValue).toBe(3); - expect(result.sdkCalls).toBe(0); - }); - it("supports direct client.org(slug).namespace.method() chaining", async () => { const orgSdk = { entities: { @@ -73,7 +86,6 @@ describe("runScript", () => { sdk: stubSdk, }); - if (skipIfRuntimeUnavailable(result)) return; expect(result.success).toBe(true); expect(result.returnValue).toEqual({ org: "atlas", id: 123 }); expect(result.sdkCalls).toBe(1); @@ -95,7 +107,6 @@ describe("runScript", () => { limits: { timeoutMs: 25 }, }); - if (skipIfRuntimeUnavailable(result)) return; expect(result.success).toBe(false); expect(result.error?.name).toBe("TimeoutError"); expect(result.sdkCalls).toBe(1); From fd606283e2b13bdc6ca36cf3c9bd784a2d4f4db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Mon, 27 Apr 2026 23:34:26 +0100 Subject: [PATCH 2/3] ci: pin Node 22 so isolated-vm prebuild loads in test job --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1fb75dac..2a6a759eb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,6 +22,13 @@ jobs: with: bun-version: 1.3.5 + # Pin Node so the Sandbox runtime test below can load isolated-vm — + # it ships abi127 (Node 22) and abi137 (Node 24) prebuilds; runner + # default may be a non-matching major. 
+ - uses: actions/setup-node@v4 + with: + node-version: '22' + - name: Install dependencies run: bun install From 224051c7681c38ca0f7cbeca39c5e11060bd7cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Mon, 27 Apr 2026 23:38:47 +0100 Subject: [PATCH 3/3] fix(start): keep cwd=/app, use absolute tsx loader path Codex review on #430 caught that 'cd /app/packages/owletto-backend' breaks bundled config resolution: gateway's ProviderRegistryService and the embedded agent routes both resolve 'config/providers.json' from process.cwd(). Under the previous cwd of /app, this finds /app/config/providers.json. Under /app/packages/owletto-backend, it resolves to a path that doesn't exist and the bundled provider registry is silently empty. Pass tsx as an absolute file URL so the loader resolves regardless of cwd, and run server.ts by absolute path. Verified in the live prod image: runScript() returns success with cwd preserved at /app. --- docker/app/start.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docker/app/start.sh b/docker/app/start.sh index df300bb9d..5062b5b4b 100644 --- a/docker/app/start.sh +++ b/docker/app/start.sh @@ -45,7 +45,12 @@ fi # Bun uses JavaScriptCore and cannot link the V8 ABI surface that # isolated-vm requires; the execute MCP tool silently degrades to # RuntimeUnavailable under bun. tsx provides the TS loader so the -# source layout stays uncompiled. cwd needs to be the package so -# `--import tsx` resolves from owletto-backend's node_modules. -cd /app/packages/owletto-backend -exec node --import tsx src/server.ts +# source layout stays uncompiled. +# +# Keep cwd=/app — gateway services and embedded agent routes resolve +# bundled config (`config/providers.json`) relative to process.cwd(). +# Use the absolute tsx loader path so the resolution doesn't depend +# on cwd or PATH. 
+exec node \ + --import "file:///app/packages/owletto-backend/node_modules/tsx/dist/loader.mjs" \ + /app/packages/owletto-backend/src/server.ts