lobu-ai · buremba · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -22,6 +22,13 @@ jobs:
         with:
           bun-version: 1.3.5
 
+      # Pin Node so the Sandbox runtime test below can load isolated-vm —
+      # it ships abi127 (Node 22) and abi137 (Node 24) prebuilds, and the
+      # runner's default Node version may be a non-matching major.
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+
       - name: Install dependencies
         run: bun install
 
@@ -41,6 +48,22 @@ jobs:
           # Worker tests that don't transitively load pi-coding-agent runtime (WASM unavailable on CI)
           bun test packages/worker/src/__tests__/embedded-tools.test.ts packages/worker/src/__tests__/model-resolver.test.ts packages/worker/src/__tests__/tool-policy.test.ts packages/worker/src/__tests__/processor.test.ts packages/worker/src/__tests__/audio-provider-suggestions.test.ts packages/worker/src/__tests__/generated-media.test.ts packages/worker/src/__tests__/tool-implementations.test.ts packages/worker/src/__tests__/instructions.test.ts packages/worker/src/__tests__/custom-tools.test.ts
 
+      # The execute MCP tool runs scripts in isolated-vm — a V8 native addon.
+      # Bun (JavaScriptCore) cannot link the V8 ABI, so this test must run
+      # under Node, the production runtime. Invoking vitest via `node` (not
+      # `bun run`) guarantees the runtime even though the binary's shebang
+      # already points at node. SKIP_TEST_DB_SETUP=1 keeps this fast — the
+      # test uses a stub SDK and doesn't need Postgres.
+      #
+      # The broader vitest suite (~38 integration files) is not yet wired
+      # into CI; many are stale after the manage_* → execute/search MCP
+      # consolidation in #348. Tracked separately.
+      - name: Sandbox runtime test (Node + isolated-vm)
+        working-directory: packages/owletto-backend
+        env:
+          SKIP_TEST_DB_SETUP: '1'
+        run: node ../../node_modules/.bin/vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts
+
       - name: Upload coverage
         if: always()
         uses: codecov/codecov-action@v4

diff --git a/docker/app/start.sh b/docker/app/start.sh
@@ -3,7 +3,7 @@ set -e
 
 MODE="${1:-server}"
 
-echo "Starting Owletto backend (Bun)"
+echo "Starting Owletto backend (Node + tsx)"
 echo "================================"
 
 echo "Environment:"
@@ -41,4 +41,16 @@ else
   run_migrations
 fi
 
-exec bun /app/packages/owletto-backend/src/server.ts
+# Run under Node so V8 native addons (isolated-vm) load.
+# Bun uses JavaScriptCore and cannot link the V8 ABI surface that
+# isolated-vm requires; the execute MCP tool silently degrades to
+# RuntimeUnavailable under bun. tsx provides the TS loader so the
+# source layout stays uncompiled.
+#
+# Keep cwd=/app — gateway services and embedded agent routes resolve
+# bundled config (`config/providers.json`) relative to process.cwd().
+# Use the absolute tsx loader path so the resolution doesn't depend
+# on cwd or PATH.
+exec node \
+  --import "file:///app/packages/owletto-backend/node_modules/tsx/dist/loader.mjs" \
+  /app/packages/owletto-backend/src/server.ts
diff --git a/packages/owletto-backend/package.json b/packages/owletto-backend/package.json
@@ -10,6 +10,7 @@
     "start": "tsx src/server.ts",
     "test": "vitest",
     "test:pglite": "OWLETTO_TEST_BACKEND=pglite vitest",
+    "test:sandbox-runtime": "SKIP_TEST_DB_SETUP=1 vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts",
     "typecheck": "tsc --noEmit",
     "lint": "biome lint src",
     "lint:fix": "biome lint --write src",

diff --git a/...__tests__/unit/sandbox/run-script.test.ts → ...ration/sandbox/run-script-runtime.test.ts b/...__tests__/unit/sandbox/run-script.test.ts → ...ration/sandbox/run-script-runtime.test.ts
@@ -1,16 +1,44 @@
-import { describe, expect, it } from "bun:test";
+/**
+ * Sandbox runtime integration test.
+ *
+ * Asserts that the host runtime can actually load `isolated-vm` and run a
+ * script end-to-end. Lives under integration/ and can be run locally via
+ * the `test:sandbox-runtime` package script; CI launches vitest directly
+ * with `node` so the file always runs under Node, the production runtime.
+ *
+ * Background: `isolated-vm` is a V8 native addon. Bun (which uses
+ * JavaScriptCore with a partial V8 ABI shim) cannot load it; the addon
+ * throws at dlopen. The previous bun:test version of this suite hid that
+ * gap by skipping when the runner reported `RuntimeUnavailable`. The
+ * production app image silently regressed for months as a result.
+ *
+ * This file deliberately fails (not skips) when the runtime can't load
+ * `isolated-vm` so the regression cannot ship again.
+ */
+
+import { describe, expect, it } from "vitest";
 import type { ClientSDK } from "../../../sandbox/client-sdk";
 import { getDefaultLimits, runScript } from "../../../sandbox/run-script";
 
-function skipIfRuntimeUnavailable(
-  result: Awaited<ReturnType<typeof runScript>>,
-): boolean {
-  if (result.error?.name !== "RuntimeUnavailable") return false;
-  expect(result.success).toBe(false);
-  return true;
-}
+describe("sandbox runtime", () => {
+  it("loads isolated-vm and runs a trivial script", async () => {
+    const stubSdk = { log: () => undefined } as unknown as ClientSDK;
+    const result = await runScript({
+      source: "export default async () => 1 + 2;",
+      sdk: stubSdk,
+    });
+    if (result.error?.name === "RuntimeUnavailable") {
+      throw new Error(
+        "isolated-vm failed to load under the test runtime. " +
+          "Production runs the backend under Node; this test must too. " +
+          `Detail: ${result.error.message}`,
+      );
+    }
+    expect(result.success).toBe(true);
+    expect(result.returnValue).toBe(3);
+    expect(result.sdkCalls).toBe(0);
+  });
 
-describe("runScript", () => {
   it("exposes default resource limits", () => {
     const limits = getDefaultLimits();
     expect(limits.memoryMb).toBe(64);
@@ -32,21 +60,6 @@ describe("runScript", () => {
     expect(result.durationMs).toBeGreaterThanOrEqual(0);
   });
 
-  it("runs a default-export script and returns its value", async () => {
-    const stubSdk = { log: () => undefined } as unknown as ClientSDK;
-    const result = await runScript({
-      source: "export default async () => 1 + 2;",
-      sdk: stubSdk,
-    });
-    // Skip on environments where the optional native module is unavailable
-    // (the runner reports RuntimeUnavailable). Otherwise the bridge must
-    // succeed and forward the return value.
-    if (skipIfRuntimeUnavailable(result)) return;
-    expect(result.success).toBe(true);
-    expect(result.returnValue).toBe(3);
-    expect(result.sdkCalls).toBe(0);
-  });
-
   it("supports direct client.org(slug).namespace.method() chaining", async () => {
     const orgSdk = {
       entities: {
@@ -73,7 +86,6 @@ describe("runScript", () => {
       sdk: stubSdk,
     });
 
-    if (skipIfRuntimeUnavailable(result)) return;
     expect(result.success).toBe(true);
     expect(result.returnValue).toEqual({ org: "atlas", id: 123 });
     expect(result.sdkCalls).toBe(1);
@@ -95,7 +107,6 @@ describe("runScript", () => {
       limits: { timeoutMs: 25 },
     });
 
-    if (skipIfRuntimeUnavailable(result)) return;
     expect(result.success).toBe(false);
     expect(result.error?.name).toBe("TimeoutError");
     expect(result.sdkCalls).toBe(1);