Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ jobs:
with:
bun-version: 1.3.5

# Pin Node so the Sandbox runtime test below can load isolated-vm —
# it ships abi127 (Node 22) and abi137 (Node 24) prebuilds; runner
# default may be a non-matching major.
- uses: actions/setup-node@v4
with:
node-version: '22'

- name: Install dependencies
run: bun install

Expand All @@ -41,6 +48,22 @@ jobs:
# Worker tests that don't transitively load pi-coding-agent runtime (WASM unavailable on CI)
bun test packages/worker/src/__tests__/embedded-tools.test.ts packages/worker/src/__tests__/model-resolver.test.ts packages/worker/src/__tests__/tool-policy.test.ts packages/worker/src/__tests__/processor.test.ts packages/worker/src/__tests__/audio-provider-suggestions.test.ts packages/worker/src/__tests__/generated-media.test.ts packages/worker/src/__tests__/tool-implementations.test.ts packages/worker/src/__tests__/instructions.test.ts packages/worker/src/__tests__/custom-tools.test.ts

# The execute MCP tool runs scripts in isolated-vm — a V8 native addon.
# Bun (JavaScriptCore) cannot link the V8 ABI, so this test must run
# under Node, the production runtime. Invoking vitest via `node` (not
# `bun run`) guarantees the runtime even though the binary's shebang
# already points at node. SKIP_TEST_DB_SETUP=1 keeps this fast — the
# test uses a stub SDK and doesn't need Postgres.
#
# The broader vitest suite (~38 integration files) is not yet wired
# into CI; many are stale after the manage_* → execute/search MCP
# consolidation in #348. Tracked separately.
- name: Sandbox runtime test (Node + isolated-vm)
working-directory: packages/owletto-backend
env:
SKIP_TEST_DB_SETUP: '1'
run: node ../../node_modules/.bin/vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts

- name: Upload coverage
if: always()
uses: codecov/codecov-action@v4
Expand Down
16 changes: 14 additions & 2 deletions docker/app/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -e

MODE="${1:-server}"

echo "Starting Owletto backend (Bun)"
echo "Starting Owletto backend (Node + tsx)"
echo "================================"

echo "Environment:"
Expand Down Expand Up @@ -41,4 +41,16 @@ else
run_migrations
fi

exec bun /app/packages/owletto-backend/src/server.ts
# Run under Node so V8 native addons (isolated-vm) load.
# Bun uses JavaScriptCore and cannot link the V8 ABI surface that
# isolated-vm requires; the execute MCP tool silently degrades to
# RuntimeUnavailable under bun. tsx provides the TS loader so the
# source layout stays uncompiled.
#
# Keep cwd=/app — gateway services and embedded agent routes resolve
# bundled config (`config/providers.json`) relative to process.cwd().
# Use the absolute tsx loader path so the resolution doesn't depend
# on cwd or PATH.
exec node \
--import "file:///app/packages/owletto-backend/node_modules/tsx/dist/loader.mjs" \
/app/packages/owletto-backend/src/server.ts
1 change: 1 addition & 0 deletions packages/owletto-backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"start": "tsx src/server.ts",
"test": "vitest",
"test:pglite": "OWLETTO_TEST_BACKEND=pglite vitest",
"test:sandbox-runtime": "SKIP_TEST_DB_SETUP=1 vitest run src/__tests__/integration/sandbox/run-script-runtime.test.ts",
"typecheck": "tsc --noEmit",
"lint": "biome lint src",
"lint:fix": "biome lint --write src",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,44 @@
import { describe, expect, it } from "bun:test";
/**
* Sandbox runtime integration test.
*
* Asserts that the host runtime can actually load `isolated-vm` and run a
* script end-to-end. Lives under integration/ and can be run locally via the
* `test:sandbox-runtime` package script; CI runs the same file by invoking
* vitest directly through `node`, the production runtime.
*
* Background: `isolated-vm` is a V8 native addon. Bun (which uses
* JavaScriptCore with a partial V8 ABI shim) cannot load it; the addon
* throws at dlopen. The previous bun:test version of this suite hid that
* gap by skipping when the runner reported `RuntimeUnavailable`. The
* production app image silently regressed for months as a result.
*
* This file deliberately fails (not skips) when the runtime can't load
* `isolated-vm` so the regression cannot ship again.
*/

import { describe, expect, it } from "vitest";
import type { ClientSDK } from "../../../sandbox/client-sdk";
import { getDefaultLimits, runScript } from "../../../sandbox/run-script";

/**
 * Tells the caller whether a `runScript` result reports the
 * `RuntimeUnavailable` error, so the calling test can bail out early.
 *
 * When that error is present the result must also be flagged as a failure;
 * this is asserted before returning.
 *
 * @param result - The resolved value of a `runScript` call.
 * @returns `true` when `result.error.name` is `"RuntimeUnavailable"`,
 *   `false` otherwise.
 */
function skipIfRuntimeUnavailable(
  result: Awaited<ReturnType<typeof runScript>>,
): boolean {
  const runtimeUnavailable = result.error?.name === "RuntimeUnavailable";
  if (runtimeUnavailable) {
    // An unavailable runtime can never be reported as a successful run.
    expect(result.success).toBe(false);
  }
  return runtimeUnavailable;
}
describe("sandbox runtime", () => {
// Runs a trivial default-export script through runScript. A
// RuntimeUnavailable error is turned into a hard failure (never a skip);
// otherwise the success flag, return value and SDK call count are checked.
it("loads isolated-vm and runs a trivial script", async () => {
  const sdk = { log: () => undefined } as unknown as ClientSDK;
  const outcome = await runScript({
    source: "export default async () => 1 + 2;",
    sdk,
  });

  const { error } = outcome;
  if (error?.name === "RuntimeUnavailable") {
    const explanation = [
      "isolated-vm failed to load under the test runtime. ",
      "Production runs the backend under Node; this test must too. ",
      `Detail: ${error.message}`,
    ].join("");
    throw new Error(explanation);
  }

  expect(outcome.success).toBe(true);
  expect(outcome.returnValue).toBe(3);
  expect(outcome.sdkCalls).toBe(0);
});

describe("runScript", () => {
it("exposes default resource limits", () => {
const limits = getDefaultLimits();
expect(limits.memoryMb).toBe(64);
Expand All @@ -32,21 +60,6 @@ describe("runScript", () => {
expect(result.durationMs).toBeGreaterThanOrEqual(0);
});

it("runs a default-export script and returns its value", async () => {
const stubSdk = { log: () => undefined } as unknown as ClientSDK;
const result = await runScript({
source: "export default async () => 1 + 2;",
sdk: stubSdk,
});
// Skip on environments where the optional native module is unavailable
// (the runner reports RuntimeUnavailable). Otherwise the bridge must
// succeed and forward the return value.
if (skipIfRuntimeUnavailable(result)) return;
expect(result.success).toBe(true);
expect(result.returnValue).toBe(3);
expect(result.sdkCalls).toBe(0);
});

it("supports direct client.org(slug).namespace.method() chaining", async () => {
const orgSdk = {
entities: {
Expand All @@ -73,7 +86,6 @@ describe("runScript", () => {
sdk: stubSdk,
});

if (skipIfRuntimeUnavailable(result)) return;
expect(result.success).toBe(true);
expect(result.returnValue).toEqual({ org: "atlas", id: 123 });
expect(result.sdkCalls).toBe(1);
Expand All @@ -95,7 +107,6 @@ describe("runScript", () => {
limits: { timeoutMs: 25 },
});

if (skipIfRuntimeUnavailable(result)) return;
expect(result.success).toBe(false);
expect(result.error?.name).toBe("TimeoutError");
expect(result.sdkCalls).toBe(1);
Expand Down
Loading