Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions apps/macos/src/main/local-mode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
resolveLockfilePaths,
runHatch,
runRetire,
runWake,
upsertLockfileAssistant,
type CliInvocation,
type LockfileWriteResult,
Expand Down Expand Up @@ -54,6 +55,11 @@ interface RetireResult {
error?: string;
}

/** Outcome of a `wake` attempt, reported as a value instead of a rejection. */
interface WakeResult {
// True when the wake succeeded.
ok: boolean;
// Failure description; populated only on the failure path.
error?: string;
}

/**
* Resolve how to invoke the CLI. Precedence:
* 1. `VELLUM_CLI_PATH` env var override
Expand Down Expand Up @@ -115,6 +121,22 @@ async function retire(assistantId: string): Promise<RetireResult> {
return result.ok ? { ok: true } : { ok: false, error: result.error };
}

/**
 * Wake (start/restart) a local assistant's daemon and gateway, re-seeding its
 * guardian token. The non-destructive repair primitive. Mirrors `hatch`'s
 * never-reject contract: a failure to resolve the CLI is reported as
 * `{ ok: false, error }` rather than thrown.
 *
 * @param assistantId - Id of the local assistant to wake.
 * @returns `{ ok: true }` when `runWake` reports success, otherwise
 *   `{ ok: false, error }` carrying the failure message.
 */
async function wake(assistantId: string): Promise<WakeResult> {
  let invocation: CliInvocation;
  try {
    invocation = await resolveCliInvocation();
  } catch (err) {
    // A thrown value is not guaranteed to be an `Error`; narrow before reading
    // `.message` so the caller always receives a string description instead
    // of `undefined`.
    return {
      ok: false,
      error: err instanceof Error ? err.message : String(err),
    };
  }
  const result = await runWake(invocation, assistantId);
  return result.ok ? { ok: true } : { ok: false, error: result.error };
}

// A persisted assistant entry as it crosses the IPC boundary. The
// package's lockfile parser owns the real field-level contract; here we
// only assert the renderer sent an object, so unknown/forward-compat
Expand Down Expand Up @@ -187,6 +209,11 @@ export const installLocalMode = (): void => {
return retire(assistantId);
});

handle("vellum:localMode:wake", assistantIdArgs, ([assistantId]) => {
if (!assistantId) return { ok: false, error: "Missing assistantId" };
return wake(assistantId);
});

handle(
"vellum:localMode:guardianToken",
assistantIdArgs,
Expand Down
12 changes: 12 additions & 0 deletions apps/macos/src/preload/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,13 @@ export interface VellumBridge {
* on failure rather than rejecting.
*/
retire(assistantId: string): Promise<{ ok: boolean; error?: string }>;
/**
* Wake (start/restart) a local assistant's daemon and gateway via the
* Vellum CLI's `wake`, re-seeding its guardian token. The non-destructive
* repair primitive used to recover a stopped or mis-seeded assistant in
* place. Mirrors `retire`'s never-reject contract.
*/
wake(assistantId: string): Promise<{ ok: boolean; error?: string }>;
/**
* Acquire a fresh guardian access token for a local assistant, reading
* the token file from disk and refreshing it via the CLI when expired.
Expand Down Expand Up @@ -290,6 +297,11 @@ const bridge: VellumBridge = {
"vellum:localMode:replacePlatformAssistants",
platformAssistants,
) as Promise<LockfileWriteResult>,
wake: (assistantId: string) =>
ipcRenderer.invoke("vellum:localMode:wake", assistantId) as Promise<{
ok: boolean;
error?: string;
}>,
retire: (assistantId: string) =>
ipcRenderer.invoke("vellum:localMode:retire", assistantId) as Promise<{
ok: boolean;
Expand Down
126 changes: 126 additions & 0 deletions apps/web/src/lib/local-mode-repair.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test";

import { useLockfileStore } from "@/stores/lockfile-store";
import type { Lockfile, LockfileAssistant } from "@/runtime/local-mode-host";

// The wrapper under test orchestrates the real connect primitive, so we drive
// its external seams rather than the primitive itself: the guardian-token read
// (which decides success/failure) and the wake repair call. Everything else in
// the primitive (gateway token exchange, self-hosted connection write) is
// stubbed to no-op so a successful prime resolves cleanly.
// Load the real host module first so the mock below can spread its exports and
// override only the two seams under test.
const host = await import("@/runtime/local-mode-host");

// Mutable bindings: each test (or `beforeEach`) may reassign these, and the
// module mock below forwards to whatever is currently assigned.
let primeShouldSucceed: () => boolean;
let fetchGuardianTokenHost = mock(async (_id: string) => "tok");
let wakeLocalAssistantHost = mock(async (_id: string) => ({ ok: true }));

// Forward through arrow wrappers (rather than passing the mocks directly) so a
// later reassignment of the bindings above is picked up at call time.
mock.module("@/runtime/local-mode-host", () => ({
...host,
fetchGuardianTokenHost: (id: string) => fetchGuardianTokenHost(id),
wakeLocalAssistantHost: (id: string) => wakeLocalAssistantHost(id),
}));

// Stub the gateway session so the token exchange is a no-op in these tests.
mock.module("@/lib/auth/gateway-session", () => ({
clearGatewayToken: () => {},
ensureGatewayToken: async () => {},
getGatewayToken: () => "gateway-tok",
getLocalTokenUrl: () => "http://127.0.0.1:7830/token",
}));

// Stub the self-hosted connection write to a no-op.
mock.module("@/lib/self-hosted/connection", () => ({
setSelfHostedConnection: () => {},
}));

const { GuardianTokenError } = host;
// Import the module under test only after the mocks above are registered.
const { primeLocalGatewayConnectionWithRepair } = await import("@/lib/local-mode");

// Local-assistant fixture; the cast lets the test supply only the fields
// written here.
const localAssistant: LockfileAssistant = {
assistantId: "local-a",
cloud: "local",
resources: { gatewayPort: 7830 },
} as LockfileAssistant;

/**
 * Seed the lockfile store with the single local fixture assistant and record
 * it as the persisted selection, so the code under test resolves "local-a".
 */
function selectLocalAssistant(): void {
  const seededLockfile: Lockfile = {
    assistants: [localAssistant],
    activeAssistant: "local-a",
  };
  useLockfileStore.setState({ lockfile: seededLockfile });

  const selectionKey = "vellum:local:selected-assistant";
  localStorage.setItem(selectionKey, "local-a");
}

// Reset every seam to its passing default before each test: the guardian-token
// read succeeds unless a test flips `primeShouldSucceed`, and wake reports ok.
beforeEach(() => {
primeShouldSucceed = () => true;
fetchGuardianTokenHost = mock(async (_id: string) => {
// A 404 is the failure used to simulate a missing token.
if (!primeShouldSucceed()) throw new GuardianTokenError(404, "token gone");
return "tok";
});
wakeLocalAssistantHost = mock(async (_id: string) => ({ ok: true }));
selectLocalAssistant();
});

// Undo the store and storage seeding so selection state cannot leak between tests.
afterEach(() => {
useLockfileStore.setState({ lockfile: null });
localStorage.clear();
});

// Covers the repair wrapper's branches: clean connect, repair-then-succeed,
// repair-then-still-failing, failed wake, and a non-repairable 403.
describe("primeLocalGatewayConnectionWithRepair", () => {
test("a clean first attempt never wakes the assistant", async () => {
await primeLocalGatewayConnectionWithRepair();
expect(wakeLocalAssistantHost).not.toHaveBeenCalled();
});

test("a repairable failure wakes once, then retries and succeeds", async () => {
let attempts = 0;
// Fail the first prime (missing token), succeed once wake has run.
primeShouldSucceed = () => attempts++ > 0;

await primeLocalGatewayConnectionWithRepair();

expect(wakeLocalAssistantHost).toHaveBeenCalledTimes(1);
expect(wakeLocalAssistantHost).toHaveBeenCalledWith("local-a");
// One failing attempt + one succeeding retry.
expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(2);
});

test("a still-failing retry surfaces the original error and wakes only once", async () => {
primeShouldSucceed = () => false;

// Capture the rejection as a value so it can be inspected below.
const err = await primeLocalGatewayConnectionWithRepair().catch(
(e: unknown) => e,
);

expect(err).toBeInstanceOf(GuardianTokenError);
expect(wakeLocalAssistantHost).toHaveBeenCalledTimes(1);
});

test("a failed wake surfaces the original error without retrying", async () => {
primeShouldSucceed = () => false;
// Wake reports failure as a value (`ok: false`) rather than rejecting.
wakeLocalAssistantHost = mock(async () => ({
ok: false,
error: "no sibling env",
}));

const err = await primeLocalGatewayConnectionWithRepair().catch(
(e: unknown) => e,
);

expect(err).toBeInstanceOf(GuardianTokenError);
// The first prime failed and wake failed — the connection is never retried.
expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(1);
});

test("a non-repairable 403 surfaces immediately and never wakes", async () => {
// Replace the default mock so the token read always fails with a 403.
fetchGuardianTokenHost = mock(async () => {
throw new GuardianTokenError(403, "forbidden");
});

const err = await primeLocalGatewayConnectionWithRepair().catch(
(e: unknown) => e,
);

expect(err).toBeInstanceOf(GuardianTokenError);
expect((err as InstanceType<typeof GuardianTokenError>).status).toBe(403);
expect(wakeLocalAssistantHost).not.toHaveBeenCalled();
});
});
40 changes: 40 additions & 0 deletions apps/web/src/lib/local-mode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@ import { setSelfHostedConnection } from "@/lib/self-hosted/connection";
import { useLockfileStore } from "@/stores/lockfile-store";
import {
fetchGuardianTokenHost,
GuardianTokenError,
loadLockfileHost,
parseLockfile,
replacePlatformAssistantsHost,
retireLocalAssistantHost,
saveLockfileAssistantHost,
wakeLocalAssistantHost,
} from "@/runtime/local-mode-host";
import type {
Lockfile,
Expand Down Expand Up @@ -281,3 +283,41 @@ export async function primeLocalGatewayConnection(): Promise<void> {
token: getGatewayToken(),
});
}

/**
 * Decide whether running `wake` is worth attempting after a connect failure.
 *
 * The only terminal case is a `GuardianTokenError` with status `403`: the host
 * refused the loopback boundary, a security decision `wake` cannot change, so
 * it must surface as-is. Anything else (a missing/expired/malformed guardian
 * token, an unreachable or stopped gateway, or any non-guardian error) is
 * treated as something `wake` can fix by re-seeding the token and restarting
 * the daemon + gateway.
 *
 * @param error - The value thrown by the failed connect attempt.
 * @returns `false` only for a 403 `GuardianTokenError`; `true` otherwise.
 */
function isRepairableConnectError(error: unknown): boolean {
  const refusedByHost =
    error instanceof GuardianTokenError && error.status === 403;
  return !refusedByHost;
}

/**
 * Prime the local gateway connection, transparently repairing the assistant in
 * place when the first attempt fails for a repairable reason.
 *
 * This mirrors the native client's bootstrap, which re-pairs a stopped,
 * expired, or mis-seeded local assistant before the failure ever reaches the
 * user: on a repairable failure it runs `wake` (re-seeds the guardian token
 * and restarts the daemon + gateway, leaving the assistant's data and identity
 * untouched), then primes the connection once more. A non-repairable failure,
 * a wake that itself fails (whether it reports `ok: false` or rejects), or a
 * still-failing retry all propagate the ORIGINAL error so the existing
 * connect-error UI surfaces it unchanged.
 *
 * Known gap (tracked on LUM-2232, intentionally not fixed here):
 * `ensureGatewayToken()` returns a cached gateway token without touching the
 * gateway, so a slept/stopped assistant with a live cached token primes
 * "successfully" and this function returns before `wake` is attempted; the
 * dead gateway only surfaces on the first data-plane request. A pre-connect
 * liveness probe is deliberately not performed because it would change the
 * happy path by spawning the daemon + gateway on every connect.
 *
 * @throws The error from the first `primeLocalGatewayConnection()` attempt
 *   whenever the connection cannot be established, with or without a repair.
 */
export async function primeLocalGatewayConnectionWithRepair(): Promise<void> {
  let originalError: unknown;
  try {
    await primeLocalGatewayConnection();
    return;
  } catch (error) {
    originalError = error;
  }

  if (!isRepairableConnectError(originalError)) throw originalError;
  const assistantId = getSelectedAssistant()?.assistantId;
  if (!assistantId) throw originalError;

  let repaired = false;
  try {
    repaired = (await wakeLocalAssistantHost(assistantId)).ok;
  } catch {
    // A wake that rejects (e.g. the host transport is down) is just a failed
    // repair; it must not replace the connect error the UI knows how to show.
  }
  if (!repaired) throw originalError;

  try {
    await primeLocalGatewayConnection();
  } catch {
    // The retry's own failure is dropped on purpose: the documented contract
    // is to surface the first error unchanged.
    throw originalError;
  }
}
4 changes: 4 additions & 0 deletions apps/web/src/runtime/is-electron.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ declare global {
platformAssistants: Array<Record<string, unknown>>,
): Promise<LockfileWriteResult>;
retire(assistantId: string): Promise<{ ok: boolean; error?: string }>;
// Optional: older Electron shells predate the wake IPC channel. The
// macOS app and web bundle don't release together, so a newer renderer
// can run against an older preload; callers must guard on its presence.
wake?(assistantId: string): Promise<{ ok: boolean; error?: string }>;
guardianToken(
assistantId: string,
): Promise<
Expand Down
36 changes: 36 additions & 0 deletions apps/web/src/runtime/local-mode-host.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ const {
saveLockfileAssistantHost,
replacePlatformAssistantsHost,
retireLocalAssistantHost,
wakeLocalAssistantHost,
fetchGuardianTokenHost,
} = await import("./local-mode-host");

Expand Down Expand Up @@ -208,6 +209,41 @@ describe("retireLocalAssistantHost", () => {
});
});

// Covers both host branches of `wakeLocalAssistantHost` (HTTP middleware and
// Electron bridge) plus the version-skew case where the bridge lacks `wake`.
describe("wakeLocalAssistantHost", () => {
test("web/dev host POSTs the assistant id to the wake middleware", async () => {
// Minimal Response stand-in: only `json()` is provided.
const fetchMock = mock(async () => ({ json: async () => ({ ok: true }) }));
globalThis.fetch = fetchMock as unknown as typeof fetch;

expect(await wakeLocalAssistantHost("a-1")).toEqual({ ok: true });
const [url, init] = fetchMock.mock.calls[0] as unknown as [string, RequestInit];
expect(url).toBe("/assistant/__local/wake");
expect(init.method).toBe("POST");
expect(JSON.parse(init.body as string)).toEqual({ assistantId: "a-1" });
});

test("Electron host wakes through the bridge and never touches fetch", async () => {
const wake = mock(async () => ({ ok: true }));
// A throwing fetch makes any accidental fall-through to the HTTP branch loud.
const fetchMock = mock(async () => {
throw new Error("fetch must not run on the Electron branch");
});
globalThis.fetch = fetchMock as unknown as typeof fetch;
setElectronBridge({ wake });

expect(await wakeLocalAssistantHost("a-1")).toEqual({ ok: true });
expect(wake).toHaveBeenCalledWith("a-1");
expect(fetchMock).not.toHaveBeenCalled();
});

test("older Electron shell without the wake channel reports an unsupported failure", async () => {
// The macOS app and web bundle don't release together: a newer renderer
// can run against a preload that predates the wake IPC channel.
setElectronBridge({});

const result = await wakeLocalAssistantHost("a-1");
expect(result.ok).toBe(false);
});
});

describe("fetchGuardianTokenHost", () => {
test("web/dev host GETs the guardian-token middleware and returns the access token", async () => {
const fetchMock = mock(async () => ({
Expand Down
36 changes: 36 additions & 0 deletions apps/web/src/runtime/local-mode-host.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ export interface LocalRetireResult {
error?: string;
}

/** Result of waking a local assistant through either host (Electron bridge or HTTP middleware). */
export interface LocalWakeResult {
// True when the host reported that the wake succeeded.
ok: boolean;
// Failure description; expected only when `ok` is false.
error?: string;
}

/**
* Thrown by {@link fetchGuardianTokenHost} when a host returns a structured
* guardian-token failure. Carries the host's `status` so callers can branch on
Expand Down Expand Up @@ -188,6 +193,37 @@ export async function retireLocalAssistantHost(
return res.json() as Promise<LocalRetireResult>;
}

/**
 * Wake (start/restart) a local assistant's daemon and gateway, re-seeding its
 * guardian token. Both hosts drive the Vellum CLI's `wake` in a trusted
 * process and return the same `{ ok, error }` contract.
 *
 * This is the non-destructive repair primitive: it revives a stopped or
 * mis-seeded assistant in place without touching its data or identity, the
 * counterpart to {@link retireLocalAssistantHost}'s destructive removal.
 *
 * Never rejects. Older Electron hosts that predate this IPC channel expose
 * `wake` as `undefined`, which is reported as an unsupported failure; a
 * transport failure (rejected `fetch`, a non-JSON response body, or a rejected
 * bridge call) is likewise reported as `{ ok: false, error }`. Callers can
 * therefore treat any failure as a no-op repair and fall through to the
 * underlying connect error.
 *
 * @param assistantId - Id of the local assistant to wake.
 * @returns The host's wake result, or `{ ok: false, error }` when the host
 *   could not be reached or does not support waking.
 */
export async function wakeLocalAssistantHost(
  assistantId: string,
): Promise<LocalWakeResult> {
  try {
    if (isElectron()) {
      const wake = window.vellum!.localMode.wake;
      if (!wake) {
        return { ok: false, error: "Wake is not supported by this app version" };
      }
      return await wake(assistantId);
    }

    const res = await fetch("/assistant/__local/wake", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ assistantId }),
    });
    return (await res.json()) as LocalWakeResult;
  } catch (err) {
    // Keep the `{ ok, error }` contract even when the transport itself fails,
    // so a broken repair channel never masks the caller's original error.
    return {
      ok: false,
      error: err instanceof Error ? err.message : String(err),
    };
  }
}

/**
* Acquire a fresh guardian access token for a local assistant, used to
* authorize the gateway token exchange. Reading the token file and refreshing
Expand Down
Loading
Loading