-
Notifications
You must be signed in to change notification settings - Fork 101
feat(web): auto re-pair a local assistant on connect via vellum wake (LUM-2233) #33271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7ca8d30
6442c1a
67f8955
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,126 @@ | ||
| import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; | ||
|
|
||
| import { useLockfileStore } from "@/stores/lockfile-store"; | ||
| import type { Lockfile, LockfileAssistant } from "@/runtime/local-mode-host"; | ||
|
|
||
| // The wrapper under test orchestrates the real connect primitive, so we drive | ||
| // its external seams rather than the primitive itself: the guardian-token read | ||
| // (which decides success/failure) and the wake repair call. Everything else in | ||
| // the primitive (gateway token exchange, self-hosted connection write) is | ||
| // stubbed to no-op so a successful prime resolves cleanly. | ||
| // | ||
| // The real host module is loaded first so that its exports can be spread into | ||
| // the module mock below and its GuardianTokenError class reused by the tests. | ||
| const host = await import("@/runtime/local-mode-host"); | ||
|
|
||
| // Per-test knobs. These `let` bindings are reassigned in beforeEach and inside | ||
| // individual tests; the module mock below calls through them at call time, so | ||
| // a reassignment takes effect without re-registering the module mock. | ||
| let primeShouldSucceed: () => boolean; | ||
| let fetchGuardianTokenHost = mock(async (_id: string) => "tok"); | ||
| let wakeLocalAssistantHost = mock(async (_id: string) => ({ ok: true })); | ||
|
|
||
| // Replace only the two seams under control; every other export is the real one. | ||
| mock.module("@/runtime/local-mode-host", () => ({ | ||
| ...host, | ||
| fetchGuardianTokenHost: (id: string) => fetchGuardianTokenHost(id), | ||
| wakeLocalAssistantHost: (id: string) => wakeLocalAssistantHost(id), | ||
| })); | ||
|
|
||
| // Gateway-session helpers are replaced with no-ops and fixed return values. | ||
| mock.module("@/lib/auth/gateway-session", () => ({ | ||
| clearGatewayToken: () => {}, | ||
| ensureGatewayToken: async () => {}, | ||
| getGatewayToken: () => "gateway-tok", | ||
| getLocalTokenUrl: () => "http://127.0.0.1:7830/token", | ||
| })); | ||
|
|
||
| // The self-hosted connection write is replaced with a no-op. | ||
| mock.module("@/lib/self-hosted/connection", () => ({ | ||
| setSelfHostedConnection: () => {}, | ||
| })); | ||
|
|
||
| // The unit under test is imported only after the module mocks above have been | ||
| // registered — presumably so it binds to the mocked modules; confirm against | ||
| // bun's mock.module semantics before reordering. | ||
| const { GuardianTokenError } = host; | ||
| const { primeLocalGatewayConnectionWithRepair } = await import("@/lib/local-mode"); | ||
|
|
||
| // Minimal local-assistant fixture shared by every test. Only the fields set | ||
| // here are provided; the `as` cast suggests LockfileAssistant declares more | ||
| // required fields than this literal supplies (type not visible here — confirm). | ||
| const localAssistant: LockfileAssistant = { | ||
| assistantId: "local-a", | ||
| cloud: "local", | ||
| resources: { gatewayPort: 7830 }, | ||
| } as LockfileAssistant; | ||
|
|
||
| /** | ||
| * Seed the lockfile store with the single fixture assistant marked active, | ||
| * then record the same id under the selected-assistant localStorage key. | ||
| */ | ||
| function selectLocalAssistant(): void { | ||
| const selectedId = "local-a"; | ||
| const storageKey = "vellum:local:selected-assistant"; | ||
| const seeded: Lockfile = { | ||
| assistants: [localAssistant], | ||
| activeAssistant: selectedId, | ||
| }; | ||
| useLockfileStore.setState({ lockfile: seeded }); | ||
| localStorage.setItem(storageKey, selectedId); | ||
| } | ||
|
|
||
| // Reset every knob to the happy path before each test: the guardian-token | ||
| // read succeeds unless a test flips primeShouldSucceed, and wake reports ok. | ||
| // Fresh mock instances are created so call counts start at zero per test. | ||
| beforeEach(() => { | ||
| primeShouldSucceed = () => true; | ||
| fetchGuardianTokenHost = mock(async (_id: string) => { | ||
| // A 404 is the repairable "token missing" failure exercised by the tests. | ||
| if (!primeShouldSucceed()) throw new GuardianTokenError(404, "token gone"); | ||
| return "tok"; | ||
| }); | ||
| wakeLocalAssistantHost = mock(async (_id: string) => ({ ok: true })); | ||
| selectLocalAssistant(); | ||
| }); | ||
|
|
||
| // Undo what selectLocalAssistant seeded so no state carries into the next test. | ||
| afterEach(() => { | ||
| useLockfileStore.setState({ lockfile: null }); | ||
| localStorage.clear(); | ||
| }); | ||
|
|
||
| // Behavioural contract of the repair wrapper, driven through the two mocked | ||
| // seams: the guardian-token read (one call per connection attempt) and wake. | ||
| describe("primeLocalGatewayConnectionWithRepair", () => { | ||
| test("a clean first attempt never wakes the assistant", async () => { | ||
| await primeLocalGatewayConnectionWithRepair(); | ||
| expect(wakeLocalAssistantHost).not.toHaveBeenCalled(); | ||
| }); | ||
|
|
||
| test("a repairable failure wakes once, then retries and succeeds", async () => { | ||
| let attempts = 0; | ||
| // Fail the first prime (missing token), succeed once wake has run. | ||
| primeShouldSucceed = () => attempts++ > 0; | ||
|
|
||
| await primeLocalGatewayConnectionWithRepair(); | ||
|
|
||
| expect(wakeLocalAssistantHost).toHaveBeenCalledTimes(1); | ||
| expect(wakeLocalAssistantHost).toHaveBeenCalledWith("local-a"); | ||
| // One failing attempt + one succeeding retry. | ||
| expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(2); | ||
| }); | ||
|
|
||
| test("a still-failing retry surfaces the original error and wakes only once", async () => { | ||
| primeShouldSucceed = () => false; | ||
|
|
||
| const err = await primeLocalGatewayConnectionWithRepair().catch( | ||
| (e: unknown) => e, | ||
| ); | ||
|
|
||
| expect(err).toBeInstanceOf(GuardianTokenError); | ||
| expect(wakeLocalAssistantHost).toHaveBeenCalledTimes(1); | ||
| // Prove the retry actually ran: one failing attempt + one failing retry. | ||
| // Without this, a wrapper that wakes and rethrows without retrying passes. | ||
| expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(2); | ||
| }); | ||
|
|
||
| test("a failed wake surfaces the original error without retrying", async () => { | ||
| primeShouldSucceed = () => false; | ||
| wakeLocalAssistantHost = mock(async () => ({ | ||
| ok: false, | ||
| error: "no sibling env", | ||
| })); | ||
|
|
||
| const err = await primeLocalGatewayConnectionWithRepair().catch( | ||
| (e: unknown) => e, | ||
| ); | ||
|
|
||
| expect(err).toBeInstanceOf(GuardianTokenError); | ||
| // Wake must have been attempted; otherwise this is indistinguishable from | ||
| // the non-repairable path, which also makes exactly one connection attempt. | ||
| expect(wakeLocalAssistantHost).toHaveBeenCalledTimes(1); | ||
| // The first prime failed and wake failed — the connection is never retried. | ||
| expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(1); | ||
| }); | ||
|
|
||
| test("a non-repairable 403 surfaces immediately and never wakes", async () => { | ||
| fetchGuardianTokenHost = mock(async () => { | ||
| throw new GuardianTokenError(403, "forbidden"); | ||
| }); | ||
|
|
||
| const err = await primeLocalGatewayConnectionWithRepair().catch( | ||
| (e: unknown) => e, | ||
| ); | ||
|
|
||
| expect(err).toBeInstanceOf(GuardianTokenError); | ||
| expect((err as InstanceType<typeof GuardianTokenError>).status).toBe(403); | ||
| expect(wakeLocalAssistantHost).not.toHaveBeenCalled(); | ||
| // "Immediately" means a single connection attempt with no retry. | ||
| expect(fetchGuardianTokenHost).toHaveBeenCalledTimes(1); | ||
| }); | ||
| }); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,11 +13,13 @@ import { setSelfHostedConnection } from "@/lib/self-hosted/connection"; | |
| import { useLockfileStore } from "@/stores/lockfile-store"; | ||
| import { | ||
| fetchGuardianTokenHost, | ||
| GuardianTokenError, | ||
| loadLockfileHost, | ||
| parseLockfile, | ||
| replacePlatformAssistantsHost, | ||
| retireLocalAssistantHost, | ||
| saveLockfileAssistantHost, | ||
| wakeLocalAssistantHost, | ||
| } from "@/runtime/local-mode-host"; | ||
| import type { | ||
| Lockfile, | ||
|
|
@@ -281,3 +283,41 @@ export async function primeLocalGatewayConnection(): Promise<void> { | |
| token: getGatewayToken(), | ||
| }); | ||
| } | ||
|
|
||
| /** | ||
| * Decide whether a failed connect attempt is worth handing to `wake`. | ||
| * | ||
| * The only failure treated as final is a `GuardianTokenError` with status | ||
| * `403`: the host refused the loopback boundary, a security decision wake | ||
| * can't change. Anything else — a guardian-token error with another status, | ||
| * or any other thrown value — is considered repairable. | ||
| * | ||
| * @param error - The value thrown by the failed connect attempt. | ||
| * @returns `false` only for a 403 `GuardianTokenError`, otherwise `true`. | ||
| */ | ||
| function isRepairableConnectError(error: unknown): boolean { | ||
| const hostRefused = | ||
| error instanceof GuardianTokenError && error.status === 403; | ||
| return !hostRefused; | ||
| } | ||
|
|
||
| /** | ||
| * Prime the local gateway connection, transparently repairing the assistant in | ||
| * place when the first attempt fails for a repairable reason. | ||
| * | ||
| * This mirrors the native client's bootstrap, which re-pairs a stopped, | ||
| * expired, or mis-seeded local assistant before the failure ever reaches the | ||
| * user: on a repairable failure it runs `wake` (re-seeds the guardian token | ||
| * and restarts the daemon + gateway, leaving the assistant's data and identity | ||
| * untouched), then primes the connection once more. | ||
| * | ||
| * Whenever the repair path does not end in a successful connection, the error | ||
| * from the FIRST attempt is what propagates, so the existing connect-error UI | ||
| * surfaces it unchanged. That covers: a non-repairable failure, no selected | ||
| * assistant to wake, a wake that reports `ok: false`, and a still-failing | ||
| * retry (whose own error is deliberately discarded). | ||
| * | ||
| * @throws The first attempt's error, in every case listed above. | ||
| */ | ||
| export async function primeLocalGatewayConnectionWithRepair(): Promise<void> { | ||
| try { | ||
| await primeLocalGatewayConnection(); | ||
| return; | ||
|
Comment on lines
+313
to
+314
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When a user reconnects to a slept/stopped local assistant while an unexpired gateway token is still cached for the same token URL, Useful? React with 👍 / 👎.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same gap as the earlier thread on this file — agreed it's real, and intentionally not fixed here.
Two reasons it stays out of this PR:
Tracked as remaining scope on LUM-2232 (gateway-liveness probe / recovery driven off the first failed request). Leaving this thread open as the pointer to that follow-up rather than resolving it. |
||
| } catch (error) { | ||
| if (!isRepairableConnectError(error)) throw error; | ||
| // Wake needs a target; with no selected assistant there is nothing to repair. | ||
| const assistantId = getSelectedAssistant()?.assistantId; | ||
| if (!assistantId) throw error; | ||
| const repair = await wakeLocalAssistantHost(assistantId); | ||
| if (!repair.ok) throw error; | ||
| try { | ||
| // Exactly one retry after a successful wake. | ||
| await primeLocalGatewayConnection(); | ||
| } catch { | ||
| // Honour the documented contract: a still-failing retry surfaces the | ||
| // original error, not whatever the second attempt happened to throw. | ||
| throw error; | ||
| } | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.