From f48646a5084965ee73064aaeb50fe55d23030eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Thu, 28 May 2026 17:14:28 +0100 Subject: [PATCH 1/3] fix(gateway): stamp ephemeral agent worker tokens with caller org (#1068 follow-up) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cross-pod conversation-lock guard introduced in #1068 refuses to spawn a worker when an org-less turn is enqueued. Ephemeral agents created by 'lobu chat -c local' (no project / no metadata) fell into that path: createAgent stamped tokenOrganizationId from the agent's own metadata only, leaving it undefined for ephemeral agents — so the session shipped to enqueueMessage with no organizationId and the deployment manager threw 'Cannot acquire per-conversation lock'. Fall back to the caller's organizationId resolved by createLobuAuthBridge (from the PAT or Better Auth session). Also surface the same info via a new ApiAuthContext that createApiAuthMiddleware sets after worker-token or /oauth/userinfo validation, so consumers that don't sit behind the Lobu auth bridge can read it too. ExternalAuthClient.fetchUserInfo now forwards the organization_id resolved from /oauth/userinfo's organization_slug + organizations[] list. E2E: lobu run + lobu chat -c local 'ping' now reaches the worker (no more 'Worker startup failed: cross-pod conversation lock requires organizationId'). The remaining 'No model configured' error is a separate ephemeral-provider-binding gap. 
--- .../src/gateway/auth/api-auth-middleware.ts | 32 ++++++++++++++++- .../src/gateway/auth/external/client.ts | 35 +++++++++++++++++-- .../server/src/gateway/routes/public/agent.ts | 20 +++++++---- 3 files changed, 76 insertions(+), 11 deletions(-) diff --git a/packages/server/src/gateway/auth/api-auth-middleware.ts b/packages/server/src/gateway/auth/api-auth-middleware.ts index a9414d740..6a5f825b7 100644 --- a/packages/server/src/gateway/auth/api-auth-middleware.ts +++ b/packages/server/src/gateway/auth/api-auth-middleware.ts @@ -6,6 +6,25 @@ import { getRevokedTokenStore } from "./revoked-token-store.js"; export const TOKEN_EXPIRATION_MS = 24 * 60 * 60 * 1000; +/** + * Caller identity surfaced to handlers via `c.get("authContext")` after a + * successful auth check. `organizationId` is the token-bound or personal-org + * id when the auth path can resolve one (worker token payload, or + * `/oauth/userinfo` org slug); otherwise undefined. `createAgent` uses it to + * stamp the worker token for ephemeral agents so the cross-pod conversation + * lock can be acquired (#1068). + */ +export interface ApiAuthContext { + userId?: string; + organizationId?: string; +} + +declare module "hono" { + interface ContextVariableMap { + authContext?: ApiAuthContext; + } +} + /** * Creates a Hono middleware that enforces the standard auth check: * 1. Settings session cookie 2. Worker token (local) 3. 
External OAuth @@ -27,6 +46,7 @@ export function createApiAuthMiddleware(opts: { if (opts.allowSettingsSession) { const session = await verifySettingsSession(c); if (session) { + c.set("authContext", { userId: session.userId } satisfies ApiAuthContext); return next(); } } @@ -46,6 +66,10 @@ export function createApiAuthMiddleware(opts: { if (workerData.jti && (await revokedTokens.isRevoked(workerData.jti))) { return c.json({ success: false, error: "Unauthorized" }, 401); } + c.set("authContext", { + userId: workerData.userId, + organizationId: workerData.organizationId, + } satisfies ApiAuthContext); return next(); } } @@ -55,7 +79,13 @@ export function createApiAuthMiddleware(opts: { if (opts.externalAuthClient) { try { const userInfo = await opts.externalAuthClient.fetchUserInfo(token); - if (userInfo?.sub) return next(); + if (userInfo?.sub) { + c.set("authContext", { + userId: userInfo.sub, + organizationId: userInfo.organizationId, + } satisfies ApiAuthContext); + return next(); + } } catch { // Token not valid for external auth, continue to next method } diff --git a/packages/server/src/gateway/auth/external/client.ts b/packages/server/src/gateway/auth/external/client.ts index 3fb843e47..82a06a931 100644 --- a/packages/server/src/gateway/auth/external/client.ts +++ b/packages/server/src/gateway/auth/external/client.ts @@ -43,10 +43,28 @@ interface WellKnownMetadata { grant_types_supported?: string[]; } -interface UserInfoResponse { +export interface UserInfoResponse { sub: string; email: string; name?: string; + /** + * Token-bound org id, or — when the token has no org binding (e.g. a + * device-flow PAT) — the user's personal-org id. Resolved by mapping + * `organization_slug` to its entry in `organizations[]` (both already + * returned by `/oauth/userinfo`). 
Surfaced so the gateway middleware can + * attach an auth-context org to handlers like `createAgent`, which + * otherwise has no way to find the org for an ephemeral agent and + * downstream cross-pod conversation-lock acquisition fails (#1068). + */ + organizationId?: string; +} + +interface UserInfoApiResponse { + sub: string; + email: string; + name?: string; + organization_slug?: string | null; + organizations?: { id: string; slug: string; name: string }[]; } interface DynamicClientCredentials { @@ -167,12 +185,23 @@ export class ExternalAuthClient { ); } - const data = (await response.json()) as UserInfoResponse; + const data = (await response.json()) as UserInfoApiResponse; + const orgId = + data.organization_slug && data.organizations + ? (data.organizations.find((o) => o.slug === data.organization_slug) + ?.id ?? undefined) + : undefined; logger.info("Fetched external auth user info", { sub: data.sub, email: data.email, + orgId, }); - return data; + return { + sub: data.sub, + email: data.email, + name: data.name, + organizationId: orgId, + }; } async getCapabilities(): Promise { diff --git a/packages/server/src/gateway/routes/public/agent.ts b/packages/server/src/gateway/routes/public/agent.ts index 4cc6bb488..33aa286a6 100644 --- a/packages/server/src/gateway/routes/public/agent.ts +++ b/packages/server/src/gateway/routes/public/agent.ts @@ -638,16 +638,22 @@ export function createAgentApi(config: AgentApiConfig): OpenAPIHono { if (denial) return denial; } - // Stamp the worker token with the agent's owning org so the egress - // proxy's per-tenant gates (grant/deny, judge cache, judge policy) - // can scope decisions by org. Ephemeral agents have no preexisting - // metadata; their token mints without orgId and the proxy falls - // through to unscoped checks for that worker — flagged for a - // future fix that derives org from the auth session. 
- const tokenOrganizationId = + // Stamp the worker token with the owning org so the egress proxy's + // per-tenant gates (grant/deny, judge cache, judge policy) can scope + // decisions by org. Prefer the agent's own metadata; fall back to the + // caller's organizationId (already resolved by `createLobuAuthBridge` + // from the PAT or Better Auth session). Without the fallback, + // ephemeral agents under `lobu chat -c local` mint an org-less token and + // the downstream cross-pod conversation-lock guard refuses to spawn + // the worker (#1068). + const callerOrgId = + (c.get("organizationId") as string | undefined) ?? + c.get("authContext")?.organizationId; + const metadataOrgId = !isEphemeral && ownershipMetadataStore ? (await ownershipMetadataStore.getMetadata(agentId))?.organizationId : undefined; + const tokenOrganizationId = metadataOrgId ?? callerOrgId; // For ephemeral agents, auto-provision settings from system-key // providers (env-var-based API keys). No more template-agent fallback — From 8de4544067ccb99b90cb1071e20e44d853b394ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Thu, 28 May 2026 17:27:53 +0100 Subject: [PATCH 2/3] fix(gateway): create ephemeral agent row before provisioning settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit saveSettings on the Postgres-backed AgentConfigStore is UPDATE-only — no row, no save. Ephemeral agents created via 'lobu chat -c local' never had a corresponding row in the agents table, so the 'auto-provision system providers' UPDATE matched 0 rows silently. Downstream, the worker's session-context resolved installedProviders to [] → defaultProvider = none → 'No model configured. Ask an admin to connect a provider for the base agent'. Call AgentMetadataStore.createAgent before saveSettings so the row exists. Same call site that already provisioned the providers, just runs the underlying INSERT ... ON CONFLICT DO UPDATE one step earlier. 
--- .../server/src/gateway/routes/public/agent.ts | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/packages/server/src/gateway/routes/public/agent.ts b/packages/server/src/gateway/routes/public/agent.ts index 4cc6bb488..713433aab 100644 --- a/packages/server/src/gateway/routes/public/agent.ts +++ b/packages/server/src/gateway/routes/public/agent.ts @@ -422,7 +422,7 @@ export interface AgentApiConfig { "getSettings" | "listAgents" | "getMetadata" >; userAgentsStore?: UserAgentsStore; - agentMetadataStore?: Pick; + agentMetadataStore?: Pick; platformRegistry?: PlatformRegistry; approveToolCall?: ( requestId: string, @@ -649,10 +649,34 @@ export function createAgentApi(config: AgentApiConfig): OpenAPIHono { ? (await ownershipMetadataStore.getMetadata(agentId))?.organizationId : undefined; - // For ephemeral agents, auto-provision settings from system-key - // providers (env-var-based API keys). No more template-agent fallback — - // there are no template/sandbox agents anymore. + // For ephemeral agents, create the `agents` row first so subsequent + // `saveSettings` (an UPDATE-only path) actually persists. Without this, + // the row never exists, the UPDATE matches 0 rows silently, and the + // worker's session-context resolves `installedProviders = []` → no + // provider → "No model configured". Followed by provisioning system- + // key providers (env-var-based API keys) so the worker has something + // to talk to. No more template-agent fallback — there are no + // template/sandbox agents anymore. if (isEphemeral && agentSettingsStore) { + if (agentMetadataStore?.createAgent) { + try { + await agentMetadataStore.createAgent( + agentId, + agentId, + "api", + agentId, + ); + } catch (err) { + // saveMetadata is INSERT ... ON CONFLICT DO UPDATE under the hood, + // so a re-create of the same id within the same org is benign. + // NOTE: this catch swallows every error (FK / org mismatch included) + // and only logs at debug level; nothing is rethrown from here. 
+ logger.debug( + `Ephemeral agent ${agentId}: createAgent threw (likely re-create): ${err}`, + ); + } + } + const providerModules = getModelProviderModules(); const systemProviders: InstalledProvider[] = providerModules .filter((m) => m.hasSystemKey()) From 06925eea449534e8bc8ac13d9a54ef69f1c6c7a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Burak=20Emre=20Kabakc=C4=B1?= Date: Thu, 28 May 2026 17:29:02 +0100 Subject: [PATCH 3/3] fix(server): throw on saveSettings UPDATE no-op MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Postgres-backed AgentConfigStore.saveSettings was UPDATE-only and silently returned success when 0 rows matched. That made it a footgun: any caller that saved settings before the agents row existed (the ephemeral-chat path being the case we hit) would get no error and no persisted data — surfacing downstream as 'No model configured' once the worker read installedProviders = []. Make saveSettings throw when the UPDATE matches 0 rows. The previous commit fixes the legitimate caller (createAgent for ephemeral agents now calls saveMetadata first). This commit ensures future regressions of the same pattern fail loud at the write site instead of materializing as a confusing downstream error. --- packages/server/src/lobu/stores/postgres-stores.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/server/src/lobu/stores/postgres-stores.ts b/packages/server/src/lobu/stores/postgres-stores.ts index 5827af5a8..ccadf5297 100644 --- a/packages/server/src/lobu/stores/postgres-stores.ts +++ b/packages/server/src/lobu/stores/postgres-stores.ts @@ -215,7 +215,7 @@ export function createPostgresAgentConfigStore(): AgentConfigStore { const sql = getDb(); const orgId = getOrgId(); const now = new Date(); - await sql` + const result = await sql` UPDATE agents SET model = ${settings.model ?? null}, model_selection = ${sql.json(settings.modelSelection ?? 
{})}, @@ -237,6 +237,16 @@ export function createPostgresAgentConfigStore(): AgentConfigStore { updated_at = ${now} WHERE id = ${agentId} AND organization_id = ${orgId} `; + // UPDATE-only by design (agents row identity belongs to saveMetadata). + // Fail loud when no row matches so a save can't silently no-op — the + // ephemeral-chat path hit exactly this footgun (#1068 follow-up: agent + // row never existed → 0 rows updated → empty installedProviders → "No + // model configured"). Callers must call saveMetadata first. + if (result.count === 0) { + throw new Error( + `saveSettings: no agents row matches id=${agentId} org=${orgId}; call saveMetadata first` + ); + } }, async updateSettings(agentId, updates) { const existing = await store.getSettings(agentId);