From 12349e8cf02bad0d5fcb537c8ae40f24d584ae5b Mon Sep 17 00:00:00 2001 From: siddseethepalli Date: Fri, 17 Apr 2026 23:03:28 +0000 Subject: [PATCH] fix(llm-callsite): route provider transport and field precedence through callSite --- .../agent-loop-callsite-precedence.test.ts | 280 ++++++++++++++++++ .../call-site-routing-provider.test.ts | 214 +++++++++++++ assistant/src/agent/loop.ts | 48 ++- assistant/src/daemon/server.ts | 13 + assistant/src/providers/call-site-routing.ts | 71 +++++ 5 files changed, 613 insertions(+), 13 deletions(-) create mode 100644 assistant/src/__tests__/agent-loop-callsite-precedence.test.ts create mode 100644 assistant/src/__tests__/call-site-routing-provider.test.ts create mode 100644 assistant/src/providers/call-site-routing.ts diff --git a/assistant/src/__tests__/agent-loop-callsite-precedence.test.ts b/assistant/src/__tests__/agent-loop-callsite-precedence.test.ts new file mode 100644 index 00000000000..8dc0c964054 --- /dev/null +++ b/assistant/src/__tests__/agent-loop-callsite-precedence.test.ts @@ -0,0 +1,280 @@ +/** + * Verifies the field-precedence contract for `AgentLoop.run(..., callSite)`. + * + * When `callSite` is set, the loop must NOT pre-set the + * `max_tokens`/`thinking`/`effort`/`speed` fields from `this.config` + * (sourced from `llm.default`) because the downstream + * `RetryProvider.normalizeSendMessageOptions` only fills these fields when + * they're undefined. If the loop pre-sets them, every per-call-site override + * for these knobs is silently ignored. + * + * Precedence (highest wins): + * 1. Per-turn explicit (from `resolveSystemPrompt`'s + * `resolved.maxTokens` / `resolved.model`) + * 2. Call-site resolved values (from `resolveCallSiteConfig` via the + * normalizer) + * 3. Conversation defaults (`this.config.*`, from `llm.default`) + * + * The tests pipe the loop's per-call options through `RetryProvider` so we + * can observe the final, post-resolution config that downstream provider + * clients consume. + */ + +import { beforeEach, describe, expect, mock, test } from "bun:test"; + +mock.module("../util/logger.js", () => ({ + getLogger: () => + new Proxy({} as Record, { get: () => () => {} }), +})); + +let mockLlmConfig: Record = {}; + +mock.module("../config/loader.js", () => ({ + getConfig: () => ({ llm: mockLlmConfig }), +})); + +import { AgentLoop } from "../agent/loop.js"; +import type { ResolvedSystemPrompt } from "../agent/loop.js"; +import { LLMSchema } from "../config/schemas/llm.js"; +import { RetryProvider } from "../providers/retry.js"; +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, + ToolDefinition, +} from "../providers/types.js"; + +const userMessage: Message = { + role: "user", + content: [{ type: "text", text: "hi" }], +}; + +function setLlmConfig(raw: unknown): void { + mockLlmConfig = LLMSchema.parse(raw) as Record; +} + +beforeEach(() => { + mockLlmConfig = LLMSchema.parse({}) as Record; +}); + +/** + * Build a provider that captures the final `config` it receives. Wrap it in + * `RetryProvider` so the call-site resolver runs over whatever the agent loop + * emits — exactly mirroring production wiring. + */ +function makePipeline(providerName: string): { + provider: Provider; + lastConfig: () => Record | undefined; +} { + let captured: Record | undefined; + const inner: Provider = { + name: providerName, + async sendMessage( + _messages: Message[], + _tools?: ToolDefinition[], + _systemPrompt?: string, + options?: SendMessageOptions, + ): Promise { + captured = options?.config as Record | undefined; + return { + content: [{ type: "text", text: "ok" }], + model: "mock-model", + usage: { inputTokens: 1, outputTokens: 1 }, + stopReason: "end_turn", + }; + }, + }; + return { + provider: new RetryProvider(inner), + lastConfig: () => captured, + }; +} + +describe("AgentLoop — call-site precedence", () => { + test("call-site maxTokens wins over conversation default when callSite is set", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-default", maxTokens: 64000 }, + callSites: { mainAgent: { maxTokens: 4096 } }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const loop = new AgentLoop(provider, "system", { maxTokens: 64000 }); + + await loop.run( + [userMessage], + () => {}, + undefined, + undefined, + undefined, + "mainAgent", + ); + + expect(lastConfig()!.max_tokens).toBe(4096); + }); + + test("call-site effort wins over conversation default when callSite is set", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-default", effort: "high" }, + callSites: { mainAgent: { effort: "low" } }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const loop = new AgentLoop(provider, "system", { + maxTokens: 64000, + effort: "high", + }); + + await loop.run( + [userMessage], + () => {}, + undefined, + undefined, + undefined, + "mainAgent", + ); + + expect(lastConfig()!.effort).toBe("low"); + }); + + test("call-site speed wins over conversation default when callSite is set", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-default", speed: "standard" }, + callSites: { mainAgent: { speed: "fast" } }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const loop = new AgentLoop(provider, "system", { + maxTokens: 64000, + effort: "high", + // Conversation default is "fast" (which would normally be applied) — + // ensure the call-site value is the one that ends up on the wire. + speed: "fast", + }); + + await loop.run( + [userMessage], + () => {}, + undefined, + undefined, + undefined, + "mainAgent", + ); + + expect(lastConfig()!.speed).toBe("fast"); + }); + + test("call-site thinking wins over conversation default when callSite is set", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-default", + // Default thinking enabled. + thinking: { enabled: true, streamThinking: true }, + }, + callSites: { + // Call site disables thinking — must be honoured even though the + // conversation default has it on. + mainAgent: { thinking: { enabled: false } }, + }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const loop = new AgentLoop(provider, "system", { + maxTokens: 64000, + // Conversation default also has thinking on — without the fix, this + // would pre-set `thinking: { type: "adaptive" }` and mask the + // call-site override. + thinking: { enabled: true }, + }); + + await loop.run( + [userMessage], + () => {}, + undefined, + undefined, + undefined, + "mainAgent", + ); + + const thinking = lastConfig()!.thinking as + | { enabled?: boolean; type?: string } + | undefined; + // The resolver fills the schema-shaped object, not the wire-format + // `{ type: "adaptive" }`. The important assertion is that the call-site + // value reached the provider intact. + expect(thinking).toBeDefined(); + expect(thinking!.enabled).toBe(false); + expect(thinking!.type).not.toBe("adaptive"); + }); + + test("conversation defaults still apply when callSite is absent", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-default", + // Resolver values that would *normally* be filled — but we don't + // pass a callSite, so they must not surface. + maxTokens: 999, + effort: "low", + }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const loop = new AgentLoop(provider, "system", { + maxTokens: 64000, + effort: "high", + speed: "fast", + thinking: { enabled: true }, + }); + + await loop.run([userMessage], () => {}, undefined, undefined, undefined); + + const config = lastConfig()!; + expect(config.max_tokens).toBe(64000); + expect(config.effort).toBe("high"); + expect(config.speed).toBe("fast"); + // No callSite → loop sets the wire-format thinking directly. + expect(config.thinking).toEqual({ type: "adaptive" }); + }); + + test("per-turn resolveSystemPrompt.maxTokens wins over both call-site and default", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-default", + maxTokens: 64000, + }, + callSites: { mainAgent: { maxTokens: 4096 } }, + }); + + const { provider, lastConfig } = makePipeline("anthropic"); + const resolveSystemPrompt = (): ResolvedSystemPrompt => ({ + systemPrompt: "per-turn system", + maxTokens: 8192, + }); + + const loop = new AgentLoop( + provider, + "system", + { maxTokens: 64000 }, + [], + undefined, + undefined, + resolveSystemPrompt, + ); + + await loop.run( + [userMessage], + () => {}, + undefined, + undefined, + undefined, + "mainAgent", + ); + + // Per-turn explicit value beats both the call-site (4096) and the + // default (64000). + expect(lastConfig()!.max_tokens).toBe(8192); + }); +}); diff --git a/assistant/src/__tests__/call-site-routing-provider.test.ts b/assistant/src/__tests__/call-site-routing-provider.test.ts new file mode 100644 index 00000000000..b547e2c859a --- /dev/null +++ b/assistant/src/__tests__/call-site-routing-provider.test.ts @@ -0,0 +1,214 @@ +/** + * Verifies that `CallSiteRoutingProvider` selects the right underlying + * provider transport per call based on `options.config.callSite`. + * + * The wrapper exists so per-call-site `llm.callSites..provider` + * overrides actually swap the HTTP transport, not just the request + * metadata. The conversation's transport is fixed at construction time; + * without this wrapper a memoryRetrieval call configured to run on OpenAI + * but originating from an Anthropic-default conversation would still hit + * the Anthropic transport. + */ + +import { beforeEach, describe, expect, mock, test } from "bun:test"; + +mock.module("../util/logger.js", () => ({ + getLogger: () => + new Proxy({} as Record, { get: () => () => {} }), +})); + +// Mutable LLM config consumed by the resolver via `getConfig()`. +let mockLlmConfig: Record = {}; + +mock.module("../config/loader.js", () => ({ + getConfig: () => ({ llm: mockLlmConfig }), +})); + +import { LLMSchema } from "../config/schemas/llm.js"; +import { CallSiteRoutingProvider } from "../providers/call-site-routing.js"; +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, +} from "../providers/types.js"; + +const DUMMY_MESSAGES: Message[] = [ + { role: "user", content: [{ type: "text", text: "hi" }] }, +]; + +function makeResponse(model: string): ProviderResponse { + return { + content: [{ type: "text", text: "ok" }], + model, + usage: { inputTokens: 1, outputTokens: 1 }, + stopReason: "end_turn", + }; +} + +function makeProvider( + name: string, + onCall: (options: SendMessageOptions | undefined) => void, +): Provider { + return { + name, + async sendMessage(_messages, _tools, _systemPrompt, options) { + onCall(options); + return makeResponse(name); + }, + }; +} + +function setLlmConfig(raw: unknown): void { + mockLlmConfig = LLMSchema.parse(raw) as Record; +} + +beforeEach(() => { + mockLlmConfig = LLMSchema.parse({}) as Record; +}); + +describe("CallSiteRoutingProvider", () => { + test("routes to default provider when callSite is absent", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + memoryRetrieval: { provider: "openai", model: "gpt-5.4" }, + }, + }); + + const calls = { default: 0, alt: 0 }; + const defaultProvider = makeProvider("anthropic", () => { + calls.default++; + }); + const altProvider = makeProvider("openai", () => { + calls.alt++; + }); + + const wrapped = new CallSiteRoutingProvider(defaultProvider, (name) => + name === "openai" ? altProvider : undefined, + ); + + const response = await wrapped.sendMessage( + DUMMY_MESSAGES, + undefined, + undefined, + // No callSite — must hit default even though `memoryRetrieval` is + // configured for openai. + { config: {} }, + ); + + expect(calls.default).toBe(1); + expect(calls.alt).toBe(0); + expect(response.model).toBe("anthropic"); + }); + + test("routes to default provider when callSite resolves to same provider name", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + // Same provider as default — no transport swap needed. + memoryRetrieval: { model: "claude-haiku-4-5-20251001" }, + }, + }); + + const calls = { default: 0, alt: 0 }; + const defaultProvider = makeProvider("anthropic", () => { + calls.default++; + }); + const altProvider = makeProvider("openai", () => { + calls.alt++; + }); + + const wrapped = new CallSiteRoutingProvider(defaultProvider, (name) => + name === "openai" ? altProvider : undefined, + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "memoryRetrieval" }, + }); + + expect(calls.default).toBe(1); + expect(calls.alt).toBe(0); + }); + + test("routes to alternative provider when callSite resolves to a different provider name", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + memoryRetrieval: { provider: "openai", model: "gpt-5.4" }, + }, + }); + + const calls = { default: 0, alt: 0 }; + const defaultProvider = makeProvider("anthropic", () => { + calls.default++; + }); + const altProvider = makeProvider("openai", () => { + calls.alt++; + }); + + const wrapped = new CallSiteRoutingProvider(defaultProvider, (name) => + name === "openai" ? altProvider : undefined, + ); + + const response = await wrapped.sendMessage( + DUMMY_MESSAGES, + undefined, + undefined, + { config: { callSite: "memoryRetrieval" } }, + ); + + expect(calls.default).toBe(0); + expect(calls.alt).toBe(1); + expect(response.model).toBe("openai"); + }); + + test("falls back to default when alternative provider is not registered", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + memoryRetrieval: { provider: "openai", model: "gpt-5.4" }, + }, + }); + + const calls = { default: 0 }; + const defaultProvider = makeProvider("anthropic", () => { + calls.default++; + }); + + // Lookup always returns undefined — simulating a missing/uninitialized + // provider in the registry. + const wrapped = new CallSiteRoutingProvider( + defaultProvider, + () => undefined, + ); + + const response = await wrapped.sendMessage( + DUMMY_MESSAGES, + undefined, + undefined, + { config: { callSite: "memoryRetrieval" } }, + ); + + expect(calls.default).toBe(1); + expect(response.model).toBe("anthropic"); + }); + + test("delegates `name` and `tokenEstimationProvider` to the default provider", () => { + const defaultProvider: Provider = { + name: "anthropic", + tokenEstimationProvider: "anthropic", + async sendMessage() { + return makeResponse("anthropic"); + }, + }; + + const wrapped = new CallSiteRoutingProvider( + defaultProvider, + () => undefined, + ); + + expect(wrapped.name).toBe("anthropic"); + expect(wrapped.tokenEstimationProvider).toBe("anthropic"); + }); +}); diff --git a/assistant/src/agent/loop.ts b/assistant/src/agent/loop.ts index 228bd8f472c..0790f8b40ff 100644 --- a/assistant/src/agent/loop.ts +++ b/assistant/src/agent/loop.ts @@ -237,25 +237,47 @@ export class AgentLoop { ? this.resolveSystemPrompt(history) : null; const turnSystemPrompt = resolved?.systemPrompt ?? this.systemPrompt; - const turnMaxTokens = resolved?.maxTokens ?? this.config.maxTokens; const turnModel = resolved?.model; - const providerConfig: Record = { - max_tokens: turnMaxTokens, - }; - if (turnModel) { - providerConfig.model = turnModel; - } - if (this.config.thinking?.enabled) { - providerConfig.thinking = { type: "adaptive" }; + // Field precedence (highest wins): + // 1. Per-turn explicit (`resolved.maxTokens` / `resolved.model`) + // 2. Call-site resolved values (filled by + // `RetryProvider.normalizeSendMessageOptions` from + // `resolveCallSiteConfig(callSite, llm)`) + // 3. Conversation defaults (`this.config.*`, sourced from + // `llm.default`) + // + // When `callSite` is present we deliberately leave + // `max_tokens`/`thinking`/`effort`/`speed` *unset* in `providerConfig` + // so the normalizer can fill them from the call-site resolution. The + // normalizer only writes these fields when they're undefined; if we + // pre-set them from `this.config` here, every per-call-site override + // for these knobs is silently ignored. + // + // `toolChoice` and `cacheTtl` are not part of the call-site schema, so + // they always come from `this.config` regardless of `callSite`. + const providerConfig: Record = {}; + + if (resolved?.maxTokens !== undefined) { + providerConfig.max_tokens = resolved.maxTokens; + } else if (!callSite) { + providerConfig.max_tokens = this.config.maxTokens; } - if (this.config.effort) { - providerConfig.effort = this.config.effort; + if (turnModel) { + providerConfig.model = turnModel; } - if (this.config.speed && this.config.speed !== "standard") { - providerConfig.speed = this.config.speed; + if (!callSite) { + if (this.config.thinking?.enabled) { + providerConfig.thinking = { type: "adaptive" }; + } + if (this.config.effort) { + providerConfig.effort = this.config.effort; + } + if (this.config.speed && this.config.speed !== "standard") { + providerConfig.speed = this.config.speed; + } } if (this.config.toolChoice) { diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts index 54f8d56538a..84b50c2639d 100644 --- a/assistant/src/daemon/server.ts +++ b/assistant/src/daemon/server.ts @@ -49,6 +49,7 @@ import { import { getOrCreateConversation } from "../memory/conversation-key-store.js"; import { syncIdentityNameToPlatform } from "../platform/sync-identity.js"; import { buildSystemPrompt } from "../prompts/system-prompt.js"; +import { CallSiteRoutingProvider } from "../providers/call-site-routing.js"; import { RateLimitProvider } from "../providers/ratelimit.js"; import { getProvider, initializeProviders } from "../providers/registry.js"; import { @@ -1034,6 +1035,18 @@ export class DaemonServer { const createPromise = (async () => { const config = getConfig(); let provider = getProvider(config.llm.default.provider); + // Per-call `options.config.callSite` can resolve to a provider name + // that differs from `llm.default.provider`. Wrap the default + // provider so the actual transport routes correctly per call, + // rather than only forwarding metadata to the default's HTTP + // client. See `providers/call-site-routing.ts`. + provider = new CallSiteRoutingProvider(provider, (name) => { + try { + return getProvider(name); + } catch { + return undefined; + } + }); const { rateLimit } = config; if (rateLimit.maxRequestsPerMinute > 0) { provider = new RateLimitProvider( diff --git a/assistant/src/providers/call-site-routing.ts b/assistant/src/providers/call-site-routing.ts new file mode 100644 index 00000000000..9ce21eef417 --- /dev/null +++ b/assistant/src/providers/call-site-routing.ts @@ -0,0 +1,71 @@ +/** + * Provider wrapper that routes each `sendMessage` call to a different + * underlying provider transport when the per-call `options.config.callSite` + * resolves to a provider name that differs from the default. + * + * Without this wrapper the conversation-level provider transport is fixed at + * construction time, so a per-call-site `llm.callSites..provider` + * override only affects the request *metadata* the downstream client sees — + * the actual HTTP transport still belongs to `llm.default.provider`. That + * means routing decisions like "send `memoryRetrieval` calls to OpenAI even + * though the main agent runs on Anthropic" silently fail. + * + * `CallSiteRoutingProvider` consults `resolveCallSiteConfig` per call. When + * the resolved provider name differs from the default's name and the + * registry can produce a Provider for it, the call is delegated to that + * provider; otherwise it falls back to the default. Other Provider interface + * surface area (`name`, `tokenEstimationProvider`) is delegated to the + * default so wrappers further out (e.g. `RateLimitProvider`) still see a + * stable identity. + */ + +import { resolveCallSiteConfig } from "../config/llm-resolver.js"; +import { getConfig } from "../config/loader.js"; +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, + ToolDefinition, +} from "./types.js"; + +export class CallSiteRoutingProvider implements Provider { + public readonly name: string; + public readonly tokenEstimationProvider?: string; + + constructor( + private readonly defaultProvider: Provider, + private readonly getProviderByName: (name: string) => Provider | undefined, + ) { + this.name = defaultProvider.name; + this.tokenEstimationProvider = defaultProvider.tokenEstimationProvider; + } + + async sendMessage( + messages: Message[], + tools?: ToolDefinition[], + systemPrompt?: string, + options?: SendMessageOptions, + ): Promise { + const target = this.selectProvider(options); + return target.sendMessage(messages, tools, systemPrompt, options); + } + + /** + * Pick the provider to route this call through. The default provider wins + * unless the per-call `callSite` resolves to a different provider name and + * the registry can produce a Provider for it. + */ + private selectProvider(options?: SendMessageOptions): Provider { + const callSite = options?.config?.callSite; + if (!callSite) return this.defaultProvider; + + const resolved = resolveCallSiteConfig(callSite, getConfig().llm); + if (resolved.provider === this.defaultProvider.name) { + return this.defaultProvider; + } + + const alternative = this.getProviderByName(resolved.provider); + return alternative ?? this.defaultProvider; + } +}