From 72e06ef73b7b3d422c588ddf4f2752366a5f66a3 Mon Sep 17 00:00:00 2001 From: siddseethepalli Date: Thu, 16 Apr 2026 20:22:15 +0000 Subject: [PATCH] providers: accept callSite in per-call config; resolve via resolveCallSiteConfig --- .../__tests__/retry-callsite.test.ts | 341 ++++++++++++++++++ .../src/providers/provider-send-message.ts | 26 +- assistant/src/providers/retry.ts | 104 ++++++ assistant/src/providers/types.ts | 11 + 4 files changed, 478 insertions(+), 4 deletions(-) create mode 100644 assistant/src/providers/__tests__/retry-callsite.test.ts diff --git a/assistant/src/providers/__tests__/retry-callsite.test.ts b/assistant/src/providers/__tests__/retry-callsite.test.ts new file mode 100644 index 00000000000..f9b2ffe4a4b --- /dev/null +++ b/assistant/src/providers/__tests__/retry-callsite.test.ts @@ -0,0 +1,341 @@ +import { beforeEach, describe, expect, mock, test } from "bun:test"; + +// ── Module mocks ──────────────────────────────────────────────────────────── +// +// Stub the logger so retry diagnostics don't pollute test output. +mock.module("../../util/logger.js", () => ({ + getLogger: () => + new Proxy({} as Record, { get: () => () => {} }), +})); + +// Mutable test fixtures for `getConfig()`. Each test rebuilds the relevant +// pieces via `setLlmConfig(...)` / `setInferenceProvider(...)` before +// exercising the path. The mock is registered once and reads from these +// closures so subsequent tests don't need to remock the module. +let mockLlmConfig: Record = {}; +let mockInferenceProvider = "anthropic"; + +mock.module("../../config/loader.js", () => ({ + getConfig: () => ({ + llm: mockLlmConfig, + services: { inference: { provider: mockInferenceProvider } }, + }), +})); + +// Provider registry mock. Tests populate `mockProviders` via `beforeEach` / +// per-test `set` so `getProvider(name)` can return the right stub. +const mockProviders = new Map(); + +mock.module("../registry.js", () => ({ + getProvider: (name: string) => { + const p = mockProviders.get(name); + if (!p) throw new Error(`unknown provider: ${name}`); + return p; + }, + initializeProviders: async () => {}, + listProviders: () => Array.from(mockProviders.values()), +})); + +// ── Imports (after mocks) ─────────────────────────────────────────────────── + +import { LLMSchema } from "../../config/schemas/llm.js"; +import { + getConfiguredProvider, + resolveConfiguredProvider, +} from "../provider-send-message.js"; +import { RetryProvider } from "../retry.js"; +import type { + Message, + Provider, + ProviderResponse, + SendMessageOptions, +} from "../types.js"; + +// ── Test fixtures ────────────────────────────────────────────────────────── + +const DUMMY_MESSAGES: Message[] = [ + { role: "user", content: [{ type: "text", text: "hello" }] }, +]; + +function makeResponse(model: string): ProviderResponse { + return { + content: [{ type: "text", text: "ok" }], + model, + usage: { inputTokens: 1, outputTokens: 1 }, + stopReason: "end_turn", + }; +} + +function makeProvider( + name: string, + onCall: (options: SendMessageOptions | undefined) => void, +): Provider { + return { + name, + async sendMessage(_messages, _tools, _systemPrompt, options) { + onCall(options); + const config = options?.config as Record | undefined; + return makeResponse( + (config?.model as string | undefined) ?? "default-model", + ); + }, + }; +} + +function setLlmConfig(raw: unknown): void { + // Parse through the schema so defaults cascade through every nesting level, + // matching what `getConfig().llm` would produce in production. + mockLlmConfig = LLMSchema.parse(raw) as Record; +} + +beforeEach(() => { + mockLlmConfig = LLMSchema.parse({}) as Record; + mockInferenceProvider = "anthropic"; + mockProviders.clear(); +}); + +// ── RetryProvider — call-site path ────────────────────────────────────────── + +describe("RetryProvider — callSite resolution", () => { + test("resolves provider/model/maxTokens from llm.callSites.", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + memoryRetrieval: { + model: "claude-haiku-4-5-20251001", + maxTokens: 4096, + }, + }, + }); + + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "memoryRetrieval" }, + }); + + const config = seen?.config as Record; + expect(config.model).toBe("claude-haiku-4-5-20251001"); + expect(config.max_tokens).toBe(4096); + // Both opt-in routing keys are stripped before delegating downstream. + expect(config.callSite).toBeUndefined(); + expect(config.modelIntent).toBeUndefined(); + }); + + test("falls back to llm.default when llm.callSites[id] is absent", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-default-fallback", + maxTokens: 32000, + }, + // No `callSites.memoryRetrieval` entry. + }); + + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "memoryRetrieval" }, + }); + + const config = seen?.config as Record; + expect(config.model).toBe("claude-default-fallback"); + expect(config.max_tokens).toBe(32000); + }); + + test("propagates resolved effort/speed/temperature/thinking/contextWindow", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-opus-4-7", + effort: "high", + speed: "fast", + temperature: 0.7, + }, + callSites: { + heartbeatAgent: { + thinking: { enabled: false }, + }, + }, + }); + + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "heartbeatAgent" }, + }); + + const config = seen?.config as Record; + expect(config.effort).toBe("high"); + expect(config.speed).toBe("fast"); + expect(config.temperature).toBe(0.7); + // Deep-merged: enabled overridden by callSite, streamThinking inherited. + expect(config.thinking).toEqual({ enabled: false, streamThinking: true }); + // contextWindow comes through from the resolved default. + expect(config.contextWindow).toBeDefined(); + }); + + test("strips effort/speed/thinking for providers that don't support them", async () => { + setLlmConfig({ + default: { + provider: "anthropic", + model: "claude-opus-4-7", + effort: "high", + speed: "fast", + }, + callSites: { + memoryRetrieval: { thinking: { enabled: true } }, + }, + }); + + let seen: SendMessageOptions | undefined; + // gemini does not support effort/speed/thinking — they must be stripped. + const wrapped = new RetryProvider( + makeProvider("gemini", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "memoryRetrieval" }, + }); + + const config = seen?.config as Record; + expect(config.effort).toBeUndefined(); + expect(config.speed).toBeUndefined(); + expect(config.thinking).toBeUndefined(); + // Model still comes through. + expect(config.model).toBe("claude-opus-4-7"); + }); + + test("explicit per-call config.model wins over resolved callSite model", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "resolved-model" }, + }); + + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { callSite: "mainAgent", model: "explicit-override" }, + }); + + const config = seen?.config as Record; + expect(config.model).toBe("explicit-override"); + }); +}); + +// ── RetryProvider — legacy modelIntent path is preserved ──────────────────── + +describe("RetryProvider — legacy modelIntent path (no callSite)", () => { + test("passing only modelIntent does not consult llm.* config", async () => { + // Seed the llm config with a value that, if accidentally consulted, would + // produce a clearly-wrong model. The legacy path must ignore it entirely. + setLlmConfig({ + default: { provider: "anthropic", model: "MUST-NOT-LEAK" }, + callSites: { + mainAgent: { model: "ALSO-MUST-NOT-LEAK" }, + }, + }); + + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { modelIntent: "quality-optimized" }, + }); + + const config = seen?.config as Record; + // Legacy path uses model-intents.ts mapping for "quality-optimized" on + // anthropic, which is "claude-opus-4-7". It must NOT be the llm.default + // value, which would indicate the new path was triggered. + expect(config.model).toBe("claude-opus-4-7"); + expect(config.model).not.toBe("MUST-NOT-LEAK"); + expect(config.model).not.toBe("ALSO-MUST-NOT-LEAK"); + expect(config.modelIntent).toBeUndefined(); + }); + + test("no callSite and no modelIntent leaves config untouched (existing fast-path)", async () => { + let seen: SendMessageOptions | undefined; + const wrapped = new RetryProvider( + makeProvider("anthropic", (options) => { + seen = options; + }), + ); + + await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { + config: { model: "explicit-model", max_tokens: 1234 }, + }); + + const config = seen?.config as Record; + expect(config.model).toBe("explicit-model"); + expect(config.max_tokens).toBe(1234); + }); +}); + +// ── getConfiguredProvider — call-site routing ────────────────────────────── + +describe("getConfiguredProvider — callSite routing", () => { + test("selects provider from llm.callSites[id].provider when callSite given", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + heartbeatAgent: { provider: "openai", model: "gpt-5.4" }, + }, + }); + mockProviders.set("openai", { name: "openai" }); + mockProviders.set("anthropic", { name: "anthropic" }); + + const provider = await getConfiguredProvider("heartbeatAgent"); + expect(provider?.name).toBe("openai"); + }); + + test("falls back to llm.default.provider when callSite has no provider override", async () => { + setLlmConfig({ + default: { provider: "anthropic", model: "claude-opus-4-7" }, + callSites: { + // No provider field — default takes over. + heartbeatAgent: { model: "claude-haiku-4-5-20251001" }, + }, + }); + mockProviders.set("anthropic", { name: "anthropic" }); + + const provider = await getConfiguredProvider("heartbeatAgent"); + expect(provider?.name).toBe("anthropic"); + }); + + test("legacy call (no callSite arg) uses services.inference.provider", async () => { + // The legacy path consults `services.inference.provider`. The shared + // loader mock reads `mockInferenceProvider` at call time, so we just + // overwrite it for this test. + mockInferenceProvider = "fireworks"; + mockProviders.set("fireworks", { name: "fireworks" }); + + const result = await resolveConfiguredProvider(); + expect(result?.configuredProviderName).toBe("fireworks"); + expect(result?.provider.name).toBe("fireworks"); + }); +}); diff --git a/assistant/src/providers/provider-send-message.ts b/assistant/src/providers/provider-send-message.ts index 51eef055db6..3c28710295e 100644 --- a/assistant/src/providers/provider-send-message.ts +++ b/assistant/src/providers/provider-send-message.ts @@ -5,6 +5,8 @@ */ import { getConfig } from "../config/loader.js"; +import { resolveCallSiteConfig } from "../config/llm-resolver.js"; +import type { LLMCallSite } from "../config/schemas/llm.js"; import { getProvider, initializeProviders, @@ -36,9 +38,16 @@ let lazyInitPromise: Promise | null = null; * If providers haven't been initialized yet (e.g. non-daemon code paths), * performs a one-shot `initializeProviders(getConfig())`. * + * When `callSite` is provided, the provider name comes from + * `resolveCallSiteConfig(callSite, config.llm).provider` — i.e. the unified + * `llm` block drives selection. Otherwise the legacy + * `services.inference.provider` is used unchanged. + * * Returns `null` when no providers are available at all. */ -export async function resolveConfiguredProvider(): Promise { +export async function resolveConfiguredProvider( + callSite?: LLMCallSite, +): Promise { const config = getConfig(); if (listProviders().length === 0) { @@ -54,7 +63,10 @@ export async function resolveConfiguredProvider(): Promise { - const result = await resolveConfiguredProvider(); +export async function getConfiguredProvider( + callSite?: LLMCallSite, +): Promise { + const result = await resolveConfiguredProvider(callSite); return result?.provider ?? null; } diff --git a/assistant/src/providers/retry.ts b/assistant/src/providers/retry.ts index 9e966dca23a..27193c89cf3 100644 --- a/assistant/src/providers/retry.ts +++ b/assistant/src/providers/retry.ts @@ -1,3 +1,5 @@ +import { getConfig } from "../config/loader.js"; +import { resolveCallSiteConfig } from "../config/llm-resolver.js"; import { ProviderError } from "../util/errors.js"; import { getLogger } from "../util/logger.js"; import { @@ -76,6 +78,17 @@ function normalizeSendMessageOptions( const config = options?.config; if (!config) return options; + // ── Call-site path ────────────────────────────────────────────────── + // When `config.callSite` is set, route through `resolveCallSiteConfig` + // to fully resolve provider/model/maxTokens/effort/speed/temperature/ + // thinking/contextWindow from `llm.default + profile + site` overrides. + // This is the new unified path; the legacy `modelIntent` branch below is + // preserved unchanged for unmigrated callers. + if (config.callSite !== undefined) { + return normalizeViaCallSite(providerName, options, config); + } + + // ── Legacy `modelIntent` path (preserved) ─────────────────────────── const explicitModel = typeof config.model === "string" && config.model.trim().length > 0 ? config.model.trim() @@ -141,6 +154,97 @@ function normalizeSendMessageOptions( }; } +/** + * Normalize options when the caller opted into call-site resolution. + * + * Resolves provider/model/maxTokens/effort/speed/temperature/thinking/ + * contextWindow via `resolveCallSiteConfig` and writes them into `nextConfig` + * using the wire-format names that downstream provider clients consume + * (`max_tokens` snake-case for the token cap; camelCase for the rest, which + * matches the resolver's shape). Per-call explicit overrides on the original + * `config` object win over the resolved values, mirroring the legacy + * "explicit `config.model` beats `modelIntent`" semantics so unmigrated + * callers that pass both can't be silently broken. + * + * Both `callSite` and `modelIntent` are stripped from the downstream config. + * Per-provider stripping (`thinking`/`effort`/`speed`) is applied based on + * the wrapped provider's name, identical to the legacy path. + */ +function normalizeViaCallSite( + providerName: string, + options: SendMessageOptions | undefined, + config: NonNullable, +): SendMessageOptions | undefined { + const callSite = config.callSite!; + const resolved = resolveCallSiteConfig(callSite, getConfig().llm); + + const explicitModel = + typeof config.model === "string" && config.model.trim().length > 0 + ? config.model.trim() + : undefined; + + const nextConfig: Record = { ...config }; + // Both opt-in routing keys are consumed by the RetryProvider layer and + // must not leak downstream. + delete nextConfig.callSite; + delete nextConfig.modelIntent; + + // Apply resolved values, letting per-call explicit fields win where set. + nextConfig.model = explicitModel ?? resolved.model; + if (nextConfig.max_tokens === undefined) { + nextConfig.max_tokens = resolved.maxTokens; + } + if (nextConfig.effort === undefined) { + nextConfig.effort = resolved.effort; + } + if (nextConfig.speed === undefined) { + nextConfig.speed = resolved.speed; + } + if (nextConfig.temperature === undefined) { + nextConfig.temperature = resolved.temperature; + } + if (nextConfig.thinking === undefined) { + nextConfig.thinking = resolved.thinking; + } + if (nextConfig.contextWindow === undefined) { + nextConfig.contextWindow = resolved.contextWindow; + } + // Provider name from the resolver — informational; the wrapped provider + // is the actual transport. Downstream consumers may inspect this for + // diagnostics or wire-format decisions, but the request still routes + // through the inner provider that this RetryProvider wraps. + if (nextConfig.provider === undefined) { + nextConfig.provider = resolved.provider; + } + + // thinking is Anthropic-specific on the wire; OpenRouter reads it as a + // signal for its unified reasoning parameter. Strip it for other providers. + if ( + !THINKING_AWARE_PROVIDERS.has(providerName) && + nextConfig.thinking !== undefined + ) { + delete nextConfig.thinking; + } + + // effort is supported by Anthropic, OpenAI, and OpenAI-compatible providers; strip for others + if ( + !EFFORT_SUPPORTED_PROVIDERS.has(providerName) && + nextConfig.effort !== undefined + ) { + delete nextConfig.effort; + } + + // speed (fast mode) is Anthropic-specific; strip for other providers + if (providerName !== "anthropic" && nextConfig.speed !== undefined) { + delete nextConfig.speed; + } + + return { + ...options, + config: nextConfig, + }; +} + export class RetryProvider implements Provider { public readonly name: string; diff --git a/assistant/src/providers/types.ts b/assistant/src/providers/types.ts index 6324dbb1ada..6358277b411 100644 --- a/assistant/src/providers/types.ts +++ b/assistant/src/providers/types.ts @@ -1,3 +1,5 @@ +import type { LLMCallSite } from "../config/schemas/llm.js"; + export interface TextContent { type: "text"; text: string; @@ -131,6 +133,15 @@ export type ProviderEvent = export interface SendMessageConfig { model?: string; modelIntent?: ModelIntent; + /** + * Opt-in routing through the unified LLM call-site resolver. When set, + * `RetryProvider` resolves provider/model/maxTokens/effort/speed/temperature/ + * thinking/contextWindow via `resolveCallSiteConfig(callSite, config.llm)` + * instead of consulting `modelIntent`. Both fields may coexist; `callSite` + * wins when present, and the legacy `modelIntent` path is preserved for + * unmigrated callers. + */ + callSite?: LLMCallSite; effort?: "low" | "medium" | "high" | "max"; speed?: "standard" | "fast"; [key: string]: unknown;