diff --git a/assistant/src/__tests__/conversation-title-service.test.ts b/assistant/src/__tests__/conversation-title-service.test.ts index b9758e800e4..c84f329523d 100644 --- a/assistant/src/__tests__/conversation-title-service.test.ts +++ b/assistant/src/__tests__/conversation-title-service.test.ts @@ -85,7 +85,7 @@ describe("conversation-title-service", () => { provider, systemPrompt: expect.stringContaining("conversation titles"), tools: [], - modelIntent: "quality-optimized", + callSite: "conversationTitle", timeoutMs: 10_000, }), ); @@ -203,7 +203,7 @@ describe("conversation-title-service", () => { provider, systemPrompt: expect.stringContaining("conversation titles"), tools: [], - modelIntent: "quality-optimized", + callSite: "conversationTitle", timeoutMs: 10_000, }), ); diff --git a/assistant/src/__tests__/provider-commit-message-generator.test.ts b/assistant/src/__tests__/provider-commit-message-generator.test.ts index 1141d325ea2..504e1d4a0ef 100644 --- a/assistant/src/__tests__/provider-commit-message-generator.test.ts +++ b/assistant/src/__tests__/provider-commit-message-generator.test.ts @@ -221,7 +221,7 @@ describe("ProviderCommitMessageGenerator", () => { }); // 6. LLM success - test('LLM success → returns LLM message, source "llm", fast model passed', async () => { + test('LLM success → returns LLM message, source "llm", fast model + callSite passed', async () => { const commitMsg = "feat: add new feature"; mockSendMessage.mockResolvedValueOnce(makeSuccessResponse(commitMsg)); const gen = getCommitMessageGenerator(); @@ -232,10 +232,16 @@ describe("ProviderCommitMessageGenerator", () => { expect(result.message).toBe(commitMsg); expect(result.reason).toBeUndefined(); - // Verify the fast model was passed in the config + // Verify the fast model and callSite were passed in the config so the + // provider's RetryProvider routes through `resolveCallSiteConfig` for + // max_tokens/temperature while preserving the explicit fast-model + // override. const callArgs = mockSendMessage.mock.calls[0]; - const options = callArgs[3] as { config: { model: string } }; + const options = callArgs[3] as { + config: { model: string; callSite: string }; + }; expect(options.config.model).toBe("claude-haiku-4-5-20251001"); + expect(options.config.callSite).toBe("commitMessage"); }); // 7. fast-model override @@ -253,8 +259,11 @@ describe("ProviderCommitMessageGenerator", () => { expect(result.message).toBe(commitMsg); const callArgs = mockSendMessage.mock.calls[0]; - const options = callArgs[3] as { config: { model: string } }; + const options = callArgs[3] as { + config: { model: string; callSite: string }; + }; expect(options.config.model).toBe("claude-sonnet-4-20250514"); + expect(options.config.callSite).toBe("commitMessage"); }); // 8. LLM timeout diff --git a/assistant/src/memory/conversation-title-service.ts b/assistant/src/memory/conversation-title-service.ts index b656eada024..4f5bc700761 100644 --- a/assistant/src/memory/conversation-title-service.ts +++ b/assistant/src/memory/conversation-title-service.ts @@ -133,7 +133,7 @@ export async function generateAndPersistConversationTitle( provider, systemPrompt: buildTitleSystemPrompt(), tools: [], - modelIntent: "quality-optimized", + callSite: "conversationTitle", signal, timeoutMs: 10_000, }); @@ -236,7 +236,7 @@ export async function regenerateConversationTitle( provider, systemPrompt: buildTitleSystemPrompt(), tools: [], - modelIntent: "quality-optimized", + callSite: "conversationTitle", signal, timeoutMs: 10_000, }); diff --git a/assistant/src/runtime/btw-sidechain.ts b/assistant/src/runtime/btw-sidechain.ts index f01f1820f51..6ba2a788d7c 100644 --- a/assistant/src/runtime/btw-sidechain.ts +++ b/assistant/src/runtime/btw-sidechain.ts @@ -1,3 +1,4 @@ +import type { LLMCallSite } from "../config/schemas/llm.js"; import { buildToolDefinitions } from "../daemon/conversation-tool-setup.js"; import { buildSystemPrompt } from "../prompts/system-prompt.js"; import { @@ -29,6 +30,13 @@ export interface RunBtwSidechainParams { systemPrompt?: string; tools?: ToolDefinition[]; maxTokens?: number; + /** + * Opt-in routing through the unified LLM call-site resolver. When set, the + * provider resolves provider/model/maxTokens/etc. via + * `resolveCallSiteConfig(callSite, config.llm)` instead of `modelIntent`. + * `callSite` wins when both are passed. + */ + callSite?: LLMCallSite; modelIntent?: ModelIntent; signal?: AbortSignal; timeoutMs?: number; @@ -89,7 +97,9 @@ export async function runBtwSidechain( config: { max_tokens: params.maxTokens ?? 1024, tool_choice: { type: "none" }, - modelIntent: params.modelIntent ?? "latency-optimized", + ...(params.callSite !== undefined + ? { callSite: params.callSite } + : { modelIntent: params.modelIntent ?? "latency-optimized" }), }, onEvent: (event) => { if (event.type === "text_delta") { diff --git a/assistant/src/workspace/provider-commit-message-generator.ts b/assistant/src/workspace/provider-commit-message-generator.ts index 627bb12615e..c50d44bacf5 100644 --- a/assistant/src/workspace/provider-commit-message-generator.ts +++ b/assistant/src/workspace/provider-commit-message-generator.ts @@ -263,9 +263,19 @@ export class ProviderCommitMessageGenerator { { signal: ac.signal, config: { + // `callSite` lets the provider resolve `max_tokens` and + // `temperature` from `llm.callSites.commitMessage` (populated by + // the workspace migration from the legacy + // `workspaceGit.commitMessageLLM.{maxTokens,temperature}` keys). + // Operational fields (`enabled`, `timeoutMs`, `breaker`, + // `maxFilesInPrompt`, `maxDiffBytes`, `minRemainingTurnBudgetMs`) + // remain on `workspaceGit.commitMessageLLM` and are read above. + callSite: "commitMessage", + // `fastModel` overrides the resolver's `model` because commit + // message generation enforces its own provider-specific fast + // model selection (see `PROVIDER_DEFAULT_FAST_MODELS` and + // `providerFastModelOverrides`). model: fastModel, - max_tokens: llmConfig.maxTokens, - temperature: llmConfig.temperature, }, }, );