vellum-ai · siddseethepalli · Apr 16, 2026 · Apr 16, 2026 · devin-ai-integration · Apr 16, 2026
diff --git a/assistant/src/__tests__/conversation-title-service.test.ts b/assistant/src/__tests__/conversation-title-service.test.ts
@@ -85,7 +85,7 @@ describe("conversation-title-service", () => {
         provider,
         systemPrompt: expect.stringContaining("conversation titles"),
         tools: [],
-        modelIntent: "quality-optimized",
+        callSite: "conversationTitle",
         timeoutMs: 10_000,
       }),
     );
@@ -203,7 +203,7 @@ describe("conversation-title-service", () => {
         provider,
         systemPrompt: expect.stringContaining("conversation titles"),
         tools: [],
-        modelIntent: "quality-optimized",
+        callSite: "conversationTitle",
         timeoutMs: 10_000,
       }),
     );

diff --git a/assistant/src/__tests__/provider-commit-message-generator.test.ts b/assistant/src/__tests__/provider-commit-message-generator.test.ts
@@ -221,7 +221,7 @@ describe("ProviderCommitMessageGenerator", () => {
   });
 
   // 6. LLM success
-  test('LLM success → returns LLM message, source "llm", fast model passed', async () => {
+  test('LLM success → returns LLM message, source "llm", fast model + callSite passed', async () => {
     const commitMsg = "feat: add new feature";
     mockSendMessage.mockResolvedValueOnce(makeSuccessResponse(commitMsg));
     const gen = getCommitMessageGenerator();
@@ -232,10 +232,16 @@ describe("ProviderCommitMessageGenerator", () => {
     expect(result.message).toBe(commitMsg);
     expect(result.reason).toBeUndefined();
 
-    // Verify the fast model was passed in the config
+    // Verify the fast model and callSite were passed in the config so the
+    // provider's RetryProvider routes through `resolveCallSiteConfig` for
+    // max_tokens/temperature while preserving the explicit fast-model
+    // override.
     const callArgs = mockSendMessage.mock.calls[0];
-    const options = callArgs[3] as { config: { model: string } };
+    const options = callArgs[3] as {
+      config: { model: string; callSite: string };
+    };
     expect(options.config.model).toBe("claude-haiku-4-5-20251001");
+    expect(options.config.callSite).toBe("commitMessage");
   });
 
   // 7. fast-model override
@@ -253,8 +259,11 @@ describe("ProviderCommitMessageGenerator", () => {
     expect(result.message).toBe(commitMsg);
 
     const callArgs = mockSendMessage.mock.calls[0];
-    const options = callArgs[3] as { config: { model: string } };
+    const options = callArgs[3] as {
+      config: { model: string; callSite: string };
+    };
     expect(options.config.model).toBe("claude-sonnet-4-20250514");
+    expect(options.config.callSite).toBe("commitMessage");
   });
 
   // 8. LLM timeout

diff --git a/assistant/src/memory/conversation-title-service.ts b/assistant/src/memory/conversation-title-service.ts
@@ -133,7 +133,7 @@ export async function generateAndPersistConversationTitle(
     provider,
     systemPrompt: buildTitleSystemPrompt(),
     tools: [],
-    modelIntent: "quality-optimized",
+    callSite: "conversationTitle",
     signal,
     timeoutMs: 10_000,
   });
@@ -236,7 +236,7 @@ export async function regenerateConversationTitle(
     provider,
     systemPrompt: buildTitleSystemPrompt(),
     tools: [],
-    modelIntent: "quality-optimized",
+    callSite: "conversationTitle",
     signal,
     timeoutMs: 10_000,
   });

diff --git a/assistant/src/runtime/btw-sidechain.ts b/assistant/src/runtime/btw-sidechain.ts
@@ -1,3 +1,4 @@
+import type { LLMCallSite } from "../config/schemas/llm.js";
 import { buildToolDefinitions } from "../daemon/conversation-tool-setup.js";
 import { buildSystemPrompt } from "../prompts/system-prompt.js";
 import {
@@ -29,6 +30,13 @@ export interface RunBtwSidechainParams {
   systemPrompt?: string;
   tools?: ToolDefinition[];
   maxTokens?: number;
+  /**
+   * Opt-in routing through the unified LLM call-site resolver. When set, the
+   * provider resolves provider/model/maxTokens/etc. via `resolveCallSiteConfig`
+   * instead of `modelIntent`; `callSite` wins when both are passed.
+   * NOTE(review): `max_tokens` is still sent explicitly; confirm precedence.
+   */
+  callSite?: LLMCallSite;
   modelIntent?: ModelIntent;
   signal?: AbortSignal;
   timeoutMs?: number;
@@ -89,7 +97,9 @@ export async function runBtwSidechain(
       config: {
         max_tokens: params.maxTokens ?? 1024,
         tool_choice: { type: "none" },
-        modelIntent: params.modelIntent ?? "latency-optimized",
+        ...(params.callSite !== undefined
+          ? { callSite: params.callSite }
+          : { modelIntent: params.modelIntent ?? "latency-optimized" }),
       },
       onEvent: (event) => {
         if (event.type === "text_delta") {

diff --git a/assistant/src/workspace/provider-commit-message-generator.ts b/assistant/src/workspace/provider-commit-message-generator.ts
@@ -263,9 +263,19 @@ export class ProviderCommitMessageGenerator {
           {
             signal: ac.signal,
             config: {
+              // `callSite` lets the provider resolve `max_tokens` and
+              // `temperature` from `llm.callSites.commitMessage` (populated by
+              // the workspace migration from the legacy
+              // `workspaceGit.commitMessageLLM.{maxTokens,temperature}` keys).
+              // Operational fields (`enabled`, `timeoutMs`, `breaker`,
+              // `maxFilesInPrompt`, `maxDiffBytes`, `minRemainingTurnBudgetMs`)
+              // remain on `workspaceGit.commitMessageLLM` and are read above.
+              callSite: "commitMessage",
+              // `fastModel` overrides the resolver's `model` because commit
+              // message generation enforces its own provider-specific fast
+              // model selection (see `PROVIDER_DEFAULT_FAST_MODELS` and
+              // `providerFastModelOverrides`).
               model: fastModel,
-              max_tokens: llmConfig.maxTokens,
-              temperature: llmConfig.temperature,
             },
           },
         );