vellum-ai · siddseethepalli · Apr 17, 2026 · Apr 17, 2026
diff --git a/assistant/src/__tests__/conversation-process-callsite.test.ts b/assistant/src/__tests__/conversation-process-callsite.test.ts
@@ -1,20 +1,11 @@
 /**
- * PR 6 — verify that `callSite` threads from `Conversation.processMessage`
- * options all the way down to the per-call provider config.
+ * Verify that `callSite` threads from `Conversation.processMessage` options
+ * all the way down to the per-call provider config, and that user-initiated
+ * turns default to `'mainAgent'` when no caller-supplied `callSite` is set.
  *
  * The test mocks `AgentLoop.run()` so it can capture the `callSite` parameter
  * the conversation passes after `processMessage` runs the slash-resolver and
  * runtime-injection pipeline.
- *
- * NOTE: PR 6 originally defaulted absent callers to `'mainAgent'`, but
- * Codex/Devin flagged that this routes every conversation turn through
- * `RetryProvider`'s new call-site resolver while `config-model.setModel`
- * still writes to `services.inference` without syncing `llm.default`.
- * Defer the cutover to a future PR that handles the model-sync. Until
- * then, agent-loop turns leave `callSite` undefined so the legacy
- * `modelIntent` path remains in effect for the user-message conversation
- * flow, while adapter callers (heartbeat/filing/scheduler) keep passing
- * their explicit `callSite` through.
  */
 import { describe, expect, mock, test } from "bun:test";
 
@@ -274,13 +265,7 @@ describe("processMessage callSite threading", () => {
     expect(captured.callSite).toBe("heartbeatAgent");
   });
 
-  test("leaves callSite undefined when not supplied (legacy modelIntent path)", async () => {
-    // PR 6 originally defaulted absent callers to 'mainAgent', but
-    // Codex/Devin flagged that this routes every conversation turn through
-    // the new RetryProvider call-site resolver while `services.inference`
-    // writes don't sync to `llm.default`. Defer the cutover to a future PR
-    // that handles the model-sync. Until then, agent-loop turns keep using
-    // the legacy modelIntent path by leaving `callSite` undefined.
+  test("defaults to 'mainAgent' when not supplied", async () => {
     mockConversation = {
       id: "conv-1",
       contextSummary: null,
@@ -297,6 +282,6 @@ describe("processMessage callSite threading", () => {
 
     await conversation.processMessage("Plain user message", [], () => {});
 
-    expect(captured.callSite).toBeUndefined();
+    expect(captured.callSite).toBe("mainAgent");
   });
 });
diff --git a/assistant/src/agent/loop.ts b/assistant/src/agent/loop.ts
@@ -268,10 +268,10 @@ export class AgentLoop {
 
         // Per-call LLM call-site identifier. Surfaces on the per-call
         // `config.callSite` so `RetryProvider.normalizeSendMessageOptions`
-        // can route through `resolveCallSiteConfig`. PRs 7-11 will switch
-        // individual callers from legacy `modelIntent`/hardcoded providers
-        // to call-site routing one at a time; until then the parameter is
-        // optional and absence preserves the legacy code path.
+        // can route through `resolveCallSiteConfig`. User-initiated
+        // conversation turns default to `mainAgent` in the agent loop's
+        // caller; other invocation contexts (heartbeat, filing, analyze,
+        // etc.) pass their own `callSite`.
         if (callSite) {
           providerConfig.callSite = callSite;
         }

diff --git a/assistant/src/daemon/conversation-agent-loop.ts b/assistant/src/daemon/conversation-agent-loop.ts
@@ -388,26 +388,12 @@ export async function runAgentLoopImpl(
   });
   let yieldedForHandoff = false;
 
-  // Resolve the LLM call-site for this turn.
-  //
-  // Reviewers (Codex P1 + Devin) flagged that defaulting absent callers to
-  // 'mainAgent' would route every conversation turn through
-  // `RetryProvider`'s new call-site resolver, which reads model/provider
-  // settings from `config.llm`. That creates a regression because
-  // `config-model.setModel` still writes to `services.inference` without
-  // syncing `llm.default`, so a model switch could leave agent-loop turns
-  // using stale or incompatible model IDs (e.g. an Anthropic model on an
-  // OpenAI transport).
-  //
-  // Defer the cutover. Leave `turnCallSite` undefined when the caller does
-  // not pass one, so `agentLoop.run()` omits `callSite` from the per-call
-  // provider config and `RetryProvider` keeps using the legacy
-  // `modelIntent` path. Adapter callers (heartbeat/filing/scheduler in
-  // PRs 7-11) still pass an explicit `callSite` and route through the new
-  // resolver as intended. A future PR will migrate the agent-loop turn to
-  // an explicit `'mainAgent'` callSite once the model-sync writers are
-  // wired up.
-  const turnCallSite: LLMCallSite | undefined = options?.callSite;
+  // Default user-initiated turns to the `mainAgent` call site. Other
+  // invocation contexts (heartbeat, filing, analyze, etc.) pass their own
+  // `callSite`. The provider layer resolves provider/model/maxTokens via
+  // `resolveCallSiteConfig`, picking up any user overrides under
+  // `llm.callSites.mainAgent`.
+  const turnCallSite: LLMCallSite = options?.callSite ?? "mainAgent";
 
   // Capture the turn channel context *before* any awaits so a second
   // message from a different channel can't overwrite it mid-flight.