vellum-ai · noanflaherty · May 11, 2026 · May 11, 2026 · devin-ai-integration · May 11, 2026
diff --git a/assistant/src/__tests__/inference-no-mode-boot-e2e.test.ts b/assistant/src/__tests__/inference-no-mode-boot-e2e.test.ts
@@ -92,6 +92,7 @@ mock.module("@anthropic-ai/sdk", () => ({
 // ---------------------------------------------------------------------------
 
 import { AssistantConfigSchema } from "../config/schema.js";
+import { LLMSchema } from "../config/schemas/llm.js";
 import { ConnectionResolutionError } from "../providers/connection-resolution.js";
 import {
   getProvider,
@@ -105,6 +106,8 @@ import { credentialKey } from "../security/credential-key.js";
 // Helpers
 // ---------------------------------------------------------------------------
 
+const baseLlm = LLMSchema.parse({});
+
 function makeConfig(overrides?: Record<string, unknown>) {
   return {
     services: {
@@ -117,8 +120,10 @@ function makeConfig(overrides?: Record<string, unknown>) {
       "web-search": { mode: "your-own" as const, provider: "inference-provider-native" },
     },
     llm: {
+      ...baseLlm,
       default: {
-        provider: "anthropic",
+        ...baseLlm.default,
+        provider: "anthropic" as const,
         model: "claude-opus-4-7",
         provider_connection: "anthropic-personal",
       },

diff --git a/assistant/src/__tests__/provider-managed-proxy-integration.test.ts b/assistant/src/__tests__/provider-managed-proxy-integration.test.ts
@@ -67,7 +67,7 @@ mock.module("../config/loader.js", () => ({
   }),
 }));
 
-import { LLMSchema } from "../config/schemas/llm.js";
+import { type LLMConfigBase, LLMSchema } from "../config/schemas/llm.js";
 import type { ProvidersConfig } from "../providers/registry.js";
 import {
   getProvider,
@@ -77,6 +77,7 @@ import {
 } from "../providers/registry.js";
 
 function makeProvidersConfig(provider: string, model: string): ProvidersConfig {
+  const baseLlm = LLMSchema.parse({});
   return {
     services: {
       inference: {},
@@ -87,7 +88,14 @@ function makeProvidersConfig(provider: string, model: string): ProvidersConfig {
       },
       "web-search": { mode: "your-own", provider: "inference-provider-native" },
     },
-    llm: { default: { provider, model } },
+    llm: {
+      ...baseLlm,
+      default: {
+        ...baseLlm.default,
+        provider: provider as LLMConfigBase["provider"],
+        model,
+      },
+    },
   };
 }
 

diff --git a/assistant/src/__tests__/provider-registry-ollama.test.ts b/assistant/src/__tests__/provider-registry-ollama.test.ts
@@ -7,12 +7,15 @@ mock.module("../security/secure-keys.js", () => ({
   getSecureKeyAsync: async () => undefined,
 }));
 
+import { LLMSchema } from "../config/schemas/llm.js";
 import {
   getProvider,
   initializeProviders,
   listProviders,
 } from "../providers/registry.js";
 
+const baseLlm = LLMSchema.parse({});
+
 describe("provider registry (ollama)", () => {
   test("registers ollama when selected provider has no API key", async () => {
     await initializeProviders({
@@ -28,7 +31,14 @@ describe("provider registry (ollama)", () => {
           provider: "inference-provider-native",
         },
       },
-      llm: { default: { provider: "ollama", model: "claude-opus-4-6" } },
+      llm: {
+        ...baseLlm,
+        default: {
+          ...baseLlm.default,
+          provider: "ollama" as const,
+          model: "claude-opus-4-6",
+        },
+      },
     });
 
     const provider = getProvider("ollama");

diff --git a/assistant/src/__tests__/secret-routes-managed-proxy.test.ts b/assistant/src/__tests__/secret-routes-managed-proxy.test.ts
@@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, mock, test } from "bun:test";
 
+import { LLMSchema } from "../config/schemas/llm.js";
 import { credentialKey } from "../security/credential-key.js";
 
 let lastGeminiConstructorOpts: Record<string, unknown> | null = null;
@@ -15,6 +16,8 @@ const MANAGED_PROVIDERS = ["anthropic", "openai", "gemini"] as const;
 
 let platformBaseUrlOverride: string | undefined;
 
+const baseLlm = LLMSchema.parse({});
+
 const mockConfig = {
   services: {
     inference: {},
@@ -28,7 +31,14 @@ const mockConfig = {
       provider: "inference-provider-native",
     },
   },
-  llm: { default: { provider: "anthropic", model: "test-model" } },
+  llm: {
+    ...baseLlm,
+    default: {
+      ...baseLlm.default,
+      provider: "anthropic" as const,
+      model: "test-model",
+    },
+  },
 };
 
 mock.module("@google/genai", () => ({

diff --git a/assistant/src/daemon/conversation-agent-loop.ts b/assistant/src/daemon/conversation-agent-loop.ts
@@ -29,6 +29,7 @@ import {
   contextWindowConfigFromEffective,
   resolveEffectiveContextWindow,
 } from "../config/llm-context-resolution.js";
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import { getConfig } from "../config/loader.js";
 import type { LLMCallSite } from "../config/schemas/llm.js";
 import type { ContextWindowConfig } from "../config/types.js";
@@ -681,7 +682,9 @@ export async function runAgentLoopImpl(
     overrideProfile: turnOverrideProfile ?? undefined,
   });
   const turnContextWindowConfig = contextWindowConfigFromEffective(
-    config.llm.default.contextWindow,
+    resolveCallSiteConfig(turnCallSite, config.llm, {
+      overrideProfile: turnOverrideProfile ?? undefined,
+    }).contextWindow,
     effectiveContextWindow,
   );
   (

diff --git a/assistant/src/daemon/conversation-slash.ts b/assistant/src/daemon/conversation-slash.ts
@@ -1,5 +1,6 @@
 import type { InterfaceId } from "../channels/types.js";
 import { resolveEffectiveContextWindow } from "../config/llm-context-resolution.js";
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import { getConfig } from "../config/loader.js";
 import { getConversationOverrideProfile } from "../memory/conversation-crud.js";
 import { PROVIDER_CATALOG } from "../providers/model-catalog.js";
@@ -111,6 +112,7 @@ const DEPRECATED_MODEL_SHORTCUTS = new Set([
 
 async function resolveModelList(): Promise<SlashResolution> {
   const config = getConfig();
+  const resolvedMainAgent = resolveCallSiteConfig("mainAgent", config.llm);
   const configuredProviders = new Set<string>(await getConfiguredProviders());
 
   const lines = ["Available models:\n"];
@@ -125,8 +127,8 @@ async function resolveModelList(): Promise<SlashResolution> {
     lines.push(`**${providerName}** ${status}`);
     for (const { id, displayName } of models) {
       const isCurrent =
-        config.llm.default.provider === provider &&
-        config.llm.default.model === id;
+        resolvedMainAgent.provider === provider &&
+        resolvedMainAgent.model === id;
       const current = isCurrent ? " **[current]**" : "";
       lines.push(`  - ${displayName} (\`${id}\`)${current}`);
     }

diff --git a/assistant/src/daemon/conversation-store.ts b/assistant/src/daemon/conversation-store.ts
@@ -14,6 +14,7 @@
  * shared rate-limit timestamps, broadcast).
  */
 
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import { getConfig } from "../config/loader.js";
 import type { CesClient } from "../credential-execution/client.js";
 import { buildSystemPrompt } from "../prompts/system-prompt.js";
@@ -230,7 +231,7 @@ export async function getOrCreateConversation(
       const baseProvider = await resolveDefaultProvider(config);
       if (!baseProvider) {
         throw new Error(
-          `Conversation: default provider '${config.llm.default.provider}' is not registered`,
+          `Conversation: default provider '${resolveCallSiteConfig("mainAgent", config.llm).provider}' is not registered`,
         );
       }
       // Per-call `callSite` routing layered on top, with connection-awareness

diff --git a/assistant/src/daemon/conversation.ts b/assistant/src/daemon/conversation.ts
@@ -27,6 +27,7 @@ import {
   contextWindowConfigFromEffective,
   resolveEffectiveContextWindow,
 } from "../config/llm-context-resolution.js";
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import { getConfig } from "../config/loader.js";
 import type { LLMCallSite, Speed } from "../config/schemas/llm.js";
 import type { ContextWindowConfig } from "../config/types.js";
@@ -433,7 +434,8 @@ export class Conversation {
     );
 
     const config = getConfig();
-    this.streamThinking = config.llm.default.thinking.streamThinking ?? false;
+    const resolvedMainAgent = resolveCallSiteConfig("mainAgent", config.llm);
+    this.streamThinking = resolvedMainAgent.thinking.streamThinking ?? false;
 
     // CES (Credential Execution Service) — use the shared server-level client.
     // The CES sidecar accepts exactly one bootstrap connection, so the
@@ -485,20 +487,19 @@ export class Conversation {
     };
 
     const fastModeEnabled = isAssistantFeatureFlagEnabled("fast-mode", config);
-    const resolvedSpeed = speedOverride ?? config.llm.default.speed;
-    const llmDefault = config.llm.default;
+    const resolvedSpeed = speedOverride ?? resolvedMainAgent.speed;
     const initialContextWindow = resolveEffectiveContextWindow({
       llm: config.llm,
       callSite: "mainAgent",
     });
     const initialContextWindowConfig = contextWindowConfigFromEffective(
-      llmDefault.contextWindow,
+      resolvedMainAgent.contextWindow,
       initialContextWindow,
     );
 
     const agentLoopConfig: Partial<AgentLoopConfig> = {
-      thinking: llmDefault.thinking,
-      effort: llmDefault.effort,
+      thinking: resolvedMainAgent.thinking,
+      effort: resolvedMainAgent.effort,
       ...(fastModeEnabled && resolvedSpeed === "fast"
         ? { speed: resolvedSpeed }
         : {}),
@@ -1035,7 +1036,9 @@ export class Conversation {
       }
     ).updateConfig?.(
       contextWindowConfigFromEffective(
-        config.llm.default.contextWindow,
+        resolveCallSiteConfig("mainAgent", config.llm, {
+          overrideProfile: overrideProfile ?? undefined,
+        }).contextWindow,
         effectiveContextWindow,
       ),
     );

diff --git a/assistant/src/daemon/handlers/config-model.ts b/assistant/src/daemon/handlers/config-model.ts
@@ -1,3 +1,4 @@
+import { resolveCallSiteConfig } from "../../config/llm-resolver.js";
 import {
   getConfig,
   loadRawConfig,
@@ -85,10 +86,11 @@ export function projectProviderForWire(
 /** Return current model configuration. */
 export async function getModelInfo(): Promise<ModelInfo> {
   const config = getConfig();
-  const provider = config.llm.default.provider;
+  const resolved = resolveCallSiteConfig("mainAgent", config.llm);
+  const provider = resolved.provider;
 
   return {
-    model: config.llm.default.model,
+    model: resolved.model,
     provider,
     configuredProviders: await getConfiguredProviders(),
     availableModels: PROVIDER_CATALOG.find(
@@ -134,24 +136,25 @@ export async function setModel(
 
   // Resolve provider: explicit > MODEL_TO_PROVIDER lookup > current
   const current = getConfig();
+  const resolvedCurrent = resolveCallSiteConfig("mainAgent", current.llm);
   const resolvedProvider =
     explicitProvider ??
     MODEL_TO_PROVIDER[modelId] ??
-    current.llm.default.provider;
+    resolvedCurrent.provider;
 
   // Auto-reset model when provider changes and current modelId doesn't
   // belong to the new provider's catalog.
   if (
-    resolvedProvider !== current.llm.default.provider &&
+    resolvedProvider !== resolvedCurrent.provider &&
     !isModelInCatalog(resolvedProvider, modelId)
   ) {
     modelId = getProviderDefaultModel(resolvedProvider);
   }
 
   // No-op guard: skip expensive reinitialization when nothing changed
   if (
-    modelId === current.llm.default.model &&
-    resolvedProvider === current.llm.default.provider
+    modelId === resolvedCurrent.model &&
+    resolvedProvider === resolvedCurrent.provider
   ) {
     return await getModelInfo();
   }

@@ -2,6 +2,7 @@ import { createHash } from "node:crypto";
 
 import { isAssistantFeatureFlagEnabled } from "../config/assistant-feature-flags.js";
 import { getOllamaBaseUrlEnv } from "../config/env.js";
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import type { AssistantConfig } from "../config/types.js";
 import { MANAGED_PROVIDER_META } from "../providers/managed-proxy/constants.js";
 import { resolveManagedProxyContext } from "../providers/managed-proxy/context.js";
@@ -778,7 +779,7 @@ export async function selectedBackendSupportsMultimodal(
 
 async function isOllamaConfigured(config: AssistantConfig): Promise<boolean> {
   return (
-    config.llm.default.provider === "ollama" ||
+    resolveCallSiteConfig("mainAgent", config.llm).provider === "ollama" ||
     Boolean(await getProviderKeyAsync("ollama")) ||
     Boolean(getOllamaBaseUrlEnv())
   );

diff --git a/assistant/src/providers/__tests__/satellite-connection-routing.test.ts b/assistant/src/providers/__tests__/satellite-connection-routing.test.ts
@@ -130,6 +130,7 @@ mock.module("../registry.js", () => ({
 // Imports (after mocks).
 // ---------------------------------------------------------------------------
 
+import { LLMSchema } from "../../config/schemas/llm.js";
 import { wrapWithCallSiteRouting } from "../call-site-routing.js";
 import { ConnectionResolutionError } from "../connection-resolution.js";
 
@@ -162,7 +163,7 @@ function reset(): void {
 // helper passes it straight to `resolveProviderFromConnection`, which is
 // fully mocked above — so a minimal shape is fine.
 const providersConfigStub = {
-  llm: { default: { provider: "anthropic", model: "claude-opus-4-7" } },
+  llm: LLMSchema.parse({}),
   services: {
     inference: {},
     "image-generation": {

diff --git a/assistant/src/providers/connection-resolution.ts b/assistant/src/providers/connection-resolution.ts
@@ -27,6 +27,7 @@
  *      a conversation offline.
  */
 
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
 import { getDb } from "../memory/db-connection.js";
 import { getLogger } from "../util/logger.js";
 import { getConnection } from "./inference/connections.js";
@@ -145,8 +146,8 @@ export async function tryResolveProviderForConnectionName(
 export async function resolveDefaultProvider(
   config: ProvidersConfig,
 ): Promise<Provider | null> {
-  const profile = config.llm.default;
-  const connectionName = profile.provider_connection;
+  const resolved = resolveCallSiteConfig("mainAgent", config.llm);
+  const connectionName = resolved.provider_connection;
   if (!connectionName) {
     throw new ConnectionResolutionError(
       "<llm.default>",
@@ -157,6 +158,6 @@ export async function resolveDefaultProvider(
   return tryResolveProviderForConnectionName(
     connectionName,
     config,
-    profile.provider,
+    resolved.provider,
   );
 }
diff --git a/assistant/src/providers/registry.ts b/assistant/src/providers/registry.ts
@@ -1,3 +1,5 @@
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
+import { type LLMConfig } from "../config/schemas/llm.js";
 import { getProviderKeyAsync } from "../security/secure-keys.js";
 import { ProviderNotConfiguredError } from "../util/errors.js";
 import { getLogger } from "../util/logger.js";
@@ -66,28 +68,14 @@ export interface ProvidersConfig {
       provider: string;
     };
   };
-  llm: {
-    default: {
-      provider: string;
-      model: string;
-      /**
-       * Name of a `provider_connections` row to use for this profile.
-       * Mirrors the runtime field added by `profileConfigFragment` in
-       * `config/llm-resolver.ts` and the Zod field on `LLMConfigBase`
-       * in `config/schemas/llm.ts`. Optional at the type level so
-       * pre-backfill / hand-crafted configs still compile; the
-       * connection-resolution helpers throw a clear configuration
-       * error when a profile has no connection at dispatch time.
-       */
-      provider_connection?: string;
-    };
-  };
+  llm: LLMConfig;
   timeouts?: { providerStreamTimeoutSec?: number };
 }
 
 function resolveModel(config: ProvidersConfig, providerName: string): string {
-  const inferenceProvider = config.llm.default.provider;
-  const inferenceModel = config.llm.default.model;
+  const resolved = resolveCallSiteConfig("mainAgent", config.llm);
+  const inferenceProvider = resolved.provider;
+  const inferenceModel = resolved.model;
   if (inferenceProvider === providerName) {
     if (
       providerName !== "anthropic" &&
@@ -193,7 +181,7 @@ export async function initializeProviders(
 
   // Ollama (keyless provider — always init when configured or key present)
   const ollamaKey = await getProviderKeyAsync("ollama");
-  if (config.llm.default.provider === "ollama" || ollamaKey) {
+  if (resolveCallSiteConfig("mainAgent", config.llm).provider === "ollama" || ollamaKey) {
     const model = resolveModel(config, "ollama");
     registerProvider(
       "ollama",