diff --git a/assistant/src/__tests__/approval-cascade.test.ts b/assistant/src/__tests__/approval-cascade.test.ts index 3ade54c5e7f..04c71202cbf 100644 --- a/assistant/src/__tests__/approval-cascade.test.ts +++ b/assistant/src/__tests__/approval-cascade.test.ts @@ -56,16 +56,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ - ui: {}, - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, + ui: {}, + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, + }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, timeouts: { permissionTimeoutSec: 300 }, diff --git a/assistant/src/__tests__/compaction.benchmark.test.ts b/assistant/src/__tests__/compaction.benchmark.test.ts index 4599c7b7a2a..6317fc06b65 100644 --- a/assistant/src/__tests__/compaction.benchmark.test.ts +++ b/assistant/src/__tests__/compaction.benchmark.test.ts @@ -71,7 +71,7 @@ function makeLongMessages(turns: number): Message[] { function makeConfig() { return { - ...DEFAULT_CONFIG.contextWindow, + ...DEFAULT_CONFIG.llm.default.contextWindow, maxInputTokens: 6000, targetBudgetRatio: 0.58, compactThreshold: 0.6, diff --git a/assistant/src/__tests__/config-analysis.test.ts b/assistant/src/__tests__/config-analysis.test.ts index ba09306fd67..3367c7105d5 100644 --- a/assistant/src/__tests__/config-analysis.test.ts +++ b/assistant/src/__tests__/config-analysis.test.ts @@ -10,30 +10,27 @@ describe("AnalysisConfigSchema", () => { const parsed = AnalysisConfigSchema.parse({}); expect(parsed.batchSize).toBe(30); expect(parsed.idleTimeoutMs).toBe(600_000); - expect(parsed.modelIntent).toBeUndefined(); - expect(parsed.modelOverride).toBeUndefined(); }); - test("custom values round-trip", () => { + test("custom batch/idle values round-trip", () => { const input = { batchSize: 50, idleTimeoutMs: 120_000, - modelIntent: "quality-optimized" as const, - modelOverride: "anthropic/claude-opus-4-6", }; const parsed = AnalysisConfigSchema.parse(input); expect(parsed).toEqual(input); }); - test("accepts each valid modelIntent value", () => { - for (const intent of [ - "latency-optimized", - "quality-optimized", - "vision-optimized", - ] as const) { - const parsed = AnalysisConfigSchema.parse({ modelIntent: intent }); - expect(parsed.modelIntent).toBe(intent); - } + test("legacy modelIntent/modelOverride are stripped after PR 19 cleanup", () => { + // Both fields moved to llm.callSites.analyzeConversation in PR 4 and + // were removed from the schema in PR 19. Zod silently strips unknown + // keys; migration 039 erases them from disk. + const parsed = AnalysisConfigSchema.parse({ + modelIntent: "quality-optimized", + modelOverride: "anthropic/claude-opus-4-6", + }); + expect((parsed as Record).modelIntent).toBeUndefined(); + expect((parsed as Record).modelOverride).toBeUndefined(); }); test("rejects batchSize: 0 (must be positive)", () => { @@ -60,18 +57,6 @@ describe("AnalysisConfigSchema", () => { const result = AnalysisConfigSchema.safeParse({ idleTimeoutMs: -1000 }); expect(result.success).toBe(false); }); - - test("rejects invalid modelIntent value", () => { - const result = AnalysisConfigSchema.safeParse({ - modelIntent: "bogus-intent", - }); - expect(result.success).toBe(false); - }); - - test("rejects non-string modelOverride", () => { - const result = AnalysisConfigSchema.safeParse({ modelOverride: 42 }); - expect(result.success).toBe(false); - }); }); describe("AssistantConfigSchema — analysis integration", () => { @@ -88,13 +73,11 @@ describe("AssistantConfigSchema — analysis integration", () => { analysis: { batchSize: 15, idleTimeoutMs: 300_000, - modelIntent: "latency-optimized", }, }); expect(parsed.analysis).toEqual({ batchSize: 15, idleTimeoutMs: 300_000, - modelIntent: "latency-optimized", }); }); }); diff --git a/assistant/src/__tests__/config-schema.test.ts b/assistant/src/__tests__/config-schema.test.ts index a4c3b450abd..91a432045af 100644 --- a/assistant/src/__tests__/config-schema.test.ts +++ b/assistant/src/__tests__/config-schema.test.ts @@ -81,9 +81,11 @@ function writeConfig(obj: unknown): void { describe("AssistantConfigSchema", () => { test("parses empty object with full defaults", () => { const result = AssistantConfigSchema.parse({}); - expect(result.services.inference.provider).toBe("anthropic"); - expect(result.services.inference.model).toBe("claude-opus-4-6"); + // services.inference now carries only `mode`; provider/model live under + // llm.default.{provider,model} (see PR 19 of unify-llm-callsites). expect(result.services.inference.mode).toBe("your-own"); + expect(result.llm.default.provider).toBe("anthropic"); + expect(result.llm.default.model).toBe("claude-opus-4-6"); expect(result.services["image-generation"].provider).toBe("gemini"); expect(result.services["image-generation"].model).toBe( "gemini-3.1-flash-image-preview", @@ -93,12 +95,12 @@ describe("AssistantConfigSchema", () => { "inference-provider-native", ); expect(result.services["web-search"].mode).toBe("your-own"); - expect(result.maxTokens).toBe(64000); - expect(result.thinking).toEqual({ + expect(result.llm.default.maxTokens).toBe(64000); + expect(result.llm.default.thinking).toEqual({ enabled: true, streamThinking: true, }); - expect(result.contextWindow).toEqual({ + expect(result.llm.default.contextWindow).toEqual({ enabled: true, maxInputTokens: 200000, targetBudgetRatio: 0.3, @@ -134,11 +136,9 @@ describe("AssistantConfigSchema", () => { test("accepts valid complete config", () => { const input = { - services: { - inference: { provider: "openai", model: "gpt-4" }, + llm: { + default: { provider: "openai" as const, model: "gpt-4", maxTokens: 4096 }, }, - maxTokens: 4096, - thinking: { enabled: true }, timeouts: { shellDefaultTimeoutSec: 30, shellMaxTimeoutSec: 300, @@ -154,10 +154,10 @@ describe("AssistantConfigSchema", () => { auditLog: { retentionDays: 30 }, }; const result = AssistantConfigSchema.parse(input); - expect(result.services.inference.provider).toBe("openai"); - expect(result.services.inference.model).toBe("gpt-4"); - expect(result.maxTokens).toBe(4096); - expect(result.thinking.enabled).toBe(true); + expect(result.llm.default.provider).toBe("openai"); + expect(result.llm.default.model).toBe("gpt-4"); + expect(result.llm.default.maxTokens).toBe(4096); + expect(result.llm.default.thinking.enabled).toBe(true); expect(result.secretDetection.action).toBe("block"); }); @@ -273,27 +273,32 @@ describe("AssistantConfigSchema", () => { expect(() => AssistantConfigSchema.parse(input)).toThrow(/missing-profile/); }); - test("legacy top-level inference keys still parse alongside the new llm block", () => { - // Backward compatibility: configs that set the legacy top-level keys - // (maxTokens, effort, speed, thinking, contextWindow, services.inference) - // continue to parse correctly. PR 19 removes these once adoption is done. + test("legacy top-level inference keys are ignored after PR 19 cleanup", () => { + // The legacy keys (top-level maxTokens, effort, speed, thinking, + // contextWindow, services.inference.{provider,model}) were removed in PR + // 19. Configs that still carry them parse cleanly because Zod strips + // unknown fields, and migration 039 erases them from the on-disk file + // entirely. const input = { services: { - inference: { provider: "openai" as const, model: "gpt-4" }, + inference: { provider: "openai", model: "gpt-4" }, }, maxTokens: 8000, - effort: "medium" as const, - speed: "fast" as const, + effort: "medium", + speed: "fast", thinking: { enabled: false, streamThinking: false }, }; const result = AssistantConfigSchema.parse(input); - expect(result.services.inference.provider).toBe("openai"); - expect(result.maxTokens).toBe(8000); - expect(result.effort).toBe("medium"); - expect(result.speed).toBe("fast"); - expect(result.thinking.enabled).toBe(false); - // The new llm block falls back to its own defaults (independent of the - // legacy top-level keys until the migration in PR 4 backfills it). + expect((result as Record).maxTokens).toBeUndefined(); + expect((result as Record).effort).toBeUndefined(); + expect((result as Record).speed).toBeUndefined(); + expect((result as Record).thinking).toBeUndefined(); + expect( + (result.services.inference as Record).provider, + ).toBeUndefined(); + expect( + (result.services.inference as Record).model, + ).toBeUndefined(); expect(result.llm.default.provider).toBe("anthropic"); expect(result.llm.default.model).toBe("claude-opus-4-6"); }); @@ -302,16 +307,15 @@ describe("AssistantConfigSchema", () => { // Regression guard: previously LLMConfigBase had no schema-level defaults, // so any `llm: {}` block would fail validation and the loader's recovery // path would fall through to `cloneDefaultConfig()`, discarding unrelated - // valid settings (like a custom `maxTokens`). With leaf-level defaults, - // `llm: {}` parses cleanly and the user's other settings are preserved. + // valid settings (like a custom `llm.default.maxTokens`). With leaf-level + // defaults, `llm: {}` parses cleanly and the user's other settings are + // preserved. const result = AssistantConfigSchema.parse({ - maxTokens: 32000, - llm: {}, + llm: { default: { maxTokens: 32000 } }, }); - expect(result.maxTokens).toBe(32000); + expect(result.llm.default.maxTokens).toBe(32000); expect(result.llm.default.provider).toBe("anthropic"); expect(result.llm.default.model).toBe("claude-opus-4-6"); - expect(result.llm.default.maxTokens).toBe(64000); }); test("llm.default with one missing field still parses (defaults applied)", () => { @@ -416,13 +420,15 @@ describe("AssistantConfigSchema", () => { test("rejects invalid provider", () => { const result = AssistantConfigSchema.safeParse({ - services: { inference: { provider: "invalid" } }, + llm: { default: { provider: "invalid" } }, }); expect(result.success).toBe(false); }); - test("rejects negative maxTokens", () => { - const result = AssistantConfigSchema.safeParse({ maxTokens: -100 }); + test("rejects negative llm.default.maxTokens", () => { + const result = AssistantConfigSchema.safeParse({ + llm: { default: { maxTokens: -100 } }, + }); expect(result.success).toBe(false); if (!result.success) { expect( @@ -431,8 +437,10 @@ describe("AssistantConfigSchema", () => { } }); - test("rejects non-integer maxTokens", () => { - const result = AssistantConfigSchema.safeParse({ maxTokens: 3.14 }); + test("rejects non-integer llm.default.maxTokens", () => { + const result = AssistantConfigSchema.safeParse({ + llm: { default: { maxTokens: 3.14 } }, + }); expect(result.success).toBe(false); if (!result.success) { expect( @@ -441,9 +449,9 @@ describe("AssistantConfigSchema", () => { } }); - test("rejects string maxTokens", () => { + test("rejects string llm.default.maxTokens", () => { const result = AssistantConfigSchema.safeParse({ - maxTokens: "not-a-number", + llm: { default: { maxTokens: "not-a-number" } }, }); expect(result.success).toBe(false); if (!result.success) { @@ -469,7 +477,7 @@ describe("AssistantConfigSchema", () => { test("rejects invalid thinking config", () => { const result = AssistantConfigSchema.safeParse({ - thinking: { enabled: "yes" }, + llm: { default: { thinking: { enabled: "yes" } } }, }); expect(result.success).toBe(false); if (!result.success) { @@ -479,16 +487,21 @@ describe("AssistantConfigSchema", () => { test("rejects contextWindow targetBudgetRatio >= compactThreshold", () => { const result = AssistantConfigSchema.safeParse({ - contextWindow: { targetBudgetRatio: 0.8, compactThreshold: 0.8 }, + llm: { + default: { + contextWindow: { targetBudgetRatio: 0.8, compactThreshold: 0.8 }, + }, + }, }); expect(result.success).toBe(false); if (!result.success) { expect( result.error.issues.some( (issue) => - issue.path.join(".") === "contextWindow.targetBudgetRatio" && + issue.path.join(".") === + "llm.default.contextWindow.targetBudgetRatio" && issue.message.includes( - "must be less than contextWindow.compactThreshold", + "must be less than llm.default.contextWindow.compactThreshold", ), ), ).toBe(true); @@ -498,7 +511,11 @@ describe("AssistantConfigSchema", () => { test("rejects overflowRecovery safetyMarginRatio out of (0,1) range", () => { for (const bad of [0, 1, -0.1, 1.5]) { const result = AssistantConfigSchema.safeParse({ - contextWindow: { overflowRecovery: { safetyMarginRatio: bad } }, + llm: { + default: { + contextWindow: { overflowRecovery: { safetyMarginRatio: bad } }, + }, + }, }); expect(result.success).toBe(false); if (!result.success) { @@ -513,8 +530,12 @@ describe("AssistantConfigSchema", () => { test("rejects invalid overflowRecovery interactiveLatestTurnCompression", () => { const result = AssistantConfigSchema.safeParse({ - contextWindow: { - overflowRecovery: { interactiveLatestTurnCompression: "explode" }, + llm: { + default: { + contextWindow: { + overflowRecovery: { interactiveLatestTurnCompression: "explode" }, + }, + }, }, }); expect(result.success).toBe(false); @@ -529,8 +550,12 @@ describe("AssistantConfigSchema", () => { test("rejects invalid overflowRecovery nonInteractiveLatestTurnCompression", () => { const result = AssistantConfigSchema.safeParse({ - contextWindow: { - overflowRecovery: { nonInteractiveLatestTurnCompression: "nope" }, + llm: { + default: { + contextWindow: { + overflowRecovery: { nonInteractiveLatestTurnCompression: "nope" }, + }, + }, }, }); expect(result.success).toBe(false); @@ -601,7 +626,7 @@ describe("AssistantConfigSchema", () => { "ollama", ] as const) { const result = AssistantConfigSchema.safeParse({ - services: { inference: { provider } }, + llm: { default: { provider } }, }); expect(result.success).toBe(true); } @@ -618,13 +643,19 @@ describe("AssistantConfigSchema", () => { test("provides helpful error messages", () => { const result = AssistantConfigSchema.safeParse({ - maxTokens: -1, + llm: { default: { maxTokens: -1 } }, secretDetection: { action: "explode" }, }); expect(result.success).toBe(false); if (!result.success) { const messages = result.error.issues.map((i) => i.message); - expect(messages.some((m) => m.includes("positive"))).toBe(true); + // The llm.default.maxTokens validation rejects -1 with a "Too small" + // / "expected number to be >0" message from Zod's default issue text. + expect( + messages.some( + (m) => m.includes("positive") || /expected number to be >0/i.test(m), + ), + ).toBe(true); expect( messages.some( (m) => @@ -688,11 +719,7 @@ describe("AssistantConfigSchema", () => { enrichmentMaxRetries: 2, commitMessageLLM: { enabled: false, - useConfiguredProvider: true, - providerFastModelOverrides: {}, timeoutMs: 600, - maxTokens: 120, - temperature: 0.2, maxFilesInPrompt: 30, maxDiffBytes: 12000, minRemainingTurnBudgetMs: 1000, @@ -746,11 +773,7 @@ describe("AssistantConfigSchema", () => { const result = AssistantConfigSchema.parse({}); const llm = result.workspaceGit.commitMessageLLM; expect(llm.enabled).toBe(false); - expect(llm.useConfiguredProvider).toBe(true); - expect(llm.providerFastModelOverrides).toEqual({}); expect(llm.timeoutMs).toBe(600); - expect(llm.maxTokens).toBe(120); - expect(llm.temperature).toBe(0.2); expect(llm.maxFilesInPrompt).toBe(30); expect(llm.maxDiffBytes).toBe(12000); expect(llm.minRemainingTurnBudgetMs).toBe(1000); @@ -763,13 +786,6 @@ describe("AssistantConfigSchema", () => { expect(result.success).toBe(false); }); - test("rejects commitMessageLLM.temperature > 2", () => { - const result = AssistantConfigSchema.safeParse({ - workspaceGit: { commitMessageLLM: { temperature: 2.5 } }, - }); - expect(result.success).toBe(false); - }); - test("breaker settings have correct defaults", () => { const result = AssistantConfigSchema.parse({}); const breaker = result.workspaceGit.commitMessageLLM.breaker; @@ -784,14 +800,12 @@ describe("AssistantConfigSchema", () => { commitMessageLLM: { enabled: true, timeoutMs: 1000, - temperature: 0.5, breaker: { openAfterFailures: 5 }, }, }, }); expect(result.workspaceGit.commitMessageLLM.enabled).toBe(true); expect(result.workspaceGit.commitMessageLLM.timeoutMs).toBe(1000); - expect(result.workspaceGit.commitMessageLLM.temperature).toBe(0.5); expect(result.workspaceGit.commitMessageLLM.breaker.openAfterFailures).toBe( 5, ); @@ -801,18 +815,18 @@ describe("AssistantConfigSchema", () => { ); }); - test("rejects commitMessageLLM.temperature < 0", () => { - const result = AssistantConfigSchema.safeParse({ - workspaceGit: { commitMessageLLM: { temperature: -0.1 } }, - }); - expect(result.success).toBe(false); - }); - - test("rejects non-integer commitMessageLLM.maxTokens", () => { - const result = AssistantConfigSchema.safeParse({ - workspaceGit: { commitMessageLLM: { maxTokens: 3.5 } }, + test("ignores legacy commitMessageLLM.{maxTokens,temperature} keys", () => { + // PR 19 removed maxTokens/temperature from the schema; Zod silently + // strips them on parse. Migration 039 erases them from disk so they + // don't accumulate over time. + const result = AssistantConfigSchema.parse({ + workspaceGit: { + commitMessageLLM: { maxTokens: 200, temperature: 0.5 }, + }, }); - expect(result.success).toBe(false); + const cm = result.workspaceGit.commitMessageLLM as Record; + expect(cm.maxTokens).toBeUndefined(); + expect(cm.temperature).toBeUndefined(); }); // ── Calls config ──────────────────────────────────────────────────── @@ -971,16 +985,13 @@ describe("AssistantConfigSchema", () => { ).toBeUndefined(); }); - test("accepts optional calls.model", () => { + test("legacy calls.model key is stripped after PR 19 cleanup", () => { + // calls.model moved to llm.callSites.callAgent.model in PR 4 and the + // legacy field was removed in PR 19. Zod silently strips unknown keys. const result = AssistantConfigSchema.parse({ calls: { model: "claude-haiku-4-5-20251001" }, }); - expect(result.calls.model).toBe("claude-haiku-4-5-20251001"); - }); - - test("calls.model is undefined by default", () => { - const result = AssistantConfigSchema.parse({}); - expect(result.calls.model).toBeUndefined(); + expect((result.calls as Record).model).toBeUndefined(); }); // ── Caller identity config ──────────────────────────────────────── @@ -2106,28 +2117,27 @@ describe("loadConfig with schema validation", () => { // intermittently trigger unhandled ENOENT in CI if the directory is removed. test("loads valid config", () => { writeConfig({ - services: { - inference: { provider: "openai", model: "gpt-4" }, + llm: { + default: { provider: "openai", model: "gpt-4", maxTokens: 4096 }, }, - maxTokens: 4096, }); const config = loadConfig(); - expect(config.services.inference.provider).toBe("openai"); - expect(config.services.inference.model).toBe("gpt-4"); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.provider).toBe("openai"); + expect(config.llm.default.model).toBe("gpt-4"); + expect(config.llm.default.maxTokens).toBe(4096); }); test("applies defaults for missing fields", () => { writeConfig({}); const config = loadConfig(); - expect(config.services.inference.provider).toBe("anthropic"); - expect(config.services.inference.model).toBe("claude-opus-4-6"); - expect(config.maxTokens).toBe(64000); - expect(config.thinking).toEqual({ + expect(config.llm.default.provider).toBe("anthropic"); + expect(config.llm.default.model).toBe("claude-opus-4-6"); + expect(config.llm.default.maxTokens).toBe(64000); + expect(config.llm.default.thinking).toEqual({ enabled: true, streamThinking: true, }); - expect(config.contextWindow).toEqual({ + expect(config.llm.default.contextWindow).toEqual({ enabled: true, maxInputTokens: 200000, targetBudgetRatio: 0.3, @@ -2145,16 +2155,16 @@ describe("loadConfig with schema validation", () => { test("falls back to default for invalid provider", () => { writeConfig({ - services: { inference: { provider: "invalid-provider" } }, + llm: { default: { provider: "invalid-provider" } }, }); const config = loadConfig(); - expect(config.services.inference.provider).toBe("anthropic"); + expect(config.llm.default.provider).toBe("anthropic"); }); test("falls back to default for invalid maxTokens", () => { - writeConfig({ maxTokens: -100 }); + writeConfig({ llm: { default: { maxTokens: -100 } } }); const config = loadConfig(); - expect(config.maxTokens).toBe(64000); + expect(config.llm.default.maxTokens).toBe(64000); }); test("falls back to defaults for invalid nested values", () => { @@ -2169,23 +2179,26 @@ describe("loadConfig with schema validation", () => { test("preserves valid fields when other fields are invalid", () => { writeConfig({ - services: { - inference: { provider: "openai", model: "gpt-4" }, + llm: { + default: { + provider: "openai", + model: "gpt-4", + maxTokens: -1, + thinking: { enabled: true }, + }, }, - maxTokens: -1, - thinking: { enabled: true }, }); const config = loadConfig(); - expect(config.services.inference.provider).toBe("openai"); - expect(config.services.inference.model).toBe("gpt-4"); - expect(config.thinking.enabled).toBe(true); - expect(config.maxTokens).toBe(64000); + expect(config.llm.default.provider).toBe("openai"); + expect(config.llm.default.model).toBe("gpt-4"); + expect(config.llm.default.thinking.enabled).toBe(true); + expect(config.llm.default.maxTokens).toBe(64000); }); test("handles no config file", () => { const config = loadConfig(); - expect(config.services.inference.provider).toBe("anthropic"); - expect(config.maxTokens).toBe(64000); + expect(config.llm.default.provider).toBe("anthropic"); + expect(config.llm.default.maxTokens).toBe(64000); }); test("partial nested objects get defaults for missing fields", () => { @@ -2206,11 +2219,15 @@ describe("loadConfig with schema validation", () => { test("falls back for invalid contextWindow relationship", () => { writeConfig({ - contextWindow: { targetBudgetRatio: 0.8, compactThreshold: 0.8 }, + llm: { + default: { + contextWindow: { targetBudgetRatio: 0.8, compactThreshold: 0.8 }, + }, + }, }); const config = loadConfig(); - expect(config.contextWindow.targetBudgetRatio).toBe(0.3); - expect(config.contextWindow.compactThreshold).toBe(0.8); + expect(config.llm.default.contextWindow.targetBudgetRatio).toBe(0.3); + expect(config.llm.default.contextWindow.compactThreshold).toBe(0.8); }); test("falls back for invalid rateLimit values", () => { @@ -2271,13 +2288,13 @@ describe("loadConfig with schema validation", () => { // Only activeHoursStart is set. The superRefine must emit the issue so // the loader's delete-and-retry can strip the set field; otherwise the // mismatch persists and the config falls back to full defaults (which - // would reset maxTokens below to 64000). + // would reset llm.default.maxTokens below to 64000). writeConfig({ - maxTokens: 4096, + llm: { default: { maxTokens: 4096 } }, filing: { activeHoursStart: 8 }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.maxTokens).toBe(4096); expect(config.filing.activeHoursStart).toBeNull(); expect(config.filing.activeHoursEnd).toBeNull(); }); @@ -2285,13 +2302,13 @@ describe("loadConfig with schema validation", () => { test("recovers from partial heartbeat.activeHours without wiping unrelated fields", () => { // activeHoursStart is explicitly nulled while activeHoursEnd defaults to // 22 — a mismatch. Dual-emit strips both sides; both defaults restore - // (8, 22). maxTokens is unaffected. + // (8, 22). llm.default.maxTokens is unaffected. writeConfig({ - maxTokens: 4096, + llm: { default: { maxTokens: 4096 } }, heartbeat: { activeHoursStart: null }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.maxTokens).toBe(4096); expect(config.heartbeat.activeHoursStart).toBe(8); expect(config.heartbeat.activeHoursEnd).toBe(22); }); @@ -2299,14 +2316,14 @@ describe("loadConfig with schema validation", () => { test("recovers from heartbeat.activeHours null-mismatch where explicit value equals opposite default", () => { // { start: null, end: 8 } — single-emit on the null side would strip // start, the default 8 would restore it, and the equal-hours check would - // fire, cascading to a full defaults reset that wipes maxTokens. + // fire, cascading to a full defaults reset that wipes llm.default.maxTokens. // Dual-emit strips both sides in one pass. writeConfig({ - maxTokens: 4096, + llm: { default: { maxTokens: 4096 } }, heartbeat: { activeHoursStart: null, activeHoursEnd: 8 }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.maxTokens).toBe(4096); expect(config.heartbeat.activeHoursStart).toBe(8); expect(config.heartbeat.activeHoursEnd).toBe(22); }); @@ -2314,11 +2331,11 @@ describe("loadConfig with schema validation", () => { test("recovers from heartbeat.activeHours null-mismatch on the end side", () => { // { start: 22, end: null } — same cascade class as above, mirrored. writeConfig({ - maxTokens: 4096, + llm: { default: { maxTokens: 4096 } }, heartbeat: { activeHoursStart: 22, activeHoursEnd: null }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.maxTokens).toBe(4096); expect(config.heartbeat.activeHoursStart).toBe(8); expect(config.heartbeat.activeHoursEnd).toBe(22); }); @@ -2327,13 +2344,13 @@ describe("loadConfig with schema validation", () => { // { start: 22, end: 22 } — both equal to the default for end. Single-emit // on one path would strip one side, the default would recreate the // equal-hours mismatch, and the loader would fall back to full defaults, - // wiping maxTokens. Dual-emit strips both sides at once. + // wiping llm.default.maxTokens. Dual-emit strips both sides at once. writeConfig({ - maxTokens: 4096, + llm: { default: { maxTokens: 4096 } }, heartbeat: { activeHoursStart: 22, activeHoursEnd: 22 }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(4096); + expect(config.llm.default.maxTokens).toBe(4096); expect(config.heartbeat.activeHoursStart).toBe(8); expect(config.heartbeat.activeHoursEnd).toBe(22); }); @@ -2342,14 +2359,14 @@ describe("loadConfig with schema validation", () => { // activeHoursStart === activeHoursEnd is invalid (empty window). Filing's // defaults are null/null, so single-emit on one path would strip one side // and the null default would recreate a mismatch — cascading to a full - // defaults reset that wipes maxTokens. Dual-emit strips both sides so - // both defaults restore to null. + // defaults reset that wipes llm.default.maxTokens. Dual-emit strips both + // sides so both defaults restore to null. writeConfig({ - maxTokens: 1234, + llm: { default: { maxTokens: 1234 } }, filing: { activeHoursStart: 5, activeHoursEnd: 5 }, }); const config = loadConfig(); - expect(config.maxTokens).toBe(1234); + expect(config.llm.default.maxTokens).toBe(1234); expect(config.filing.activeHoursStart).toBeNull(); expect(config.filing.activeHoursEnd).toBeNull(); }); @@ -2369,7 +2386,7 @@ describe("loadConfig with schema validation", () => { expect( (config.calls.voice as Record).ttsProvider, ).toBeUndefined(); - expect(config.calls.model).toBeUndefined(); + expect((config.calls as Record).model).toBeUndefined(); expect(config.calls.callerIdentity).toEqual({ allowPerCallOverride: true, }); diff --git a/assistant/src/__tests__/conversation-abort-tool-results.test.ts b/assistant/src/__tests__/conversation-abort-tool-results.test.ts index 792f7cd3185..441cad2d800 100644 --- a/assistant/src/__tests__/conversation-abort-tool-results.test.ts +++ b/assistant/src/__tests__/conversation-abort-tool-results.test.ts @@ -25,23 +25,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, daemon: { diff --git a/assistant/src/__tests__/conversation-agent-loop-overflow.test.ts b/assistant/src/__tests__/conversation-agent-loop-overflow.test.ts index 03bf2945568..f60ceeae546 100644 --- a/assistant/src/__tests__/conversation-agent-loop-overflow.test.ts +++ b/assistant/src/__tests__/conversation-agent-loop-overflow.test.ts @@ -28,23 +28,34 @@ mock.module("../util/logger.js", () => ({ })); mock.module("../config/loader.js", () => ({ - getConfig: () => ({ - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 200_000, - thresholdTokens: 160_000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + getConfig: () => ({ + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, workspaceGit: { turnCommitMaxWaitMs: 10 }, diff --git a/assistant/src/__tests__/conversation-agent-loop.test.ts b/assistant/src/__tests__/conversation-agent-loop.test.ts index 767d315e89c..e606cd0ed39 100644 --- a/assistant/src/__tests__/conversation-agent-loop.test.ts +++ b/assistant/src/__tests__/conversation-agent-loop.test.ts @@ -16,23 +16,34 @@ mock.module("../util/logger.js", () => ({ })); mock.module("../config/loader.js", () => ({ - getConfig: () => ({ - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + getConfig: () => ({ + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, workspaceGit: { turnCommitMaxWaitMs: 10 }, diff --git a/assistant/src/__tests__/conversation-confirmation-signals.test.ts b/assistant/src/__tests__/conversation-confirmation-signals.test.ts index d740f7afc75..ecd2a8b155c 100644 --- a/assistant/src/__tests__/conversation-confirmation-signals.test.ts +++ b/assistant/src/__tests__/conversation-confirmation-signals.test.ts @@ -57,15 +57,33 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, + }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, timeouts: { permissionTimeoutSec: 1 }, diff --git a/assistant/src/__tests__/conversation-load-history-repair.test.ts b/assistant/src/__tests__/conversation-load-history-repair.test.ts index 5184f2584fb..51006ee2f82 100644 --- a/assistant/src/__tests__/conversation-load-history-repair.test.ts +++ b/assistant/src/__tests__/conversation-load-history-repair.test.ts @@ -16,16 +16,33 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, + }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, }), diff --git a/assistant/src/__tests__/conversation-pre-run-repair.test.ts b/assistant/src/__tests__/conversation-pre-run-repair.test.ts index 62be27d840c..cbd76cccdcc 100644 --- a/assistant/src/__tests__/conversation-pre-run-repair.test.ts +++ b/assistant/src/__tests__/conversation-pre-run-repair.test.ts @@ -23,23 +23,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, daemon: { diff --git a/assistant/src/__tests__/conversation-process-callsite.test.ts b/assistant/src/__tests__/conversation-process-callsite.test.ts index 3a330afd247..7325afcd107 100644 --- a/assistant/src/__tests__/conversation-process-callsite.test.ts +++ b/assistant/src/__tests__/conversation-process-callsite.test.ts @@ -46,22 +46,33 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + llm: { + default: { + provider: "anthropic", + model: "claude-opus-4-6", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, daemon: { @@ -74,8 +85,6 @@ mock.module("../config/loader.js", () => ({ services: { inference: { mode: "your-own", - provider: "anthropic", - model: "claude-opus-4-6", }, "image-generation": { mode: "your-own", diff --git a/assistant/src/__tests__/conversation-provider-retry-repair.test.ts b/assistant/src/__tests__/conversation-provider-retry-repair.test.ts index 41e60d1d822..62677a4553e 100644 --- a/assistant/src/__tests__/conversation-provider-retry-repair.test.ts +++ b/assistant/src/__tests__/conversation-provider-retry-repair.test.ts @@ -26,23 +26,34 @@ mock.module("../config/loader.js", () => ({ daemon: { titleGenerationMaxTokens: 30, }, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - enabled: true, - maxInputTokens: 100000, - targetBudgetRatio: 0.3, - compactThreshold: 0.8, - summaryBudgetRatio: 0.05, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, services: { diff --git a/assistant/src/__tests__/conversation-queue.test.ts b/assistant/src/__tests__/conversation-queue.test.ts index 92c1ba2d562..9b0f867ae76 100644 --- a/assistant/src/__tests__/conversation-queue.test.ts +++ b/assistant/src/__tests__/conversation-queue.test.ts @@ -55,23 +55,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, timeouts: { permissionTimeoutSec: 1 }, diff --git a/assistant/src/__tests__/conversation-slash-queue.test.ts b/assistant/src/__tests__/conversation-slash-queue.test.ts index 3106f807afe..778f896d9a0 100644 --- a/assistant/src/__tests__/conversation-slash-queue.test.ts +++ b/assistant/src/__tests__/conversation-slash-queue.test.ts @@ -30,23 +30,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, daemon: { diff --git a/assistant/src/__tests__/conversation-slash-unknown.test.ts b/assistant/src/__tests__/conversation-slash-unknown.test.ts index 47e416ecbbe..66606d33f45 100644 --- a/assistant/src/__tests__/conversation-slash-unknown.test.ts +++ b/assistant/src/__tests__/conversation-slash-unknown.test.ts @@ -30,23 +30,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, daemon: { diff --git a/assistant/src/__tests__/conversation-speed-override.test.ts b/assistant/src/__tests__/conversation-speed-override.test.ts index 3dd28ee6cd6..dc32eea84c7 100644 --- a/assistant/src/__tests__/conversation-speed-override.test.ts +++ b/assistant/src/__tests__/conversation-speed-override.test.ts @@ -57,17 +57,33 @@ let mockConfigSpeed: "standard" | "fast" = "fast"; mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - speed: mockConfigSpeed, - effort: "high", - contextWindow: { - maxInputTokens: 100000, - thresholdTokens: 80000, - preserveRecentMessages: 6, - summaryModel: "mock-model", - maxSummaryTokens: 512, + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "high" as const, + speed: mockConfigSpeed, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, + }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, timeouts: { permissionTimeoutSec: 1 }, diff --git a/assistant/src/__tests__/conversation-workspace-cache-state.test.ts b/assistant/src/__tests__/conversation-workspace-cache-state.test.ts index 4efe3c756da..ab0824082db 100644 --- a/assistant/src/__tests__/conversation-workspace-cache-state.test.ts +++ b/assistant/src/__tests__/conversation-workspace-cache-state.test.ts @@ -24,16 +24,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - enabled: true, - maxInputTokens: 100000, - targetBudgetRatio: 0.3, - compactThreshold: 0.8, - summaryBudgetRatio: 0.05, + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, + }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, memory: { enabled: false }, diff --git a/assistant/src/__tests__/conversation-workspace-injection.test.ts b/assistant/src/__tests__/conversation-workspace-injection.test.ts index 8256edef1d6..f0220ad622b 100644 --- a/assistant/src/__tests__/conversation-workspace-injection.test.ts +++ b/assistant/src/__tests__/conversation-workspace-injection.test.ts @@ -33,23 +33,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - enabled: true, - maxInputTokens: 100000, - targetBudgetRatio: 0.3, - compactThreshold: 0.8, - summaryBudgetRatio: 0.05, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, memory: { enabled: false }, diff --git a/assistant/src/__tests__/conversation-workspace-tool-tracking.test.ts b/assistant/src/__tests__/conversation-workspace-tool-tracking.test.ts index cd5d311652b..babebc3ceb8 100644 --- a/assistant/src/__tests__/conversation-workspace-tool-tracking.test.ts +++ b/assistant/src/__tests__/conversation-workspace-tool-tracking.test.ts @@ -31,23 +31,34 @@ mock.module("../providers/registry.js", () => ({ mock.module("../config/loader.js", () => ({ getConfig: () => ({ ui: {}, - - provider: "mock-provider", - maxTokens: 4096, - thinking: false, - contextWindow: { - enabled: true, - maxInputTokens: 100000, - targetBudgetRatio: 0.3, - compactThreshold: 0.8, - summaryBudgetRatio: 0.05, - overflowRecovery: { - enabled: true, - safetyMarginRatio: 0.05, - maxAttempts: 3, - interactiveLatestTurnCompression: "summarize", - nonInteractiveLatestTurnCompression: "truncate", + + llm: { + default: { + provider: "mock-provider", + model: "mock-model", + maxTokens: 4096, + effort: "max" as const, + speed: "standard" as const, + temperature: null, + thinking: { enabled: false, streamThinking: true }, + contextWindow: { + enabled: true, + maxInputTokens: 100000, + targetBudgetRatio: 0.3, + compactThreshold: 0.8, + summaryBudgetRatio: 0.05, + overflowRecovery: { + enabled: true, + safetyMarginRatio: 0.05, + maxAttempts: 3, + interactiveLatestTurnCompression: "summarize", + nonInteractiveLatestTurnCompression: "truncate", + }, + }, }, + profiles: {}, + callSites: {}, + pricingOverrides: [], }, rateLimit: { maxRequestsPerMinute: 0 }, memory: { enabled: false }, diff --git a/assistant/src/__tests__/model-intents.test.ts b/assistant/src/__tests__/model-intents.test.ts index e245d8c0b0c..267acd55b1a 100644 --- a/assistant/src/__tests__/model-intents.test.ts +++ b/assistant/src/__tests__/model-intents.test.ts @@ -5,45 +5,6 @@ import { isModelIntent, resolveModelIntent, } from "../providers/model-intents.js"; -import { RetryProvider } from "../providers/retry.js"; -import type { - Message, - Provider, - ProviderResponse, - SendMessageOptions, -} from "../providers/types.js"; - -const DUMMY_MESSAGES: Message[] = [ - { role: "user", content: [{ type: "text", text: "hello" }] }, -]; - -function makeResponse(model: string): ProviderResponse { - return { - content: [{ type: "text", text: "ok" }], - model, - usage: { - inputTokens: 1, - outputTokens: 1, - }, - stopReason: "end_turn", - }; -} - -function makeProvider( - name: string, - onCall: (options: SendMessageOptions | undefined) => void, -): Provider { - return { - name, - async sendMessage(_messages, _tools, _systemPrompt, options) { - onCall(options); - const config = options?.config as Record | undefined; - return makeResponse( - (config?.model as string | undefined) ?? "default-model", - ); - }, - }; -} describe("model intents", () => { test("validates model intent strings", () => { @@ -77,45 +38,8 @@ describe("model intents", () => { }); }); -describe("RetryProvider model intent normalization", () => { - test("translates modelIntent into concrete model and strips modelIntent key", async () => { - let seen: SendMessageOptions | undefined; - const wrapped = new RetryProvider( - makeProvider("anthropic", (options) => { - seen = options; - }), - ); - - await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { - config: { - modelIntent: "quality-optimized", - max_tokens: 123, - }, - }); - - const config = seen?.config as Record; - expect(config.model).toBe("claude-opus-4-7"); - expect(config.modelIntent).toBeUndefined(); - expect(config.max_tokens).toBe(123); - }); - - test("explicit model override wins over modelIntent", async () => { - let seen: SendMessageOptions | undefined; - const wrapped = new RetryProvider( - makeProvider("openai", (options) => { - seen = options; - }), - ); - - await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { - config: { - model: "custom-model-v1", - modelIntent: "latency-optimized", - }, - }); - - const config = seen?.config as Record; - expect(config.model).toBe("custom-model-v1"); - expect(config.modelIntent).toBeUndefined(); - }); -}); +// `RetryProvider`'s legacy `modelIntent` normalization path was removed in +// PR 19 of the unify-llm-callsites plan. The remaining `resolveModelIntent` +// helper lives in `providers/model-intents.ts` for use by the workspace +// migration's snapshot table — see `workspace/migrations/038-unify-llm- +// callsite-configs.ts`. diff --git a/assistant/src/__tests__/provider-commit-message-generator.test.ts b/assistant/src/__tests__/provider-commit-message-generator.test.ts index 504e1d4a0ef..daa0bdd39f3 100644 --- a/assistant/src/__tests__/provider-commit-message-generator.test.ts +++ b/assistant/src/__tests__/provider-commit-message-generator.test.ts @@ -20,15 +20,11 @@ mock.module("../security/secure-keys.js", () => ({ // --------------------------------------------------------------------------- function cloneConfig(): AssistantConfig { const cfg = structuredClone(DEFAULT_CONFIG); - cfg.services.inference.provider = "anthropic"; + cfg.llm.default.provider = "anthropic"; cfg.workspaceGit.commitMessageLLM = { ...cfg.workspaceGit.commitMessageLLM, enabled: true, - useConfiguredProvider: true, - providerFastModelOverrides: {}, timeoutMs: 5000, - maxTokens: 120, - temperature: 0.2, maxFilesInPrompt: 30, maxDiffBytes: 12000, minRemainingTurnBudgetMs: 1000, @@ -140,17 +136,6 @@ describe("ProviderCommitMessageGenerator", () => { expect(result.reason).toBe("disabled"); }); - // 2. useConfiguredProvider false - test('useConfiguredProvider false → returns deterministic, reason "disabled"', async () => { - currentConfig.workspaceGit.commitMessageLLM.useConfiguredProvider = false; - const gen = getCommitMessageGenerator(); - const result = await gen.generateCommitMessage(baseContext, { - changedFiles: baseContext.changedFiles, - }); - expect(result.source).toBe("deterministic"); - expect(result.reason).toBe("disabled"); - }); - // 3. missing API key test('missing API key → returns deterministic, reason "missing_provider_api_key"', async () => { mockSecureKeys = {}; @@ -221,7 +206,7 @@ describe("ProviderCommitMessageGenerator", () => { }); // 6. LLM success - test('LLM success → returns LLM message, source "llm", fast model + callSite passed', async () => { + test('LLM success → returns LLM message, source "llm", callSite passed', async () => { const commitMsg = "feat: add new feature"; mockSendMessage.mockResolvedValueOnce(makeSuccessResponse(commitMsg)); const gen = getCommitMessageGenerator(); @@ -232,37 +217,12 @@ describe("ProviderCommitMessageGenerator", () => { expect(result.message).toBe(commitMsg); expect(result.reason).toBeUndefined(); - // Verify the fast model and callSite were passed in the config so the - // provider's RetryProvider routes through `resolveCallSiteConfig` for - // max_tokens/temperature while preserving the explicit fast-model - // override. + // Verify the callSite was passed so the provider's RetryProvider routes + // through `resolveCallSiteConfig` for model/max_tokens/temperature. const callArgs = mockSendMessage.mock.calls[0]; const options = callArgs[3] as { - config: { model: string; callSite: string }; + config: { callSite: string }; }; - expect(options.config.model).toBe("claude-haiku-4-5-20251001"); - expect(options.config.callSite).toBe("commitMessage"); - }); - - // 7. fast-model override - test("fast-model override → uses override instead of default", async () => { - currentConfig.workspaceGit.commitMessageLLM.providerFastModelOverrides = { - anthropic: "claude-sonnet-4-20250514", - }; - const commitMsg = "fix: resolve issue"; - mockSendMessage.mockResolvedValueOnce(makeSuccessResponse(commitMsg)); - const gen = getCommitMessageGenerator(); - const result = await gen.generateCommitMessage(baseContext, { - changedFiles: baseContext.changedFiles, - }); - expect(result.source).toBe("llm"); - expect(result.message).toBe(commitMsg); - - const callArgs = mockSendMessage.mock.calls[0]; - const options = callArgs[3] as { - config: { model: string; callSite: string }; - }; - expect(options.config.model).toBe("claude-sonnet-4-20250514"); expect(options.config.callSite).toBe("commitMessage"); }); @@ -339,53 +299,16 @@ describe("ProviderCommitMessageGenerator", () => { expect(result.message).toBe("b".repeat(72) + body); }); - // 12. Keyless provider (Ollama) without fast model → missing_fast_model (skips API key check) - test('Ollama without API key or fast model → returns deterministic, reason "missing_fast_model"', async () => { - currentConfig.services.inference.provider = "ollama"; + // 12. Ollama (keyless provider) — passes the API-key preflight even without + // a stored secret, then succeeds because the call-site resolver supplies + // the model from `llm.default`/`llm.callSites.commitMessage`. + test("Ollama (keyless) — succeeds because call-site resolver supplies the model", async () => { + currentConfig.llm.default.provider = "ollama"; mockSecureKeys = {}; resolvedProvider = { provider: mockProvider, configuredProviderName: "ollama", }; - const gen = getCommitMessageGenerator(); - const result = await gen.generateCommitMessage(baseContext, { - changedFiles: baseContext.changedFiles, - }); - expect(result.source).toBe("deterministic"); - expect(result.reason).toBe("missing_fast_model"); - expect(result.reason).not.toBe("missing_provider_api_key"); - expect(mockSendMessage).not.toHaveBeenCalled(); - }); - - // 13. Unknown provider without fast model default → missing_fast_model, no provider call - test('Unknown provider without fast model default → returns deterministic, reason "missing_fast_model"', async () => { - (currentConfig.services.inference as Record).provider = - "exotic-provider"; - mockSecureKeys = { "exotic-provider": "sk-exotic" }; - resolvedProvider = { - provider: mockProvider, - configuredProviderName: "exotic-provider", - }; - const gen = getCommitMessageGenerator(); - const result = await gen.generateCommitMessage(baseContext, { - changedFiles: baseContext.changedFiles, - }); - expect(result.source).toBe("deterministic"); - expect(result.reason).toBe("missing_fast_model"); - expect(mockSendMessage).not.toHaveBeenCalled(); - }); - - // 14. Fast-model override enables LLM path for provider without built-in default - test("fast-model override enables LLM path for provider without built-in default", async () => { - currentConfig.services.inference.provider = "ollama"; - mockSecureKeys = {}; // Ollama is keyless - resolvedProvider = { - provider: mockProvider, - configuredProviderName: "ollama", - }; - currentConfig.workspaceGit.commitMessageLLM.providerFastModelOverrides = { - ollama: "llama3.2:3b", - }; const commitMsg = "fix: local model commit"; mockSendMessage.mockResolvedValueOnce(makeSuccessResponse(commitMsg)); const gen = getCommitMessageGenerator(); @@ -394,10 +317,8 @@ describe("ProviderCommitMessageGenerator", () => { }); expect(result.source).toBe("llm"); expect(result.message).toBe(commitMsg); - - // Verify the override model was passed const callArgs = mockSendMessage.mock.calls[0]; - const options = callArgs[3] as { config: { model: string } }; - expect(options.config.model).toBe("llama3.2:3b"); + const options = callArgs[3] as { config: { callSite: string } }; + expect(options.config.callSite).toBe("commitMessage"); }); }); diff --git a/assistant/src/__tests__/provider-managed-proxy-integration.test.ts b/assistant/src/__tests__/provider-managed-proxy-integration.test.ts index 2581be09cd3..5b4a9ac2fd7 100644 --- a/assistant/src/__tests__/provider-managed-proxy-integration.test.ts +++ b/assistant/src/__tests__/provider-managed-proxy-integration.test.ts @@ -65,7 +65,7 @@ import { function makeProvidersConfig(provider: string, model: string): ProvidersConfig { return { services: { - inference: { mode: "your-own", provider, model }, + inference: { mode: "your-own" }, "image-generation": { mode: "your-own", provider: "gemini", @@ -73,6 +73,7 @@ function makeProvidersConfig(provider: string, model: string): ProvidersConfig { }, "web-search": { mode: "your-own", provider: "inference-provider-native" }, }, + llm: { default: { provider, model } }, }; } diff --git a/assistant/src/__tests__/provider-registry-ollama.test.ts b/assistant/src/__tests__/provider-registry-ollama.test.ts index 4f93b44b715..bb78a427572 100644 --- a/assistant/src/__tests__/provider-registry-ollama.test.ts +++ b/assistant/src/__tests__/provider-registry-ollama.test.ts @@ -19,8 +19,6 @@ describe("provider registry (ollama)", () => { services: { inference: { mode: "your-own", - provider: "ollama", - model: "claude-opus-4-6", }, "image-generation": { mode: "your-own", @@ -32,6 +30,7 @@ describe("provider registry (ollama)", () => { provider: "inference-provider-native", }, }, + llm: { default: { provider: "ollama", model: "claude-opus-4-6" } }, }); const provider = getProvider("ollama"); diff --git a/assistant/src/__tests__/secret-routes-managed-proxy.test.ts b/assistant/src/__tests__/secret-routes-managed-proxy.test.ts index 327cb00925f..b608128a6ad 100644 --- a/assistant/src/__tests__/secret-routes-managed-proxy.test.ts +++ b/assistant/src/__tests__/secret-routes-managed-proxy.test.ts @@ -19,8 +19,6 @@ const mockConfig = { services: { inference: { mode: "your-own" as const, - provider: "anthropic", - model: "test-model", }, "image-generation": { mode: "your-own" as const, @@ -32,6 +30,7 @@ const mockConfig = { provider: "inference-provider-native", }, }, + llm: { default: { provider: "anthropic", model: "test-model" } }, }; mock.module("@google/genai", () => ({ diff --git a/assistant/src/__tests__/suggestion-routes.test.ts b/assistant/src/__tests__/suggestion-routes.test.ts index e0f67ece6c7..d2904641a4f 100644 --- a/assistant/src/__tests__/suggestion-routes.test.ts +++ b/assistant/src/__tests__/suggestion-routes.test.ts @@ -383,7 +383,7 @@ describe("GET /v1/suggestion", () => { expect(body.suggestion).toBeNull(); }); - test("uses latency-optimized model intent", async () => { + test("uses conversationStarters call site", async () => { const provider = makeMockProvider("Quick reply"); mockGetConfiguredProvider.mockImplementation(async () => provider); mockGetConversationByKey.mockImplementation(() => ({ @@ -407,8 +407,8 @@ describe("GET /v1/suggestion", () => { expect(provider.sendMessage).toHaveBeenCalledTimes(1); const callArgs = provider.sendMessage.mock.calls[0] as unknown[]; const options = callArgs[3] as - | { config?: { modelIntent?: string } } + | { config?: { callSite?: string } } | undefined; - expect(options?.config?.modelIntent).toBe("latency-optimized"); + expect(options?.config?.callSite).toBe("conversationStarters"); }); }); diff --git a/assistant/src/__tests__/workspace-migration-unify-llm-callsite-configs.test.ts b/assistant/src/__tests__/workspace-migration-unify-llm-callsite-configs.test.ts index bbe1e3dc5a9..8a213f3dd57 100644 --- a/assistant/src/__tests__/workspace-migration-unify-llm-callsite-configs.test.ts +++ b/assistant/src/__tests__/workspace-migration-unify-llm-callsite-configs.test.ts @@ -692,57 +692,31 @@ describe("038-unify-llm-callsite-configs migration", () => { expect("pricingOverrides" in llm).toBe(false); }); - // ─── down() rollback ─────────────────────────────────────────────────── - - test("down() reverses a migrated config to original shape", () => { + // ─── down() — documented no-op since PR 19 ────────────────────────── + + test("down() is a no-op since PR 19 cleanup", () => { + // PR 19 of the unify-llm-callsites plan removed the legacy keys from + // `AssistantConfigSchema`, so re-creating them in `down()` would have + // no effect on the running daemon. The migration's `down()` is now a + // documented no-op — it leaves the config exactly as it found it, + // whether the `llm` block is present or absent. const original = { - services: { inference: { provider: "openai", model: "gpt-5.4" } }, + services: { inference: { mode: "your-own", provider: "openai", model: "gpt-5.4" } }, maxTokens: 32000, - effort: "high", - speed: "standard", - thinking: { enabled: true, streamThinking: true }, - contextWindow: { maxInputTokens: 150000 }, - heartbeat: { speed: "fast" }, - filing: { speed: "fast" }, - analysis: { modelOverride: "anthropic/claude-opus-4-6" }, - workspaceGit: { - commitMessageLLM: { maxTokens: 200, temperature: 0.4 }, - }, - calls: { model: "gpt-5.4-nano" }, - pricingOverrides: [ - { + llm: { + default: { provider: "openai", - modelPattern: "gpt-5.4", - inputPer1M: 1, - outputPer1M: 2, + model: "gpt-5.4", + maxTokens: 32000, }, - ], + }, }; writeConfig(original); - unifyLlmCallSiteConfigsMigration.run(workspaceDir); - // Sanity: llm block exists after run() - expect((readConfig() as { llm?: unknown }).llm).toBeDefined(); - unifyLlmCallSiteConfigsMigration.down(workspaceDir); const config = readConfig(); - // The llm block must be removed. - expect("llm" in config).toBe(false); - // Every original scalar/object key that had a reverse mapping must be - // restored to its original value. - expect(config.services).toEqual(original.services); - expect(config.maxTokens).toBe(original.maxTokens); - expect(config.effort).toBe(original.effort); - expect(config.speed).toBe(original.speed); - expect(config.thinking).toEqual(original.thinking); - expect(config.contextWindow).toEqual(original.contextWindow); - expect(config.heartbeat).toEqual(original.heartbeat); - expect(config.filing).toEqual(original.filing); - expect(config.analysis).toEqual(original.analysis); - expect(config.workspaceGit).toEqual(original.workspaceGit); - expect(config.calls).toEqual(original.calls); - expect(config.pricingOverrides).toEqual(original.pricingOverrides); + expect(config).toEqual(original); }); test("down() is a no-op when llm block is absent", () => { diff --git a/assistant/src/calls/guardian-question-copy.ts b/assistant/src/calls/guardian-question-copy.ts index a69a47bf20a..46b80b406b4 100644 --- a/assistant/src/calls/guardian-question-copy.ts +++ b/assistant/src/calls/guardian-question-copy.ts @@ -52,7 +52,7 @@ export async function generateGuardianCopy( const fallback = buildFallbackCopy(questionText); // If no provider is configured, return fallback immediately - const resolved = await resolveConfiguredProvider(); + const resolved = await resolveConfiguredProvider("guardianQuestionCopy"); if (!resolved) { log.debug( "No provider available for guardian copy generation, using fallback", diff --git a/assistant/src/cli/commands/config.ts b/assistant/src/cli/commands/config.ts index f9a4922ec48..a7b91e2e8e1 100644 --- a/assistant/src/cli/commands/config.ts +++ b/assistant/src/cli/commands/config.ts @@ -54,9 +54,9 @@ and "assistant keys set " to view and manage API keys. Examples: $ assistant config list - $ assistant config get services.inference.provider + $ assistant config get llm.default.provider $ assistant config schema services - $ assistant config set services.inference.provider anthropic + $ assistant config set llm.default.provider anthropic $ assistant config set calls.enabled true`, ); @@ -69,7 +69,7 @@ Examples: "after", ` Arguments: - key Dotted path to the config key (e.g. services.inference.provider, + key Dotted path to the config key (e.g. llm.default.provider, calls.enabled, twilio.accountSid). Intermediate objects are created automatically. value The value to store. Parsed as JSON first (so "true" becomes boolean @@ -81,7 +81,7 @@ After writing the value to config.json, the change takes effect immediately. To manage API keys, use "assistant keys set " instead. Examples: - $ assistant config set services.inference.provider anthropic + $ assistant config set llm.default.provider anthropic $ assistant config set calls.enabled true`, ) .action( @@ -114,7 +114,7 @@ Examples: "after", ` Arguments: - key Dotted path to the config key (e.g. services.inference.provider, + key Dotted path to the config key (e.g. llm.default.provider, calls.enabled) Prints the value at the given key path. If the key is not set, prints @@ -123,7 +123,7 @@ Prints the value at the given key path. If the key is not set, prints To view API keys, use "assistant keys list" instead. Examples: - $ assistant config get services.inference.provider + $ assistant config get llm.default.provider $ assistant config get calls.enabled`, ) .action((key: string) => { diff --git a/assistant/src/config/bundled-skills/media-processing/services/reduce.ts b/assistant/src/config/bundled-skills/media-processing/services/reduce.ts index 5991e62d433..d896009a645 100644 --- a/assistant/src/config/bundled-skills/media-processing/services/reduce.ts +++ b/assistant/src/config/bundled-skills/media-processing/services/reduce.ts @@ -179,7 +179,7 @@ async function sendToClaude( model?: string, onProgress?: (msg: string) => void, ): Promise { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("mainAgent"); if (!provider) { throw new Error("No LLM provider available. Please configure an API key."); } diff --git a/assistant/src/config/raw-config-utils.ts b/assistant/src/config/raw-config-utils.ts index 1724108d70b..d36e9255e68 100644 --- a/assistant/src/config/raw-config-utils.ts +++ b/assistant/src/config/raw-config-utils.ts @@ -1,11 +1,39 @@ +/** + * Safely set a nested field on a raw config object's `llm.default` map. + * + * Ensures the `llm` and `llm.default` objects exist before writing, so + * callers don't need to guard against undefined intermediate keys. + * + * Example: `setLlmDefaultField(raw, "model", "claude-sonnet-4-6")` + * produces `raw.llm.default.model = "claude-sonnet-4-6"`. + */ +export function setLlmDefaultField( + raw: Record, + field: string, + value: unknown, +): void { + const llm: Record = + raw.llm != null && typeof raw.llm === "object" && !Array.isArray(raw.llm) + ? (raw.llm as Record) + : {}; + const existing = llm.default; + const defaultBlock: Record = + existing != null && typeof existing === "object" && !Array.isArray(existing) + ? (existing as Record) + : {}; + defaultBlock[field] = value; + llm.default = defaultBlock; + raw.llm = llm; +} + /** * Safely set a nested field on a raw config object's `services` map. * * Ensures the `services` and service-level objects exist before writing, * so callers don't need to guard against undefined intermediate keys. * - * Example: `setServiceField(raw, "inference", "model", "claude-sonnet-4-6")` - * produces `raw.services.inference.model = "claude-sonnet-4-6"`. + * Example: `setServiceField(raw, "inference", "mode", "managed")` + * produces `raw.services.inference.mode = "managed"`. */ export function setServiceField( raw: Record, diff --git a/assistant/src/config/schema.ts b/assistant/src/config/schema.ts index e69c4714984..fc6e12250dc 100644 --- a/assistant/src/config/schema.ts +++ b/assistant/src/config/schema.ts @@ -239,13 +239,6 @@ import { import { FilingConfigSchema } from "./schemas/filing.js"; import { HeartbeatConfigSchema } from "./schemas/heartbeat.js"; import { HostBrowserConfigSchema } from "./schemas/host-browser.js"; -import { - ContextWindowConfigSchema, - EffortSchema, - ModelPricingOverrideSchema, - SpeedSchema, - ThinkingConfigSchema, -} from "./schemas/inference.js"; import { IngressConfigSchema } from "./schemas/ingress.js"; import { JournalConfigSchema } from "./schemas/journal.js"; import { LLMSchema } from "./schemas/llm.js"; @@ -277,18 +270,6 @@ import { WorkspaceGitConfigSchema } from "./schemas/workspace-git.js"; export const AssistantConfigSchema = z .object({ services: ServicesSchema.default(ServicesSchema.parse({})), - maxTokens: z - .number({ error: "maxTokens must be a number" }) - .int("maxTokens must be an integer") - .positive("maxTokens must be a positive integer") - .default(64000) - .describe("Maximum number of output tokens per LLM response"), - effort: EffortSchema, - speed: SpeedSchema, - thinking: ThinkingConfigSchema.default(ThinkingConfigSchema.parse({})), - contextWindow: ContextWindowConfigSchema.default( - ContextWindowConfigSchema.parse({}), - ), memory: MemoryConfigSchema.default(MemoryConfigSchema.parse({})), dataDir: z .string({ error: "dataDir must be a string" }) @@ -306,18 +287,9 @@ export const AssistantConfigSchema = z logFile: LogFileConfigSchema.default( LogFileConfigSchema.parse({ dir: getDataDir() + "/logs" }), ), - pricingOverrides: z - .array(ModelPricingOverrideSchema) - .default([]) - .describe( - "Custom pricing overrides for specific provider/model combinations", - ), - // Unified LLM configuration block. Defaults mirror the legacy top-level - // inference settings (services.inference, maxTokens, effort, speed, - // thinking, contextWindow) so existing configs without an `llm` block - // continue to behave identically. No callers consume this yet — PRs 5+ - // migrate call sites to read through the resolver. PR 19 removes the - // legacy keys once adoption is complete. + // Unified LLM configuration block. The unique source of truth for + // provider/model/maxTokens/effort/speed/temperature/thinking/contextWindow + // and pricing overrides for every call site in the assistant. // // Default values live on each leaf inside `LLMSchema` (see // `schemas/llm.ts`), so `LLMSchema.parse({})` returns a fully-populated @@ -378,30 +350,29 @@ export const AssistantConfigSchema = z ), }) .superRefine((config, ctx) => { + const llmContextWindow = config.llm?.default?.contextWindow; if ( - config.contextWindow?.targetBudgetRatio != null && - config.contextWindow?.compactThreshold != null && - config.contextWindow.targetBudgetRatio >= - config.contextWindow.compactThreshold + llmContextWindow?.targetBudgetRatio != null && + llmContextWindow?.compactThreshold != null && + llmContextWindow.targetBudgetRatio >= llmContextWindow.compactThreshold ) { ctx.addIssue({ code: z.ZodIssueCode.custom, - path: ["contextWindow", "targetBudgetRatio"], + path: ["llm", "default", "contextWindow", "targetBudgetRatio"], message: - "contextWindow.targetBudgetRatio must be less than contextWindow.compactThreshold", + "llm.default.contextWindow.targetBudgetRatio must be less than llm.default.contextWindow.compactThreshold", }); } if ( - config.contextWindow?.targetBudgetRatio != null && - config.contextWindow?.summaryBudgetRatio != null && - config.contextWindow.targetBudgetRatio <= - config.contextWindow.summaryBudgetRatio + llmContextWindow?.targetBudgetRatio != null && + llmContextWindow?.summaryBudgetRatio != null && + llmContextWindow.targetBudgetRatio <= llmContextWindow.summaryBudgetRatio ) { ctx.addIssue({ code: z.ZodIssueCode.custom, - path: ["contextWindow", "targetBudgetRatio"], + path: ["llm", "default", "contextWindow", "targetBudgetRatio"], message: - "contextWindow.targetBudgetRatio must be greater than contextWindow.summaryBudgetRatio", + "llm.default.contextWindow.targetBudgetRatio must be greater than llm.default.contextWindow.summaryBudgetRatio", }); } const segmentation = config.memory?.segmentation; diff --git a/assistant/src/config/schemas/analysis.ts b/assistant/src/config/schemas/analysis.ts index c93614b50b5..a9dc28e7a93 100644 --- a/assistant/src/config/schemas/analysis.ts +++ b/assistant/src/config/schemas/analysis.ts @@ -24,28 +24,9 @@ export const AnalysisConfigSchema = z .describe( "Milliseconds of idle time after the last message before the debounced analysis job fires", ), - - // Optional model intent for the analysis agent loop. When omitted, - // the analysis agent uses the same model as the main agent. - // Accepted values match the main agent's model-intent vocabulary. - modelIntent: z - .enum(["latency-optimized", "quality-optimized", "vision-optimized"], { - error: "analysis.modelIntent must be a valid model intent", - }) - .optional() - .describe( - "Model selection strategy for the analysis agent loop — falls back to the main agent's model when omitted", - ), - - // Optional explicit model override (provider/model string). Takes - // precedence over modelIntent when both are set. - modelOverride: z - .string({ error: "analysis.modelOverride must be a string" }) - .optional() - .describe( - "Explicit model override (provider/model string) for the analysis agent loop — takes precedence over modelIntent when both are set", - ), }) - .describe("Controls the auto-analyze agent loop triggered by conversation activity"); + .describe( + "Controls the auto-analyze agent loop triggered by conversation activity. Model selection lives under llm.callSites.analyzeConversation.", + ); export type AnalysisConfig = z.infer; diff --git a/assistant/src/config/schemas/calls.ts b/assistant/src/config/schemas/calls.ts index b24b94459cc..d5c30b0d0d6 100644 --- a/assistant/src/config/schemas/calls.ts +++ b/assistant/src/config/schemas/calls.ts @@ -222,10 +222,6 @@ export const CallsConfigSchema = z ), safety: CallsSafetyConfigSchema.default(CallsSafetyConfigSchema.parse({})), voice: CallsVoiceConfigSchema.default(CallsVoiceConfigSchema.parse({})), - model: z - .string({ error: "calls.model must be a string" }) - .optional() - .describe("Override the default model for phone call conversations"), callerIdentity: CallerIdentityConfigSchema.default( CallerIdentityConfigSchema.parse({}), ), diff --git a/assistant/src/config/schemas/filing.ts b/assistant/src/config/schemas/filing.ts index f47516ce022..0ae3a3effaf 100644 --- a/assistant/src/config/schemas/filing.ts +++ b/assistant/src/config/schemas/filing.ts @@ -1,7 +1,5 @@ import { z } from "zod"; -import { SpeedSchema } from "./inference.js"; - export const FilingConfigSchema = z .object({ enabled: z @@ -16,9 +14,6 @@ export const FilingConfigSchema = z .positive("filing.intervalMs must be a positive integer") .default(4 * 3_600_000) .describe("Time between filing runs in milliseconds"), - speed: SpeedSchema.default("standard").describe( - "Inference speed mode for filing conversations", - ), activeHoursStart: z .number({ error: "filing.activeHoursStart must be a number" }) .int("filing.activeHoursStart must be an integer") diff --git a/assistant/src/config/schemas/heartbeat.ts b/assistant/src/config/schemas/heartbeat.ts index 91ac0003c2e..a5a3b3e6040 100644 --- a/assistant/src/config/schemas/heartbeat.ts +++ b/assistant/src/config/schemas/heartbeat.ts @@ -1,7 +1,5 @@ import { z } from "zod"; -import { SpeedSchema } from "./inference.js"; - export const HeartbeatConfigSchema = z .object({ enabled: z @@ -14,9 +12,6 @@ export const HeartbeatConfigSchema = z .positive("heartbeat.intervalMs must be a positive integer") .default(6 * 3_600_000) .describe("Time between heartbeat checks in milliseconds"), - speed: SpeedSchema.default("standard").describe( - "Inference speed mode for heartbeat conversations — defaults to standard to avoid inheriting the global fast mode multiplier", - ), activeHoursStart: z .number({ error: "heartbeat.activeHoursStart must be a number" }) .int("heartbeat.activeHoursStart must be an integer") diff --git a/assistant/src/config/schemas/memory-processing.ts b/assistant/src/config/schemas/memory-processing.ts index 6f9e4376089..575454e90f8 100644 --- a/assistant/src/config/schemas/memory-processing.ts +++ b/assistant/src/config/schemas/memory-processing.ts @@ -43,17 +43,9 @@ export const MemorySummarizationConfigSchema = z .describe( "Whether to use an LLM for summarizing and consolidating memory items", ), - modelIntent: z - .enum(["latency-optimized", "quality-optimized", "vision-optimized"], { - error: "memory.summarization.modelIntent must be a valid model intent", - }) - .default("quality-optimized") - .describe( - "Model selection strategy for summarization — trade off speed vs quality", - ), }) .describe( - "Controls how memory items are summarized and consolidated over time", + "Controls how memory items are summarized and consolidated over time. Model selection lives under llm.callSites.conversationSummarization.", ); export type MemoryExtractionConfig = z.infer< diff --git a/assistant/src/config/schemas/notifications.ts b/assistant/src/config/schemas/notifications.ts index 718b4a9e0db..1a119d95257 100644 --- a/assistant/src/config/schemas/notifications.ts +++ b/assistant/src/config/schemas/notifications.ts @@ -1,16 +1,9 @@ import { z } from "zod"; export const NotificationsConfigSchema = z - .object({ - decisionModelIntent: z - .enum(["latency-optimized", "quality-optimized", "vision-optimized"], { - error: "notifications.decisionModelIntent must be a valid model intent", - }) - .default("latency-optimized") - .describe( - "Model selection strategy for deciding whether to send a notification", - ), - }) - .describe("Notification delivery configuration"); + .object({}) + .describe( + "Notification delivery configuration. Model selection lives under llm.callSites.notificationDecision and llm.callSites.preferenceExtraction.", + ); export type NotificationsConfig = z.infer; diff --git a/assistant/src/config/schemas/platform.ts b/assistant/src/config/schemas/platform.ts index eb92c3efc46..8221fddce2c 100644 --- a/assistant/src/config/schemas/platform.ts +++ b/assistant/src/config/schemas/platform.ts @@ -56,16 +56,10 @@ export const UiConfigSchema = z .describe( "IANA timezone identifier for displaying dates and times (e.g. 'America/New_York')", ), - greetingModelIntent: z - .enum(["latency-optimized", "quality-optimized"], { - error: "ui.greetingModelIntent must be 'latency-optimized' or 'quality-optimized'", - }) - .default("latency-optimized") - .describe( - "Model intent for empty-state greeting generation (latency-optimized = fast/small model, quality-optimized = primary model)", - ), }) - .describe("User interface display settings"); + .describe( + "User interface display settings. Empty-state greeting model selection lives under llm.callSites.emptyStateGreeting.", + ); export type DaemonConfig = z.infer; export type UiConfig = z.infer; diff --git a/assistant/src/config/schemas/services.ts b/assistant/src/config/schemas/services.ts index 2457a566d92..7a3812f93b5 100644 --- a/assistant/src/config/schemas/services.ts +++ b/assistant/src/config/schemas/services.ts @@ -28,10 +28,15 @@ export const BaseServiceSchema = z.object({ }); export type BaseService = z.infer; -export const InferenceServiceSchema = BaseServiceSchema.extend({ - provider: z.enum(VALID_INFERENCE_PROVIDERS).default("anthropic"), - model: z.string().default("claude-opus-4-6"), -}); +/** + * Inference service entry. Carries only the routing `mode` + * (`managed` vs `your-own`) — the provider and model live under + * `llm.default.{provider, model}` (see `schemas/llm.ts`). PR 19 of the + * unify-llm-callsites plan removed the `provider` and `model` fields here; + * legacy configs that still carry them have those keys stripped by + * workspace migration `039-drop-legacy-llm-keys`. + */ +export const InferenceServiceSchema = BaseServiceSchema; export type InferenceService = z.infer; export const ImageGenerationServiceSchema = BaseServiceSchema.extend({ diff --git a/assistant/src/config/schemas/workspace-git.ts b/assistant/src/config/schemas/workspace-git.ts index 59e383fe701..516c834dbaf 100644 --- a/assistant/src/config/schemas/workspace-git.ts +++ b/assistant/src/config/schemas/workspace-git.ts @@ -74,21 +74,6 @@ export const WorkspaceGitConfigSchema = z }) .default(false) .describe("Whether to use an LLM to generate commit messages"), - useConfiguredProvider: z - .boolean({ - error: - "workspaceGit.commitMessageLLM.useConfiguredProvider must be a boolean", - }) - .default(true) - .describe( - "Whether to use the globally configured LLM provider for commit messages", - ), - providerFastModelOverrides: z - .record(z.string(), z.string()) - .default({} as Record) - .describe( - "Map of provider names to fast model overrides for commit message generation", - ), timeoutMs: z .number({ error: "workspaceGit.commitMessageLLM.timeoutMs must be a number", @@ -99,26 +84,6 @@ export const WorkspaceGitConfigSchema = z ) .default(600) .describe("Timeout for LLM commit message generation (ms)"), - maxTokens: z - .number({ - error: "workspaceGit.commitMessageLLM.maxTokens must be a number", - }) - .int("workspaceGit.commitMessageLLM.maxTokens must be an integer") - .positive( - "workspaceGit.commitMessageLLM.maxTokens must be a positive integer", - ) - .default(120) - .describe("Maximum number of tokens in the generated commit message"), - temperature: z - .number({ - error: "workspaceGit.commitMessageLLM.temperature must be a number", - }) - .min(0, "workspaceGit.commitMessageLLM.temperature must be >= 0") - .max(2, "workspaceGit.commitMessageLLM.temperature must be <= 2") - .default(0.2) - .describe( - "LLM sampling temperature for commit message generation (lower = more deterministic)", - ), maxFilesInPrompt: z .number({ error: @@ -203,11 +168,7 @@ export const WorkspaceGitConfigSchema = z }) .default({ enabled: false, - useConfiguredProvider: true, - providerFastModelOverrides: {}, timeoutMs: 600, - maxTokens: 120, - temperature: 0.2, maxFilesInPrompt: 30, maxDiffBytes: 12000, minRemainingTurnBudgetMs: 1000, @@ -217,7 +178,9 @@ export const WorkspaceGitConfigSchema = z backoffMaxMs: 60000, }, }) - .describe("LLM-powered commit message generation settings"), + .describe( + "LLM-powered commit message generation operational settings. Provider/model/maxTokens/temperature live under llm.callSites.commitMessage.", + ), }) .describe( "Workspace git integration — auto-commits, enrichment, and LLM-generated commit messages", diff --git a/assistant/src/config/skills.ts b/assistant/src/config/skills.ts index 201ecc21d88..3f0840b3505 100644 --- a/assistant/src/config/skills.ts +++ b/assistant/src/config/skills.ts @@ -1148,7 +1148,7 @@ async function generateSkillIcon( name: string, description: string, ): Promise { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("skillCategoryInference"); if (!provider) { throw new Error("Configured provider unavailable for icon generation"); } @@ -1163,7 +1163,7 @@ async function generateSkillIcon( 'You are a pixel art icon designer. When asked, return ONLY a single element — no explanation, no markdown, no code fences. The SVG must be a 16x16 grid pixel art icon using elements. Use a limited palette (3-5 colors). Keep it under 2KB. The viewBox should be "0 0 16 16" with each pixel being a 1x1 rect.', { config: { - modelIntent: "latency-optimized", + callSite: "skillCategoryInference", max_tokens: 1024, }, }, diff --git a/assistant/src/daemon/approval-generators.ts b/assistant/src/daemon/approval-generators.ts index f6f51ca7cca..95ee00601db 100644 --- a/assistant/src/daemon/approval-generators.ts +++ b/assistant/src/daemon/approval-generators.ts @@ -91,7 +91,7 @@ export function createApprovalCopyGenerator(): ApprovalCopyGenerator { const config = loadConfig(); let provider; try { - provider = getProvider(config.services.inference.provider); + provider = getProvider(config.llm.default.provider); } catch { return null; } @@ -142,10 +142,10 @@ export function createApprovalCopyGenerator(): ApprovalCopyGenerator { export function createApprovalConversationGenerator(): ApprovalConversationGenerator { return async (context) => { const config = loadConfig(); - if (!listProviders().includes(config.services.inference.provider)) { + if (!listProviders().includes(config.llm.default.provider)) { throw new Error("No provider available for approval conversation"); } - const provider = getProvider(config.services.inference.provider); + const provider = getProvider(config.llm.default.provider); const pendingDescription = context.pendingApprovals .map((p) => `- Request ${p.requestId}: tool "${p.toolName}"`) diff --git a/assistant/src/daemon/classifier.ts b/assistant/src/daemon/classifier.ts index 69bd455303d..a476fea2e28 100644 --- a/assistant/src/daemon/classifier.ts +++ b/assistant/src/daemon/classifier.ts @@ -28,7 +28,7 @@ export async function classifyInteraction( return "text_qa"; } - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("interactionClassifier"); if (!provider) { log.warn( "No configured provider available, falling back to heuristic classification", diff --git a/assistant/src/daemon/conversation-agent-loop.ts b/assistant/src/daemon/conversation-agent-loop.ts index 2a4d8b143d9..e1cbc13ed64 100644 --- a/assistant/src/daemon/conversation-agent-loop.ts +++ b/assistant/src/daemon/conversation-agent-loop.ts @@ -907,8 +907,8 @@ export async function runAgentLoopImpl( // and proactively invoke the reducer if already above budget. This avoids // a wasted provider round-trip that would just fail with context_too_large. const config = getConfig(); - const overflowRecovery = config.contextWindow.overflowRecovery; - const providerMaxTokens = config.contextWindow.maxInputTokens; + const overflowRecovery = config.llm.default.contextWindow.overflowRecovery; + const providerMaxTokens = config.llm.default.contextWindow.maxInputTokens; // Widen safety margin for large conversations where estimation error // compounds across many messages with tool results. const baseSafetyMargin = overflowRecovery.safetyMarginRatio; @@ -954,7 +954,7 @@ export async function runAgentLoopImpl( { providerName: ctx.provider.name, systemPrompt: ctx.systemPrompt, - contextWindow: config.contextWindow, + contextWindow: config.llm.default.contextWindow, targetTokens: preflightBudget, toolTokenBudget, }, @@ -1388,7 +1388,7 @@ export async function runAgentLoopImpl( { providerName: ctx.provider.name, systemPrompt: ctx.systemPrompt, - contextWindow: config.contextWindow, + contextWindow: config.llm.default.contextWindow, targetTokens: correctedTarget, toolTokenBudget, }, @@ -1899,7 +1899,7 @@ export async function runAgentLoopImpl( state.exchangeLlmCallCount, { tokens: state.lastCallInputTokens, - maxTokens: config.contextWindow.maxInputTokens, + maxTokens: config.llm.default.contextWindow.maxInputTokens, }, ); diff --git a/assistant/src/daemon/conversation-process.ts b/assistant/src/daemon/conversation-process.ts index caad7648796..886b6fa770d 100644 --- a/assistant/src/daemon/conversation-process.ts +++ b/assistant/src/daemon/conversation-process.ts @@ -261,9 +261,9 @@ function buildSlashContext( messageCount: conversation.messages.length, inputTokens: conversation.usageStats.inputTokens, outputTokens: conversation.usageStats.outputTokens, - maxInputTokens: config.contextWindow.maxInputTokens, - model: config.services.inference.model, - provider: config.services.inference.provider, + maxInputTokens: config.llm.default.contextWindow.maxInputTokens, + model: config.llm.default.model, + provider: config.llm.default.provider, estimatedCost: conversation.usageStats.estimatedCost, userMessageInterface: turnInterface?.userMessageInterface, }; diff --git a/assistant/src/daemon/conversation-slash.ts b/assistant/src/daemon/conversation-slash.ts index dd5e78bb41e..121898a6635 100644 --- a/assistant/src/daemon/conversation-slash.ts +++ b/assistant/src/daemon/conversation-slash.ts @@ -78,8 +78,8 @@ async function resolveModelList(): Promise { lines.push(`**${providerName}** ${status}`); for (const { id, displayName } of models) { const isCurrent = - config.services.inference.provider === provider && - config.services.inference.model === id; + config.llm.default.provider === provider && + config.llm.default.model === id; const current = isCurrent ? " **[current]**" : ""; lines.push(` - ${displayName} (\`${id}\`)${current}`); } diff --git a/assistant/src/daemon/conversation-usage.ts b/assistant/src/daemon/conversation-usage.ts index b37bfc8be79..cda6def5e34 100644 --- a/assistant/src/daemon/conversation-usage.ts +++ b/assistant/src/daemon/conversation-usage.ts @@ -109,7 +109,7 @@ function resolveStructuredPricing( providerName, model, usage, - config.pricingOverrides, + config.llm.pricingOverrides, ); } catch (err) { log.warn({ err, model, providerName }, "Failed to resolve usage pricing"); diff --git a/assistant/src/daemon/conversation.ts b/assistant/src/daemon/conversation.ts index 1a03daca8c2..1c3fa9af48e 100644 --- a/assistant/src/daemon/conversation.ts +++ b/assistant/src/daemon/conversation.ts @@ -63,8 +63,7 @@ import { } from "../permissions/v2-consent-policy.js"; import { resolvePersonaContext } from "../prompts/persona-resolver.js"; import { buildSystemPrompt } from "../prompts/system-prompt.js"; -import { resolveModelIntent } from "../providers/model-intents.js"; -import type { Message, ModelIntent } from "../providers/types.js"; +import type { Message } from "../providers/types.js"; import type { Provider } from "../providers/types.js"; import type { TrustClass } from "../runtime/actor-trust-resolver.js"; import type { AuthContext } from "../runtime/auth/types.js"; @@ -339,7 +338,6 @@ export class Conversation { sharedCesClient?: CesClient, speedOverride?: Speed, cacheTtl?: "5m" | "1h", - modelIntent?: ModelIntent, modelOverride?: string, ) { this.conversationId = conversationId; @@ -425,7 +423,7 @@ export class Conversation { ); const config = getConfig(); - this.streamThinking = config.thinking.streamThinking ?? false; + this.streamThinking = config.llm.default.thinking.streamThinking ?? false; // CES (Credential Execution Service) — use the shared server-level client. // The CES sidecar accepts exactly one bootstrap connection, so the @@ -442,13 +440,10 @@ export class Conversation { const hasSystemPromptOverride = systemPrompt !== buildSystemPrompt(); this.hasSystemPromptOverride = hasSystemPromptOverride; - // If an explicit modelOverride is supplied, use it verbatim. Otherwise, - // if modelIntent is set, resolve it against the active provider's - // intent → model mapping. The AgentLoop passes the resulting string - // through to `providerConfig.model` on every turn. - const resolvedModel: string | undefined = - modelOverride ?? - (modelIntent ? resolveModelIntent(provider.name, modelIntent) : undefined); + // If an explicit modelOverride is supplied, use it verbatim. Otherwise + // leave the model unset and let `RetryProvider`'s call-site resolver pick + // it up from `llm.default` / `llm.callSites.` on every turn. + const resolvedModel: string | undefined = modelOverride; const resolveSystemPromptCallback = ( _history: import("../providers/types.js").Message[], @@ -478,16 +473,17 @@ export class Conversation { }; const fastModeEnabled = isAssistantFeatureFlagEnabled("fast-mode", config); - const resolvedSpeed = speedOverride ?? config.speed; + const resolvedSpeed = speedOverride ?? config.llm.default.speed; + const llmDefault = config.llm.default; this.agentLoop = new AgentLoop( provider, systemPrompt, { maxTokens, - maxInputTokens: config.contextWindow.maxInputTokens, - thinking: config.thinking, - effort: config.effort, + maxInputTokens: llmDefault.contextWindow.maxInputTokens, + thinking: llmDefault.thinking, + effort: llmDefault.effort, ...(fastModeEnabled && resolvedSpeed === "fast" ? { speed: resolvedSpeed } : {}), @@ -501,7 +497,7 @@ export class Conversation { this.contextWindowManager = new ContextWindowManager({ provider, systemPrompt: () => resolveSystemPromptCallback([]).systemPrompt, - config: config.contextWindow, + config: llmDefault.contextWindow, toolTokenBudget: this.agentLoop.getToolTokenBudget(), }); diff --git a/assistant/src/daemon/guardian-action-generators.ts b/assistant/src/daemon/guardian-action-generators.ts index 474e8b20aa0..cb5b33ccd0a 100644 --- a/assistant/src/daemon/guardian-action-generators.ts +++ b/assistant/src/daemon/guardian-action-generators.ts @@ -18,8 +18,8 @@ import type { /** * Create the daemon-owned guardian action copy generator that resolves * providers and calls `provider.sendMessage` to generate guardian action - * copy text. Uses `latency-optimized` model intent since these are - * time-sensitive voice responses. + * copy text. Uses the `guardianQuestionCopy` call site so model selection + * tracks the unified `llm.callSites` configuration. * * This keeps all provider awareness in the daemon lifecycle, away from * the runtime composer. @@ -29,7 +29,7 @@ export function createGuardianActionCopyGenerator(): GuardianActionCopyGenerator const config = loadConfig(); let provider; try { - provider = getProvider(config.services.inference.provider); + provider = getProvider(config.llm.default.provider); } catch { return null; } @@ -52,7 +52,7 @@ export function createGuardianActionCopyGenerator(): GuardianActionCopyGenerator { config: { max_tokens: options.maxTokens ?? GUARDIAN_ACTION_COPY_MAX_TOKENS, - modelIntent: "latency-optimized", + callSite: "guardianQuestionCopy", }, signal: AbortSignal.timeout( options.timeoutMs ?? GUARDIAN_ACTION_COPY_TIMEOUT_MS, @@ -131,7 +131,7 @@ const VALID_FOLLOWUP_DISPOSITIONS: ReadonlySet = new Set([ export function createGuardianFollowUpConversationGenerator(): GuardianFollowUpConversationGenerator { return async (context) => { const config = loadConfig(); - const provider = getProvider(config.services.inference.provider); + const provider = getProvider(config.llm.default.provider); const userPrompt = [ `Original question from the voice call: "${context.questionText}"`, @@ -146,7 +146,7 @@ export function createGuardianFollowUpConversationGenerator(): GuardianFollowUpC { config: { max_tokens: FOLLOWUP_CONVERSATION_MAX_TOKENS, - modelIntent: "latency-optimized", + callSite: "guardianQuestionCopy", }, signal: AbortSignal.timeout(FOLLOWUP_CONVERSATION_TIMEOUT_MS), }, diff --git a/assistant/src/daemon/handlers/config-model.ts b/assistant/src/daemon/handlers/config-model.ts index 5cdf5be6e4c..1aa8cdeab24 100644 --- a/assistant/src/daemon/handlers/config-model.ts +++ b/assistant/src/daemon/handlers/config-model.ts @@ -3,7 +3,10 @@ import { loadRawConfig, saveRawConfig, } from "../../config/loader.js"; -import { setServiceField } from "../../config/raw-config-utils.js"; +import { + setLlmDefaultField, + setServiceField, +} from "../../config/raw-config-utils.js"; import { VALID_INFERENCE_PROVIDERS } from "../../config/schemas/services.js"; import type { ProviderCatalogEntry } from "../../providers/model-catalog.js"; import { @@ -48,10 +51,10 @@ export interface ModelInfo { /** Return current model configuration. */ export async function getModelInfo(): Promise { const config = getConfig(); - const provider = config.services.inference.provider; + const provider = config.llm.default.provider; return { - model: config.services.inference.model, + model: config.llm.default.model, provider, configuredProviders: await getConfiguredProviders(), availableModels: PROVIDER_CATALOG.find((p) => p.id === provider)?.models, @@ -102,12 +105,12 @@ export async function setModel( const resolvedProvider = explicitProvider ?? MODEL_TO_PROVIDER[modelId] ?? - current.services.inference.provider; + current.llm.default.provider; // Auto-reset model when provider changes and current modelId doesn't // belong to the new provider's catalog. if ( - resolvedProvider !== current.services.inference.provider && + resolvedProvider !== current.llm.default.provider && !isModelInCatalog(resolvedProvider, modelId) ) { modelId = getProviderDefaultModel(resolvedProvider); @@ -115,8 +118,8 @@ export async function setModel( // No-op guard: skip expensive reinitialization when nothing changed if ( - modelId === current.services.inference.model && - resolvedProvider === current.services.inference.provider + modelId === current.llm.default.model && + resolvedProvider === current.llm.default.provider ) { return await getModelInfo(); } @@ -129,8 +132,8 @@ export async function setModel( // Use raw config to avoid persisting env-var API keys to disk const raw = loadRawConfig(); - setServiceField(raw, "inference", "model", modelId); - setServiceField(raw, "inference", "provider", resolvedProvider); + setLlmDefaultField(raw, "model", modelId); + setLlmDefaultField(raw, "provider", resolvedProvider); // Suppress the file watcher callback — setModel already does // the full reload sequence; a redundant watcher-triggered reload diff --git a/assistant/src/daemon/handlers/conversations.ts b/assistant/src/daemon/handlers/conversations.ts index 35f754b3b52..af138b7aab4 100644 --- a/assistant/src/daemon/handlers/conversations.ts +++ b/assistant/src/daemon/handlers/conversations.ts @@ -435,7 +435,7 @@ export function handleUsageRequest( totalInputTokens: conversation.totalInputTokens, totalOutputTokens: conversation.totalOutputTokens, estimatedCost: conversation.totalEstimatedCost, - model: config.services.inference.model, + model: config.llm.default.model, }); } diff --git a/assistant/src/daemon/handlers/shared.ts b/assistant/src/daemon/handlers/shared.ts index 5212d13838f..3ce19037d57 100644 --- a/assistant/src/daemon/handlers/shared.ts +++ b/assistant/src/daemon/handlers/shared.ts @@ -5,7 +5,6 @@ import type { Speed } from "../../config/schemas/inference.js"; import type { LLMCallSite } from "../../config/schemas/llm.js"; import type { HeartbeatService } from "../../heartbeat/heartbeat-service.js"; import type { SecretPromptResult } from "../../permissions/secret-prompter.js"; -import type { ModelIntent } from "../../providers/types.js"; import type { AuthContext } from "../../runtime/auth/types.js"; import type { DebouncerMap } from "../../util/debounce.js"; import { getLogger } from "../../util/logger.js"; @@ -129,24 +128,17 @@ export interface ConversationCreateOptions { commandIntent?: { type: string; payload?: string; languageCode?: string }; /** Optional callback to receive real-time agent loop events (text deltas, tool starts, etc.). */ onEvent?: (msg: ServerMessage) => void; - /** - * Optional model selection strategy for this conversation's agent loop. - * When set, overrides the provider's default model per-turn. Used by the - * auto-analyze loop to route the analysis agent to a dedicated model. - */ - modelIntent?: ModelIntent; /** * Optional explicit model override (provider/model string) for this - * conversation's agent loop. Takes precedence over `modelIntent` when - * both are set. Used by the auto-analyze loop to pin the analysis agent - * to a specific model. + * conversation's agent loop. Used by the auto-analyze loop to pin the + * analysis agent to a specific model. */ modelOverride?: string; /** * Optional LLM call-site identifier threaded through to the per-call * provider config. Adapter callers (heartbeat, filing, schedule, etc.) - * pass their call-site here so PRs 7-11 can route those flows through - * `resolveCallSiteConfig` instead of the legacy `speed`/`modelIntent` paths. + * pass their call-site here so the agent loop routes through + * `resolveCallSiteConfig` instead of the global default. */ callSite?: LLMCallSite; } diff --git a/assistant/src/daemon/handlers/skills.ts b/assistant/src/daemon/handlers/skills.ts index 0463ac213ad..8abf0227cc3 100644 --- a/assistant/src/daemon/handlers/skills.ts +++ b/assistant/src/daemon/handlers/skills.ts @@ -1517,7 +1517,7 @@ export async function draftSkill( if (missing.length > 0) { let llmGenerated = false; try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("skillCategoryInference"); if (provider) { const { signal, cleanup } = createTimeout(LLM_DRAFT_TIMEOUT_MS); try { diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts index cd4d15d1b59..54f8d56538a 100644 --- a/assistant/src/daemon/server.ts +++ b/assistant/src/daemon/server.ts @@ -1033,7 +1033,7 @@ export class DaemonServer { const createPromise = (async () => { const config = getConfig(); - let provider = getProvider(config.services.inference.provider); + let provider = getProvider(config.llm.default.provider); const { rateLimit } = config; if (rateLimit.maxRequestsPerMinute > 0) { provider = new RateLimitProvider( @@ -1046,7 +1046,8 @@ export class DaemonServer { const systemPrompt = storedOptions?.systemPromptOverride ?? buildSystemPrompt(); - const maxTokens = storedOptions?.maxResponseTokens ?? config.maxTokens; + const maxTokens = + storedOptions?.maxResponseTokens ?? config.llm.default.maxTokens; const memoryPolicy = this.deriveMemoryPolicy(conversationId); // Resolve the shared CES client (may still be initializing). @@ -1065,7 +1066,6 @@ export class DaemonServer { sharedCesClient, storedOptions?.speed, undefined, - storedOptions?.modelIntent, storedOptions?.modelOverride, ); newConversation.updateClient(sendToClient, true); @@ -1438,9 +1438,9 @@ export class DaemonServer { messageCount: conversation.getMessages().length, inputTokens: conversation.usageStats.inputTokens, outputTokens: conversation.usageStats.outputTokens, - maxInputTokens: config.contextWindow.maxInputTokens, - model: config.services.inference.model, - provider: config.services.inference.provider, + maxInputTokens: config.llm.default.contextWindow.maxInputTokens, + model: config.llm.default.model, + provider: config.llm.default.provider, estimatedCost: conversation.usageStats.estimatedCost, userMessageInterface: serverInterfaceCtx?.userMessageInterface, }; diff --git a/assistant/src/daemon/watch-handler.ts b/assistant/src/daemon/watch-handler.ts index 1f5478db494..74ff29f6274 100644 --- a/assistant/src/daemon/watch-handler.ts +++ b/assistant/src/daemon/watch-handler.ts @@ -113,7 +113,7 @@ export async function handleWatchObservation( async function generateCommentary(session: WatchSession): Promise { try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("watchCommentary"); if (!provider) { log.warn( { watchId: session.watchId }, @@ -225,7 +225,7 @@ export async function generateSummary(session: WatchSession): Promise { }, "generateSummary starting — calling LLM", ); - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("watchSummary"); if (!provider) { log.warn( { watchId: session.watchId }, diff --git a/assistant/src/home/rollup-producer.ts b/assistant/src/home/rollup-producer.ts index ca5e790c239..58ad0b6df86 100644 --- a/assistant/src/home/rollup-producer.ts +++ b/assistant/src/home/rollup-producer.ts @@ -261,10 +261,10 @@ export async function runRollupProducer( function resolveDefaultProvider(): ReturnType | null { const config = loadConfig(); - if (!listProviders().includes(config.services.inference.provider)) { + if (!listProviders().includes(config.llm.default.provider)) { return null; } - return getProvider(config.services.inference.provider); + return getProvider(config.llm.default.provider); } /** diff --git a/assistant/src/memory/conversation-title-service.ts b/assistant/src/memory/conversation-title-service.ts index 4f5bc700761..6a1b19ed565 100644 --- a/assistant/src/memory/conversation-title-service.ts +++ b/assistant/src/memory/conversation-title-service.ts @@ -118,7 +118,8 @@ export async function generateAndPersistConversationTitle( return { title: conversation.title!, updated: false }; } - const provider = params.provider ?? (await getConfiguredProvider()); + const provider = + params.provider ?? (await getConfiguredProvider("conversationTitle")); if (!provider) { // No provider available — fall back to context-derived title or untitled const fallback = deriveFallbackTitle(context) ?? UNTITLED_FALLBACK; @@ -219,7 +220,8 @@ export async function regenerateConversationTitle( return { title: conversation?.title ?? UNTITLED_FALLBACK, updated: false }; } - const provider = params.provider ?? (await getConfiguredProvider()); + const provider = + params.provider ?? (await getConfiguredProvider("conversationTitle")); if (!provider) { return { title: conversation.title ?? UNTITLED_FALLBACK, updated: false }; } diff --git a/assistant/src/memory/embedding-backend.ts b/assistant/src/memory/embedding-backend.ts index 2815a48b460..4b4a1f01bba 100644 --- a/assistant/src/memory/embedding-backend.ts +++ b/assistant/src/memory/embedding-backend.ts @@ -795,7 +795,7 @@ export async function selectedBackendSupportsMultimodal( async function isOllamaConfigured(config: AssistantConfig): Promise { return ( - config.services.inference.provider === "ollama" || + config.llm.default.provider === "ollama" || Boolean(await getProviderKeyAsync("ollama")) || Boolean(getOllamaBaseUrlEnv()) ); diff --git a/assistant/src/memory/graph/consolidation.ts b/assistant/src/memory/graph/consolidation.ts index 6ff0226f720..cfa490f87a8 100644 --- a/assistant/src/memory/graph/consolidation.ts +++ b/assistant/src/memory/graph/consolidation.ts @@ -262,7 +262,7 @@ async function identifyDuplicateGroups( ): Promise { if (nodes.length < 2) return []; - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryConsolidation"); if (!provider) return []; // Compact listing: ID + first 100 chars of content @@ -428,7 +428,7 @@ async function consolidateChunk( return true; }); - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryConsolidation"); if (!provider) { throw new BackendUnavailableError("Provider unavailable for consolidation"); } diff --git a/assistant/src/memory/graph/extraction.ts b/assistant/src/memory/graph/extraction.ts index 8b37ee4500c..108e3266958 100644 --- a/assistant/src/memory/graph/extraction.ts +++ b/assistant/src/memory/graph/extraction.ts @@ -844,7 +844,7 @@ export async function runGraphExtraction( } // 2. Get provider - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryExtraction"); if (!provider) { throw new BackendUnavailableError( "Provider unavailable for graph extraction", diff --git a/assistant/src/memory/graph/narrative.ts b/assistant/src/memory/graph/narrative.ts index 7fccfd583ef..6bcce2f2e98 100644 --- a/assistant/src/memory/graph/narrative.ts +++ b/assistant/src/memory/graph/narrative.ts @@ -165,7 +165,7 @@ export async function runNarrativeRefinement( .sort((a, b) => b.significance - a.significance) .slice(0, 150); - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("narrativeRefinement"); if (!provider) { throw new BackendUnavailableError( "Provider unavailable for narrative refinement", diff --git a/assistant/src/memory/graph/pattern-scan.ts b/assistant/src/memory/graph/pattern-scan.ts index d2f71a470a4..0aafee7cc2f 100644 --- a/assistant/src/memory/graph/pattern-scan.ts +++ b/assistant/src/memory/graph/pattern-scan.ts @@ -141,7 +141,7 @@ export async function runPatternScan( return result; } - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("patternScan"); if (!provider) { throw new BackendUnavailableError("Provider unavailable for pattern scan"); } diff --git a/assistant/src/memory/graph/retriever.ts b/assistant/src/memory/graph/retriever.ts index d15818e7cd0..73d1a557dbb 100644 --- a/assistant/src/memory/graph/retriever.ts +++ b/assistant/src/memory/graph/retriever.ts @@ -81,7 +81,7 @@ async function rerankAndDedup( if (candidates.length <= maxNodes) return candidates; try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryRetrieval"); if (!provider) return candidates.slice(0, maxNodes); // Numbered listing for the LLM: index + age + full content @@ -180,7 +180,7 @@ async function dedupForTurn( query: string, ): Promise<{ nodes: ScoredNode[]; llmApplied: boolean }> { try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryRetrieval"); if (!provider) return { nodes: candidates.slice(0, maxNodes), llmApplied: false }; @@ -273,7 +273,7 @@ async function dedupCrossCategory( maxNodes: number, ): Promise { try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("memoryRetrieval"); if (!provider) return candidates.slice(0, maxNodes); const now = Date.now(); diff --git a/assistant/src/memory/job-handlers/conversation-starters.ts b/assistant/src/memory/job-handlers/conversation-starters.ts index 50d7e9586c2..f066ae5acc3 100644 --- a/assistant/src/memory/job-handlers/conversation-starters.ts +++ b/assistant/src/memory/job-handlers/conversation-starters.ts @@ -174,7 +174,7 @@ interface GeneratedStarter { } async function generateStarters(scopeId: string): Promise { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("conversationStarters"); if (!provider) { log.info("No configured provider for conversation starters generation"); return []; diff --git a/assistant/src/memory/job-handlers/summarization.ts b/assistant/src/memory/job-handlers/summarization.ts index fa4cd232c36..5af05dd9b03 100644 --- a/assistant/src/memory/job-handlers/summarization.ts +++ b/assistant/src/memory/job-handlers/summarization.ts @@ -161,7 +161,7 @@ async function summarizeWithLLM( return buildFallbackSummary(existingSummary, newContent, label); } - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("conversationSummarization"); if (!provider) { log.debug( { label }, diff --git a/assistant/src/memory/migrations/140-backfill-usage-cache-accounting.ts b/assistant/src/memory/migrations/140-backfill-usage-cache-accounting.ts index b1b98ba3d56..cc2321cf6e7 100644 --- a/assistant/src/memory/migrations/140-backfill-usage-cache-accounting.ts +++ b/assistant/src/memory/migrations/140-backfill-usage-cache-accounting.ts @@ -196,7 +196,7 @@ export function migrateBackfillUsageCacheAccounting(database: DrizzleDb): void { const requestLogsByConversation = buildRequestLogMap(requestLogRows); const requestOffsets = new Map(); const previousUsageEventCreatedAt = new Map(); - const pricingOverrides = getConfig().pricingOverrides; + const pricingOverrides = getConfig().llm.pricingOverrides; let scannedAnthropicRows = 0; let updatedRows = 0; diff --git a/assistant/src/messaging/style-analyzer.ts b/assistant/src/messaging/style-analyzer.ts index 0cd3fb738db..3e2451b7bab 100644 --- a/assistant/src/messaging/style-analyzer.ts +++ b/assistant/src/messaging/style-analyzer.ts @@ -127,7 +127,7 @@ export async function extractStylePatterns( .map((e, i) => `--- Message ${i + 1} ---\n${e}`) .join("\n\n"); - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("styleAnalyzer"); if (!provider) { return { stylePatterns: [], contactObservations: [] }; } diff --git a/assistant/src/providers/__tests__/retry-callsite.test.ts b/assistant/src/providers/__tests__/retry-callsite.test.ts index f9b2ffe4a4b..84e6e0d69ef 100644 --- a/assistant/src/providers/__tests__/retry-callsite.test.ts +++ b/assistant/src/providers/__tests__/retry-callsite.test.ts @@ -9,16 +9,15 @@ mock.module("../../util/logger.js", () => ({ })); // Mutable test fixtures for `getConfig()`. Each test rebuilds the relevant -// pieces via `setLlmConfig(...)` / `setInferenceProvider(...)` before -// exercising the path. The mock is registered once and reads from these -// closures so subsequent tests don't need to remock the module. +// pieces via `setLlmConfig(...)` before exercising the path. The mock is +// registered once and reads from these closures so subsequent tests don't +// need to remock the module. let mockLlmConfig: Record = {}; -let mockInferenceProvider = "anthropic"; mock.module("../../config/loader.js", () => ({ getConfig: () => ({ llm: mockLlmConfig, - services: { inference: { provider: mockInferenceProvider } }, + services: { inference: { mode: "your-own" } }, }), })); @@ -39,10 +38,7 @@ mock.module("../registry.js", () => ({ // ── Imports (after mocks) ─────────────────────────────────────────────────── import { LLMSchema } from "../../config/schemas/llm.js"; -import { - getConfiguredProvider, - resolveConfiguredProvider, -} from "../provider-send-message.js"; +import { getConfiguredProvider } from "../provider-send-message.js"; import { RetryProvider } from "../retry.js"; import type { Message, @@ -90,7 +86,6 @@ function setLlmConfig(raw: unknown): void { beforeEach(() => { mockLlmConfig = LLMSchema.parse({}) as Record; - mockInferenceProvider = "anthropic"; mockProviders.clear(); }); @@ -244,12 +239,13 @@ describe("RetryProvider — callSite resolution", () => { }); }); -// ── RetryProvider — legacy modelIntent path is preserved ──────────────────── +// ── RetryProvider — pre-resolved model fast-path ──────────────────────────── -describe("RetryProvider — legacy modelIntent path (no callSite)", () => { - test("passing only modelIntent does not consult llm.* config", async () => { - // Seed the llm config with a value that, if accidentally consulted, would - // produce a clearly-wrong model. The legacy path must ignore it entirely. +describe("RetryProvider — no callSite (pre-resolved config passes through)", () => { + test("config without callSite is forwarded untouched (no llm.* lookup)", async () => { + // Seed the llm config with a value that, if accidentally consulted, + // would clobber the explicit model. The pre-resolved fast-path must + // ignore it entirely. setLlmConfig({ default: { provider: "anthropic", model: "MUST-NOT-LEAK" }, callSites: { @@ -264,28 +260,6 @@ describe("RetryProvider — legacy modelIntent path (no callSite)", () => { }), ); - await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { - config: { modelIntent: "quality-optimized" }, - }); - - const config = seen?.config as Record; - // Legacy path uses model-intents.ts mapping for "quality-optimized" on - // anthropic, which is "claude-opus-4-7". It must NOT be the llm.default - // value, which would indicate the new path was triggered. - expect(config.model).toBe("claude-opus-4-7"); - expect(config.model).not.toBe("MUST-NOT-LEAK"); - expect(config.model).not.toBe("ALSO-MUST-NOT-LEAK"); - expect(config.modelIntent).toBeUndefined(); - }); - - test("no callSite and no modelIntent leaves config untouched (existing fast-path)", async () => { - let seen: SendMessageOptions | undefined; - const wrapped = new RetryProvider( - makeProvider("anthropic", (options) => { - seen = options; - }), - ); - await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { config: { model: "explicit-model", max_tokens: 1234 }, }); @@ -293,6 +267,8 @@ describe("RetryProvider — legacy modelIntent path (no callSite)", () => { const config = seen?.config as Record; expect(config.model).toBe("explicit-model"); expect(config.max_tokens).toBe(1234); + expect(config.model).not.toBe("MUST-NOT-LEAK"); + expect(config.model).not.toBe("ALSO-MUST-NOT-LEAK"); }); }); @@ -327,15 +303,4 @@ describe("getConfiguredProvider — callSite routing", () => { expect(provider?.name).toBe("anthropic"); }); - test("legacy call (no callSite arg) uses services.inference.provider", async () => { - // The legacy path consults `services.inference.provider`. The shared - // loader mock reads `mockInferenceProvider` at call time, so we just - // overwrite it for this test. - mockInferenceProvider = "fireworks"; - mockProviders.set("fireworks", { name: "fireworks" }); - - const result = await resolveConfiguredProvider(); - expect(result?.configuredProviderName).toBe("fireworks"); - expect(result?.provider.name).toBe("fireworks"); - }); }); diff --git a/assistant/src/providers/provider-send-message.ts b/assistant/src/providers/provider-send-message.ts index 3c28710295e..11cb8f737c8 100644 --- a/assistant/src/providers/provider-send-message.ts +++ b/assistant/src/providers/provider-send-message.ts @@ -4,8 +4,8 @@ * and response extraction helpers. */ -import { getConfig } from "../config/loader.js"; import { resolveCallSiteConfig } from "../config/llm-resolver.js"; +import { getConfig } from "../config/loader.js"; import type { LLMCallSite } from "../config/schemas/llm.js"; import { getProvider, @@ -38,15 +38,16 @@ let lazyInitPromise: Promise | null = null; * If providers haven't been initialized yet (e.g. non-daemon code paths), * performs a one-shot `initializeProviders(getConfig())`. * - * When `callSite` is provided, the provider name comes from + * The provider name is sourced from * `resolveCallSiteConfig(callSite, config.llm).provider` — i.e. the unified - * `llm` block drives selection. Otherwise the legacy - * `services.inference.provider` is used unchanged. + * `llm` block drives selection. The `callSite` argument is required so the + * resolver can layer per-call-site overrides; pass the closest matching + * call-site identifier from `LLMCallSiteEnum` when adding a new caller. * * Returns `null` when no providers are available at all. */ export async function resolveConfiguredProvider( - callSite?: LLMCallSite, + callSite: LLMCallSite, ): Promise { const config = getConfig(); @@ -63,10 +64,7 @@ export async function resolveConfiguredProvider( } } - const inferenceProvider = - callSite !== undefined - ? resolveCallSiteConfig(callSite, config.llm).provider - : config.services.inference.provider; + const inferenceProvider = resolveCallSiteConfig(callSite, config.llm).provider; try { const provider = getProvider(inferenceProvider); @@ -84,14 +82,11 @@ export async function resolveConfiguredProvider( * Thin wrapper around `resolveConfiguredProvider()` for callsites * that only need the Provider instance. * - * When `callSite` is provided, resolves the provider via the unified - * `llm` block (see `resolveConfiguredProvider`). Otherwise preserves the - * legacy behavior of selecting `services.inference.provider`. - * - * Returns `null` when no providers are available. + * `callSite` is required — see `resolveConfiguredProvider`. Returns `null` + * when no providers are available. */ export async function getConfiguredProvider( - callSite?: LLMCallSite, + callSite: LLMCallSite, ): Promise { const result = await resolveConfiguredProvider(callSite); return result?.provider ?? null; diff --git a/assistant/src/providers/registry.ts b/assistant/src/providers/registry.ts index 569b1953b81..f159ca08099 100644 --- a/assistant/src/providers/registry.ts +++ b/assistant/src/providers/registry.ts @@ -51,8 +51,6 @@ export interface ProvidersConfig { services: { inference: { mode: "managed" | "your-own"; - provider: string; - model: string; }; "image-generation": { mode: "managed" | "your-own"; @@ -64,12 +62,18 @@ export interface ProvidersConfig { provider: string; }; }; + llm: { + default: { + provider: string; + model: string; + }; + }; timeouts?: { providerStreamTimeoutSec?: number }; } function resolveModel(config: ProvidersConfig, providerName: string): string { - const inferenceProvider = config.services.inference.provider; - const inferenceModel = config.services.inference.model; + const inferenceProvider = config.llm.default.provider; + const inferenceModel = config.llm.default.model; if (inferenceProvider === providerName) { // If a non-Anthropic provider is selected with the untouched global default // model, use a provider-appropriate fallback instead. @@ -203,7 +207,7 @@ export async function initializeProviders( // Ollama (keyless provider — always init when configured or key present) const ollamaKey = await getProviderKeyAsync("ollama"); - if (config.services.inference.provider === "ollama" || ollamaKey) { + if (config.llm.default.provider === "ollama" || ollamaKey) { const model = resolveModel(config, "ollama"); registerProvider( "ollama", diff --git a/assistant/src/providers/retry.ts b/assistant/src/providers/retry.ts index 27193c89cf3..e3e8df9d6f0 100644 --- a/assistant/src/providers/retry.ts +++ b/assistant/src/providers/retry.ts @@ -1,5 +1,5 @@ -import { getConfig } from "../config/loader.js"; import { resolveCallSiteConfig } from "../config/llm-resolver.js"; +import { getConfig } from "../config/loader.js"; import { ProviderError } from "../util/errors.js"; import { getLogger } from "../util/logger.js"; import { @@ -9,7 +9,6 @@ import { isRetryableNetworkError, sleep, } from "../util/retry.js"; -import { isModelIntent, resolveModelIntent } from "./model-intents.js"; import type { Message, Provider, @@ -71,6 +70,23 @@ function isRetryableError(error: unknown): boolean { return isRetryableNetworkError(error); } +/** + * Normalize per-call options before handing them to the wrapped provider. + * + * When `config.callSite` is set, resolves provider/model/maxTokens/effort/ + * speed/temperature/thinking/contextWindow via `resolveCallSiteConfig` and + * writes them into `nextConfig` using the wire-format names that downstream + * provider clients consume (`max_tokens` snake-case for the token cap; + * camelCase for the rest, which matches the resolver's shape). Per-call + * explicit overrides on the original `config` object win over the resolved + * values, so callers can pin a model or other parameter for a single request. + * + * Whether or not `callSite` is set, this function applies per-provider + * stripping (`thinking`/`effort`/`speed`) based on the wrapped provider's + * name — agent-loop callers that pre-resolve provider/model still need this + * stripping so they don't accidentally send Anthropic-only knobs to OpenAI + * etc. + */ function normalizeSendMessageOptions( providerName: string, options?: SendMessageOptions, @@ -78,143 +94,47 @@ function normalizeSendMessageOptions( const config = options?.config; if (!config) return options; - // ── Call-site path ────────────────────────────────────────────────── - // When `config.callSite` is set, route through `resolveCallSiteConfig` - // to fully resolve provider/model/maxTokens/effort/speed/temperature/ - // thinking/contextWindow from `llm.default + profile + site` overrides. - // This is the new unified path; the legacy `modelIntent` branch below is - // preserved unchanged for unmigrated callers. - if (config.callSite !== undefined) { - return normalizeViaCallSite(providerName, options, config); - } - - // ── Legacy `modelIntent` path (preserved) ─────────────────────────── - const explicitModel = - typeof config.model === "string" && config.model.trim().length > 0 - ? config.model.trim() - : undefined; - const intent = isModelIntent(config.modelIntent) - ? config.modelIntent - : undefined; - const hasIntent = config.modelIntent !== undefined; - - const needsThinkingStrip = - !THINKING_AWARE_PROVIDERS.has(providerName) && config.thinking !== undefined; - const needsEffortStrip = - !EFFORT_SUPPORTED_PROVIDERS.has(providerName) && config.effort !== undefined; - const needsSpeedStrip = - providerName !== "anthropic" && config.speed !== undefined; - - if ( - !hasIntent && - explicitModel === config.model && - !needsThinkingStrip && - !needsEffortStrip && - !needsSpeedStrip - ) { - return options; - } - const nextConfig: Record = { ...config }; - delete nextConfig.modelIntent; - - // thinking is Anthropic-specific on the wire; OpenRouter reads it as a - // signal for its unified reasoning parameter. Strip it for other providers. - if ( - !THINKING_AWARE_PROVIDERS.has(providerName) && - nextConfig.thinking !== undefined - ) { - delete nextConfig.thinking; - } - - // effort is supported by Anthropic, OpenAI, and OpenAI-compatible providers; strip for others - if ( - !EFFORT_SUPPORTED_PROVIDERS.has(providerName) && - nextConfig.effort !== undefined - ) { - delete nextConfig.effort; - } - - // speed (fast mode) is Anthropic-specific; strip for other providers - if (providerName !== "anthropic" && nextConfig.speed !== undefined) { - delete nextConfig.speed; - } - if (explicitModel) { - nextConfig.model = explicitModel; - } else if (intent) { - nextConfig.model = resolveModelIntent(providerName, intent); - } else { - delete nextConfig.model; - } - - return { - ...options, - config: nextConfig, - }; -} - -/** - * Normalize options when the caller opted into call-site resolution. - * - * Resolves provider/model/maxTokens/effort/speed/temperature/thinking/ - * contextWindow via `resolveCallSiteConfig` and writes them into `nextConfig` - * using the wire-format names that downstream provider clients consume - * (`max_tokens` snake-case for the token cap; camelCase for the rest, which - * matches the resolver's shape). Per-call explicit overrides on the original - * `config` object win over the resolved values, mirroring the legacy - * "explicit `config.model` beats `modelIntent`" semantics so unmigrated - * callers that pass both can't be silently broken. - * - * Both `callSite` and `modelIntent` are stripped from the downstream config. - * Per-provider stripping (`thinking`/`effort`/`speed`) is applied based on - * the wrapped provider's name, identical to the legacy path. - */ -function normalizeViaCallSite( - providerName: string, - options: SendMessageOptions | undefined, - config: NonNullable, -): SendMessageOptions | undefined { - const callSite = config.callSite!; - const resolved = resolveCallSiteConfig(callSite, getConfig().llm); + if (config.callSite !== undefined) { + const resolved = resolveCallSiteConfig(config.callSite, getConfig().llm); - const explicitModel = - typeof config.model === "string" && config.model.trim().length > 0 - ? config.model.trim() - : undefined; + const explicitModel = + typeof config.model === "string" && config.model.trim().length > 0 + ? config.model.trim() + : undefined; - const nextConfig: Record = { ...config }; - // Both opt-in routing keys are consumed by the RetryProvider layer and - // must not leak downstream. - delete nextConfig.callSite; - delete nextConfig.modelIntent; + // Routing key is consumed by the RetryProvider layer and must not leak + // downstream. + delete nextConfig.callSite; - // Apply resolved values, letting per-call explicit fields win where set. - nextConfig.model = explicitModel ?? resolved.model; - if (nextConfig.max_tokens === undefined) { - nextConfig.max_tokens = resolved.maxTokens; - } - if (nextConfig.effort === undefined) { - nextConfig.effort = resolved.effort; - } - if (nextConfig.speed === undefined) { - nextConfig.speed = resolved.speed; - } - if (nextConfig.temperature === undefined) { - nextConfig.temperature = resolved.temperature; - } - if (nextConfig.thinking === undefined) { - nextConfig.thinking = resolved.thinking; - } - if (nextConfig.contextWindow === undefined) { - nextConfig.contextWindow = resolved.contextWindow; - } - // Provider name from the resolver — informational; the wrapped provider - // is the actual transport. Downstream consumers may inspect this for - // diagnostics or wire-format decisions, but the request still routes - // through the inner provider that this RetryProvider wraps. - if (nextConfig.provider === undefined) { - nextConfig.provider = resolved.provider; + // Apply resolved values, letting per-call explicit fields win where set. + nextConfig.model = explicitModel ?? resolved.model; + if (nextConfig.max_tokens === undefined) { + nextConfig.max_tokens = resolved.maxTokens; + } + if (nextConfig.effort === undefined) { + nextConfig.effort = resolved.effort; + } + if (nextConfig.speed === undefined) { + nextConfig.speed = resolved.speed; + } + if (nextConfig.temperature === undefined) { + nextConfig.temperature = resolved.temperature; + } + if (nextConfig.thinking === undefined) { + nextConfig.thinking = resolved.thinking; + } + if (nextConfig.contextWindow === undefined) { + nextConfig.contextWindow = resolved.contextWindow; + } + // Provider name from the resolver — informational; the wrapped provider + // is the actual transport. Downstream consumers may inspect this for + // diagnostics or wire-format decisions, but the request still routes + // through the inner provider that this RetryProvider wraps. + if (nextConfig.provider === undefined) { + nextConfig.provider = resolved.provider; + } } // thinking is Anthropic-specific on the wire; OpenRouter reads it as a diff --git a/assistant/src/providers/types.ts b/assistant/src/providers/types.ts index 6358277b411..1179a23b00b 100644 --- a/assistant/src/providers/types.ts +++ b/assistant/src/providers/types.ts @@ -132,14 +132,12 @@ export type ProviderEvent = export interface SendMessageConfig { model?: string; - modelIntent?: ModelIntent; /** - * Opt-in routing through the unified LLM call-site resolver. When set, - * `RetryProvider` resolves provider/model/maxTokens/effort/speed/temperature/ - * thinking/contextWindow via `resolveCallSiteConfig(callSite, config.llm)` - * instead of consulting `modelIntent`. Both fields may coexist; `callSite` - * wins when present, and the legacy `modelIntent` path is preserved for - * unmigrated callers. + * LLM call-site identifier. `RetryProvider` resolves + * provider/model/maxTokens/effort/speed/temperature/thinking/contextWindow + * via `resolveCallSiteConfig(callSite, config.llm)`. Required for any new + * caller; the legacy `modelIntent`-based fallback was removed in PR 19 of + * the unify-llm-callsites plan. */ callSite?: LLMCallSite; effort?: "low" | "medium" | "high" | "max"; diff --git a/assistant/src/runtime/btw-sidechain.ts b/assistant/src/runtime/btw-sidechain.ts index ec62f2d5707..19bffd33a8a 100644 --- a/assistant/src/runtime/btw-sidechain.ts +++ b/assistant/src/runtime/btw-sidechain.ts @@ -8,7 +8,6 @@ import { } from "../providers/provider-send-message.js"; import type { Message, - ModelIntent, Provider, ProviderEvent, ProviderResponse, @@ -30,12 +29,10 @@ export interface RunBtwSidechainParams { systemPrompt?: string; tools?: ToolDefinition[]; maxTokens?: number; - modelIntent?: ModelIntent; /** - * Unified call-site identifier. When set, the provider layer resolves + * Unified call-site identifier. The provider layer resolves * provider/model/maxTokens/effort/speed/temperature/thinking/contextWindow - * via `resolveCallSiteConfig(callSite, config.llm)`. `callSite` wins over - * `modelIntent` when both are passed. When neither is passed, defaults to + * via `resolveCallSiteConfig(callSite, config.llm)`. Defaults to * `'identityIntro'` since this side-chain runner was originally introduced * for the identity intro generation path; callers (greeting, title, etc.) * override it with their own call-site ID. @@ -100,16 +97,11 @@ export async function runBtwSidechain( config: { max_tokens: params.maxTokens ?? 1024, tool_choice: { type: "none" }, - // Resolution precedence: explicit callSite → explicit modelIntent → - // default callSite "identityIntro" (the original purpose of this - // side-chain runner). PR 5's contract says `callSite` wins over - // `modelIntent` when both are present, so we set them mutually - // exclusively here for clarity. - ...(params.callSite !== undefined - ? { callSite: params.callSite } - : params.modelIntent !== undefined - ? { modelIntent: params.modelIntent } - : { callSite: "identityIntro" as LLMCallSite }), + // Resolution: explicit callSite → default "identityIntro" (the + // original purpose of this side-chain runner). The legacy + // `modelIntent` parameter was removed in PR 19 of the + // unify-llm-callsites plan. + callSite: params.callSite ?? ("identityIntro" as LLMCallSite), }, onEvent: (event) => { if (event.type === "text_delta") { diff --git a/assistant/src/runtime/invite-instruction-generator.ts b/assistant/src/runtime/invite-instruction-generator.ts index bb03bfdb7e2..66155590735 100644 --- a/assistant/src/runtime/invite-instruction-generator.ts +++ b/assistant/src/runtime/invite-instruction-generator.ts @@ -70,7 +70,7 @@ export async function generateInviteInstruction(params: { ? `Send ${contact} this link: ${params.shareUrl} — or tell them to message me${handle} with the code below.` : `Tell ${contact} to message me${handle} with the code below.`; - const resolved = await resolveConfiguredProvider(); + const resolved = await resolveConfiguredProvider("inviteInstructionGenerator"); if (!resolved) { log.debug( "No provider available for invite instruction generation, using fallback", diff --git a/assistant/src/runtime/routes/conversation-routes.ts b/assistant/src/runtime/routes/conversation-routes.ts index eac67ecd545..50daba22530 100644 --- a/assistant/src/runtime/routes/conversation-routes.ts +++ b/assistant/src/runtime/routes/conversation-routes.ts @@ -1929,9 +1929,9 @@ export async function handleSendMessage( messageCount: conversation.getMessages().length, inputTokens: conversation.usageStats.inputTokens, outputTokens: conversation.usageStats.outputTokens, - maxInputTokens: config.contextWindow.maxInputTokens, - model: config.services.inference.model, - provider: config.services.inference.provider, + maxInputTokens: config.llm.default.contextWindow.maxInputTokens, + model: config.llm.default.model, + provider: config.llm.default.provider, estimatedCost: conversation.usageStats.estimatedCost, userMessageInterface: sourceInterface, }; @@ -2181,7 +2181,7 @@ async function generateLlmSuggestion( [{ role: "user", content: [{ type: "text", text: prompt }] }], [], // no tools systemPrompt, - { config: { modelIntent: "latency-optimized" } }, + { config: { callSite: "conversationStarters" } }, ); const textBlock = response.content.find((b) => b.type === "text"); @@ -2300,7 +2300,7 @@ export async function handleGetSuggestion( } // Try LLM suggestion using the configured provider - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("conversationStarters"); if (provider) { try { // Deduplicate concurrent requests diff --git a/assistant/src/runtime/routes/debug-routes.ts b/assistant/src/runtime/routes/debug-routes.ts index 83a741034dd..8e565237ad1 100644 --- a/assistant/src/runtime/routes/debug-routes.ts +++ b/assistant/src/runtime/routes/debug-routes.ts @@ -65,7 +65,7 @@ function handleDebug(): Response { startedAt: new Date(startedAt).toISOString(), }, provider: { - configuredProvider: config.services.inference.provider, + configuredProvider: config.llm.default.provider, registeredProviders, routingSources, inferenceMode: config.services.inference.mode, diff --git a/assistant/src/runtime/routes/diagnostics-routes.ts b/assistant/src/runtime/routes/diagnostics-routes.ts index d0d7a398c83..69f9bcc57ec 100644 --- a/assistant/src/runtime/routes/diagnostics-routes.ts +++ b/assistant/src/runtime/routes/diagnostics-routes.ts @@ -222,7 +222,7 @@ async function handleDictation(body: DictationBody): Promise { const transcription = expandSnippets(body.transcription, profile.snippets); try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("interactionClassifier"); if (!provider) { log.warn( "Dictation: no provider available, using heuristic + raw transcription", @@ -288,7 +288,7 @@ async function handleDictation(body: DictationBody): Promise { systemPrompt, { config: { - modelIntent: "latency-optimized", + callSite: "interactionClassifier", max_tokens: maxTokens, tool_choice: { type: "tool" as const, @@ -381,7 +381,7 @@ async function handleCommandMode( const maxTokens = Math.max(1024, computeMaxTokens(inputLength)); try { - const provider = await getConfiguredProvider(); + const provider = await getConfiguredProvider("interactionClassifier"); if (!provider) { log.warn("Command mode: no provider available, returning selected text"); const normalizedText = applyDictionary( @@ -399,7 +399,9 @@ async function handleCommandMode( [userMessage(body.transcription)], [], systemPrompt, - { config: { modelIntent: "latency-optimized", max_tokens: maxTokens } }, + { + config: { callSite: "interactionClassifier", max_tokens: maxTokens }, + }, ); const textBlock = response.content.find((b) => b.type === "text"); diff --git a/assistant/src/subagent/manager.ts b/assistant/src/subagent/manager.ts index 63cac65d41c..08f1e7834c2 100644 --- a/assistant/src/subagent/manager.ts +++ b/assistant/src/subagent/manager.ts @@ -211,7 +211,7 @@ export class SubagentManager { // ── Build conversation dependencies ───────────────────────────── const appConfig = getConfig(); - let provider = getProvider(appConfig.services.inference.provider); + let provider = getProvider(appConfig.llm.default.provider); const { rateLimit } = appConfig; if (rateLimit.maxRequestsPerMinute > 0) { provider = new RateLimitProvider( @@ -247,7 +247,7 @@ export class SubagentManager { config.systemPromptOverride ?? buildSubagentSystemPrompt({ ...config, id: subagentId }, role); } - const maxTokens = appConfig.maxTokens; + const maxTokens = appConfig.llm.default.maxTokens; const workingDir = getSandboxWorkingDir(); const memoryPolicy: ConversationMemoryPolicy = isFork diff --git a/assistant/src/workspace/migrations/038-unify-llm-callsite-configs.ts b/assistant/src/workspace/migrations/038-unify-llm-callsite-configs.ts index ccbb94455cb..57d9fb9c49a 100644 --- a/assistant/src/workspace/migrations/038-unify-llm-callsite-configs.ts +++ b/assistant/src/workspace/migrations/038-unify-llm-callsite-configs.ts @@ -279,131 +279,22 @@ export const unifyLlmCallSiteConfigsMigration: WorkspaceMigration = { writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n"); }, - down(workspaceDir: string): void { - const configPath = join(workspaceDir, "config.json"); - if (!existsSync(configPath)) return; - - let config: Record; - try { - const raw = JSON.parse(readFileSync(configPath, "utf-8")); - if (!raw || typeof raw !== "object" || Array.isArray(raw)) return; - config = raw as Record; - } catch { - return; - } - - const llm = readObject(config.llm); - if (llm === null) return; - - // ── Reverse llm.default → top-level + services.inference ────────── - const defaultBlock = readObject(llm.default); - if (defaultBlock !== null) { - const services = ensureObj(config, "services"); - const inference = ensureObj(services, "inference"); - const provider = readString(defaultBlock.provider); - if (provider !== undefined) { - inference.provider = provider; - } - const model = readString(defaultBlock.model); - if (model !== undefined) { - inference.model = model; - } - const maxTokens = readPositiveInt(defaultBlock.maxTokens); - if (maxTokens !== undefined) { - config.maxTokens = maxTokens; - } - const effort = readEnum(defaultBlock.effort, EFFORT_VALUES); - if (effort !== undefined) { - config.effort = effort; - } - const speed = readEnum(defaultBlock.speed, SPEED_VALUES); - if (speed !== undefined) { - config.speed = speed; - } - const thinking = readObject(defaultBlock.thinking); - if (thinking !== null) { - config.thinking = thinking; - } - const contextWindow = readObject(defaultBlock.contextWindow); - if (contextWindow !== null) { - config.contextWindow = contextWindow; - } - } - - // ── Reverse llm.callSites → scattered keys ──────────────────────── - const callSites = readObject(llm.callSites) ?? {}; - - const heartbeatAgent = readObject(callSites.heartbeatAgent); - if (heartbeatAgent !== null) { - const speed = readEnum(heartbeatAgent.speed, SPEED_VALUES); - if (speed !== undefined) { - const heartbeat = ensureObj(config, "heartbeat"); - heartbeat.speed = speed; - } - } - - const filingAgent = readObject(callSites.filingAgent); - if (filingAgent !== null) { - const speed = readEnum(filingAgent.speed, SPEED_VALUES); - if (speed !== undefined) { - const filing = ensureObj(config, "filing"); - filing.speed = speed; - } - } - - const analyzeConversation = readObject(callSites.analyzeConversation); - if (analyzeConversation !== null) { - const provider = readString(analyzeConversation.provider); - const model = readString(analyzeConversation.model); - const recombined = - provider !== undefined && model !== undefined - ? `${provider}/${model}` - : (model ?? undefined); - if (recombined !== undefined) { - const analysis = ensureObj(config, "analysis"); - analysis.modelOverride = recombined; - } - } - - const callAgent = readObject(callSites.callAgent); - if (callAgent !== null) { - const model = readString(callAgent.model); - if (model !== undefined) { - const calls = ensureObj(config, "calls"); - calls.model = model; - } - } - - const commitMessage = readObject(callSites.commitMessage); - if (commitMessage !== null) { - const cmMaxTokens = readPositiveInt(commitMessage.maxTokens); - const cmTemperature = readTemperature(commitMessage.temperature); - if (cmMaxTokens !== undefined || cmTemperature !== undefined) { - const workspaceGit = ensureObj(config, "workspaceGit"); - const commitMessageLLM = ensureObj(workspaceGit, "commitMessageLLM"); - if (cmMaxTokens !== undefined) { - commitMessageLLM.maxTokens = cmMaxTokens; - } - if (cmTemperature !== undefined) { - commitMessageLLM.temperature = cmTemperature; - } - } - } - // Note: `conversationSummarization`, `emptyStateGreeting`, - // `notificationDecision`, and `preferenceExtraction` were derived from - // `modelIntent` keys — `down()` intentionally does not synthesize a - // reverse intent (we only have a resolved model, not the intent that - // produced it). Callers reading those legacy keys after a rollback will - // fall back to schema defaults. - - // ── Reverse llm.pricingOverrides → top-level pricingOverrides ───── - if (Array.isArray(llm.pricingOverrides)) { - config.pricingOverrides = llm.pricingOverrides; - } - - delete config.llm; - - writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n"); + /** + * Documented no-op since PR 19 of the unify-llm-callsites plan. + * + * The legacy keys that this migration consolidates (`services.inference. + * {provider,model}`, top-level `maxTokens`/`effort`/`speed`/`thinking`/ + * `contextWindow`/`pricingOverrides`, `heartbeat.speed`, `filing.speed`, + * `analysis.modelIntent`/`modelOverride`, `memory.summarization.modelIntent`, + * `notifications.decisionModelIntent`, `ui.greetingModelIntent`, + * `calls.model`, and `workspaceGit.commitMessageLLM.{maxTokens,temperature}`) + * were removed from `AssistantConfigSchema` in PR 19. Re-creating them in + * `down()` would have no effect on the running daemon (no code reads them + * any more), so a rollback that needs to undo this migration must instead + * roll back the application binary to a build that predates PR 19. + */ + down(_workspaceDir: string): void { + // Forward-only after PR 19. See comment above. }, }; @@ -509,18 +400,3 @@ function readTemperature(value: unknown): number | undefined { ? value : undefined; } - -function ensureObj( - parent: Record, - key: string, -): Record { - if ( - !(key in parent) || - parent[key] == null || - typeof parent[key] !== "object" || - Array.isArray(parent[key]) - ) { - parent[key] = {}; - } - return parent[key] as Record; -} diff --git a/assistant/src/workspace/migrations/039-drop-legacy-llm-keys.ts b/assistant/src/workspace/migrations/039-drop-legacy-llm-keys.ts new file mode 100644 index 00000000000..5ea9c81c094 --- /dev/null +++ b/assistant/src/workspace/migrations/039-drop-legacy-llm-keys.ts @@ -0,0 +1,171 @@ +import { existsSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; + +import type { WorkspaceMigration } from "./types.js"; + +/** + * Strip the now-removed legacy LLM-related keys from existing `config.json` + * files. PR 19 of the unify-llm-callsites plan removed these keys from + * `AssistantConfigSchema`; Zod silently strips unknown fields when re-parsing, + * but the keys would otherwise persist on disk forever and re-appear in any + * exported config snapshot. Erasing them keeps `config.json` lean and matches + * the schema that the in-memory loader sees. + * + * Keys removed: + * - Top level: `maxTokens`, `effort`, `speed`, `thinking`, `contextWindow`, + * `pricingOverrides`. + * - `services.inference.{provider, model}` (the `mode` field stays — it + * governs `managed` vs `your-own` routing, which is orthogonal to LLM + * model selection). + * - `heartbeat.speed`, `filing.speed`. + * - `analysis.modelIntent`, `analysis.modelOverride`. + * - `memory.summarization.modelIntent`. + * - `notifications.decisionModelIntent`. + * - `ui.greetingModelIntent`. + * - `calls.model`. + * - `workspaceGit.commitMessageLLM.{maxTokens, temperature, + * useConfiguredProvider, providerFastModelOverrides}`. + * + * Preconditions: this migration depends on + * `038-unify-llm-callsite-configs` having already populated `llm.default` / + * `llm.callSites` / `llm.pricingOverrides` from these legacy keys. The + * registry guarantees ordering. + * + * Idempotency: each delete is wrapped in a key-exists check so re-runs are + * no-ops. Empty objects are left in place rather than recursively pruned — + * that matches Zod's default behavior of treating an absent value the same + * as an empty `{}` for nested schemas. + */ +export const dropLegacyLlmKeysMigration: WorkspaceMigration = { + id: "039-drop-legacy-llm-keys", + description: + "Strip deprecated scattered LLM-related keys from config.json (post-PR-19 cleanup)", + run(workspaceDir: string): void { + const configPath = join(workspaceDir, "config.json"); + if (!existsSync(configPath)) return; + + let config: Record; + try { + const raw = JSON.parse(readFileSync(configPath, "utf-8")); + if (!raw || typeof raw !== "object" || Array.isArray(raw)) return; + config = raw as Record; + } catch { + return; + } + + let mutated = false; + + for (const key of [ + "maxTokens", + "effort", + "speed", + "thinking", + "contextWindow", + "pricingOverrides", + ]) { + if (key in config) { + delete config[key]; + mutated = true; + } + } + + const services = readObject(config.services); + if (services !== null) { + const inference = readObject(services.inference); + if (inference !== null) { + for (const key of ["provider", "model"]) { + if (key in inference) { + delete inference[key]; + mutated = true; + } + } + } + } + + const heartbeat = readObject(config.heartbeat); + if (heartbeat !== null && "speed" in heartbeat) { + delete heartbeat.speed; + mutated = true; + } + + const filing = readObject(config.filing); + if (filing !== null && "speed" in filing) { + delete filing.speed; + mutated = true; + } + + const analysis = readObject(config.analysis); + if (analysis !== null) { + for (const key of ["modelIntent", "modelOverride"]) { + if (key in analysis) { + delete analysis[key]; + mutated = true; + } + } + } + + const memory = readObject(config.memory); + if (memory !== null) { + const summarization = readObject(memory.summarization); + if (summarization !== null && "modelIntent" in summarization) { + delete summarization.modelIntent; + mutated = true; + } + } + + const notifications = readObject(config.notifications); + if (notifications !== null && "decisionModelIntent" in notifications) { + delete notifications.decisionModelIntent; + mutated = true; + } + + const ui = readObject(config.ui); + if (ui !== null && "greetingModelIntent" in ui) { + delete ui.greetingModelIntent; + mutated = true; + } + + const calls = readObject(config.calls); + if (calls !== null && "model" in calls) { + delete calls.model; + mutated = true; + } + + const workspaceGit = readObject(config.workspaceGit); + if (workspaceGit !== null) { + const commitMessageLLM = readObject(workspaceGit.commitMessageLLM); + if (commitMessageLLM !== null) { + for (const key of [ + "maxTokens", + "temperature", + "useConfiguredProvider", + "providerFastModelOverrides", + ]) { + if (key in commitMessageLLM) { + delete commitMessageLLM[key]; + mutated = true; + } + } + } + } + + if (!mutated) return; + + writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n"); + }, + /** + * Forward-only. Restoring the deleted keys would re-introduce schema-validation + * warnings and have no runtime effect — every reader migrated to `llm.default` + * / `llm.callSites` in PR 19. + */ + down(_workspaceDir: string): void { + // no-op + }, +}; + +function readObject(value: unknown): Record | null { + if (value === null || typeof value !== "object" || Array.isArray(value)) { + return null; + } + return value as Record; +} diff --git a/assistant/src/workspace/migrations/registry.ts b/assistant/src/workspace/migrations/registry.ts index 1e8ec0f511f..8a70d522872 100644 --- a/assistant/src/workspace/migrations/registry.ts +++ b/assistant/src/workspace/migrations/registry.ts @@ -36,6 +36,7 @@ import { seedSlackChannelPersonaMigration } from "./035-seed-slack-channel-perso import { updatePkbIndexBarMigration } from "./036-update-pkb-index-bar.js"; import { createMeetsDirMigration } from "./037-create-meets-dir.js"; import { unifyLlmCallSiteConfigsMigration } from "./038-unify-llm-callsite-configs.js"; +import { dropLegacyLlmKeysMigration } from "./039-drop-legacy-llm-keys.js"; import { migrateToWorkspaceVolumeMigration } from "./migrate-to-workspace-volume.js"; import type { WorkspaceMigration } from "./types.js"; @@ -83,4 +84,5 @@ export const WORKSPACE_MIGRATIONS: WorkspaceMigration[] = [ updatePkbIndexBarMigration, createMeetsDirMigration, unifyLlmCallSiteConfigsMigration, + dropLegacyLlmKeysMigration, ]; diff --git a/assistant/src/workspace/provider-commit-message-generator.ts b/assistant/src/workspace/provider-commit-message-generator.ts index c50d44bacf5..863f01e4419 100644 --- a/assistant/src/workspace/provider-commit-message-generator.ts +++ b/assistant/src/workspace/provider-commit-message-generator.ts @@ -14,7 +14,6 @@ export type LLMFallbackReason = | "missing_provider_api_key" | "breaker_open" | "insufficient_budget" - | "missing_fast_model" | "provider_not_initialized" | "timeout" | "provider_error" @@ -40,19 +39,13 @@ Rules: - Total output must be under 300 characters - If you cannot determine a meaningful message, respond with exactly: FALLBACK`; -const PROVIDER_DEFAULT_FAST_MODELS: Record = { - anthropic: "claude-haiku-4-5-20251001", - openai: "gpt-4o-mini", - gemini: "gemini-2.0-flash", -}; - // Providers that can be initialized without an API key (e.g., Ollama runs locally) const KEYLESS_PROVIDERS = new Set(["ollama"]); const deterministicProvider = new DefaultCommitMessageProvider(); function getProviderCandidates(config: ReturnType): string[] { - return [config.services.inference.provider]; + return [config.llm.default.provider]; } function buildDeterministicResult( @@ -118,22 +111,22 @@ export class ProviderCommitMessageGenerator { // 3. selected-provider API key preflight (except keyless providers) // 4. breaker_open // 5. insufficient_budget - // 6. missing_fast_model - // 7. call provider → timeout / provider_error / invalid_output + // 6. call provider → timeout / provider_error / invalid_output // ────────────────────────────────────────────────────────────────── // Step 1: Feature gate if (!llmConfig.enabled) { return buildDeterministicResult(context, "disabled"); } - if (!llmConfig.useConfiguredProvider) { - return buildDeterministicResult(context, "disabled"); - } - // Step 2: Resolve configured provider. - // If nothing is resolvable, differentiate likely missing-key cases from - // true registry/init failures. - const resolved = await resolveConfiguredProvider(); + // Step 2: Resolve configured provider via the commit-message call site, + // so model + maxTokens + temperature come from `llm.callSites.commitMessage` + // (with `llm.default` as the fallback). Operational fields (`enabled`, + // `timeoutMs`, `breaker`, `maxFilesInPrompt`, `maxDiffBytes`, + // `minRemainingTurnBudgetMs`) remain on `workspaceGit.commitMessageLLM` + // and are read above. If nothing is resolvable, differentiate likely + // missing-key cases from true registry/init failures. + const resolved = await resolveConfiguredProvider("commitMessage"); if (!resolved) { const candidates = getProviderCandidates(config); const hasAnyKeylessCandidate = candidates.some((name) => @@ -153,7 +146,7 @@ export class ProviderCommitMessageGenerator { return buildDeterministicResult(context, "missing_provider_api_key"); } log.debug( - { provider: config.services.inference.provider }, + { provider: config.llm.default.provider }, "Provider not initialized; falling back to deterministic", ); return buildDeterministicResult(context, "provider_not_initialized"); @@ -200,23 +193,7 @@ export class ProviderCommitMessageGenerator { } } - // Step 5: Fast model preflight — resolve before any provider call - const fastModel = - llmConfig.providerFastModelOverrides[providerName] ?? - PROVIDER_DEFAULT_FAST_MODELS[providerName]; - - if (!fastModel) { - log.debug( - { - provider: providerName, - configuredProvider: config.services.inference.provider, - }, - "No fast model resolvable for provider; falling back to deterministic", - ); - return buildDeterministicResult(context, "missing_fast_model"); - } - - // Step 6 + 7: Call the provider + // Step 5: Call the provider try { // Build prompt const fileList = options.changedFiles @@ -263,19 +240,13 @@ export class ProviderCommitMessageGenerator { { signal: ac.signal, config: { - // `callSite` lets the provider resolve `max_tokens` and - // `temperature` from `llm.callSites.commitMessage` (populated by - // the workspace migration from the legacy - // `workspaceGit.commitMessageLLM.{maxTokens,temperature}` keys). - // Operational fields (`enabled`, `timeoutMs`, `breaker`, - // `maxFilesInPrompt`, `maxDiffBytes`, `minRemainingTurnBudgetMs`) - // remain on `workspaceGit.commitMessageLLM` and are read above. + // `callSite` lets the provider resolve model, max_tokens, and + // temperature from `llm.callSites.commitMessage` (with + // `llm.default` as the fallback). Operational fields + // (`enabled`, `timeoutMs`, `breaker`, `maxFilesInPrompt`, + // `maxDiffBytes`, `minRemainingTurnBudgetMs`) remain on + // `workspaceGit.commitMessageLLM` and are read above. callSite: "commitMessage", - // `fastModel` overrides the resolver's `model` because commit - // message generation enforces its own provider-specific fast - // model selection (see `PROVIDER_DEFAULT_FAST_MODELS` and - // `providerFastModelOverrides`). - model: fastModel, }, }, );