From 4d0314e500d35c8d9980b589a2d76e7f199bf8a6 Mon Sep 17 00:00:00 2001 From: siddseethepalli Date: Fri, 17 Apr 2026 02:13:26 +0000 Subject: [PATCH] fix(cleanup): remove dead code, refresh comments, add migration test, update docs --- assistant/ARCHITECTURE.md | 2 +- .../src/__tests__/call-controller.test.ts | 3 +- .../conversation-process-callsite.test.ts | 11 +- .../src/__tests__/heartbeat-service.test.ts | 17 +- assistant/src/__tests__/model-intents.test.ts | 12 +- ...migration-039-drop-legacy-llm-keys.test.ts | 343 ++++++++++++++++++ assistant/src/agent/loop.ts | 9 +- .../phone-calls/references/CONFIG.md | 2 +- assistant/src/config/schema.ts | 12 +- assistant/src/config/schemas/inference.ts | 22 -- .../src/daemon/conversation-agent-loop.ts | 9 +- assistant/src/daemon/conversation.ts | 3 +- assistant/src/daemon/handlers/shared.ts | 3 +- assistant/src/filing/filing-service.ts | 3 +- assistant/src/notifications/README.md | 14 +- assistant/src/providers/types.ts | 5 +- assistant/src/runtime/btw-sidechain.ts | 6 +- .../runtime/services/analyze-conversation.ts | 4 +- cli/src/lib/config-utils.ts | 8 +- .../Features/Settings/SettingsStore.swift | 70 +--- .../MockSettingsClient.swift | 5 +- gateway/ARCHITECTURE.md | 2 +- 22 files changed, 417 insertions(+), 148 deletions(-) create mode 100644 assistant/src/__tests__/workspace-migration-039-drop-legacy-llm-keys.test.ts diff --git a/assistant/ARCHITECTURE.md b/assistant/ARCHITECTURE.md index 8fc7a31baf8..6540811b5b8 100644 --- a/assistant/ARCHITECTURE.md +++ b/assistant/ARCHITECTURE.md @@ -2094,7 +2094,7 @@ Connected channels are resolved at signal emission time: vellum is always includ **Audit trail (SQLite):** `notification_events` → `notification_decisions` (with `conversationActions` in validation results) → `notification_deliveries` (with `conversation_id`, `message_id`, `conversation_strategy`, `conversation_action`, `conversation_target_id`, `conversation_fallback_used`) -**Configuration:** `notifications.decisionModelIntent` in `config.json`. +**Configuration:** `llm.callSites.notificationDecision` (decision engine) and `llm.callSites.preferenceExtraction` (preference extractor) in `config.json`. Both fall back to `llm.default` when unset. --- diff --git a/assistant/src/__tests__/call-controller.test.ts b/assistant/src/__tests__/call-controller.test.ts index 8d4208f62b0..3c204c9a193 100644 --- a/assistant/src/__tests__/call-controller.test.ts +++ b/assistant/src/__tests__/call-controller.test.ts @@ -35,10 +35,9 @@ mock.module("../config/loader.js", () => { silenceTimeoutSeconds: 30, disclosure: { enabled: false, text: "" }, safety: { denyCategories: [] }, - model: undefined, }, memory: { enabled: false }, - notifications: { decisionModelIntent: "latency-optimized" }, + notifications: {}, services: { tts: { mode: "your-own" as const, diff --git a/assistant/src/__tests__/conversation-process-callsite.test.ts b/assistant/src/__tests__/conversation-process-callsite.test.ts index bb14d7fc856..8abc7df9dd5 100644 --- a/assistant/src/__tests__/conversation-process-callsite.test.ts +++ b/assistant/src/__tests__/conversation-process-callsite.test.ts @@ -1,11 +1,14 @@ /** - * Verify that `callSite` threads from `Conversation.processMessage` options - * all the way down to the per-call provider config, and that user-initiated - * turns default to `'mainAgent'` when no caller-supplied `callSite` is set. + * Verifies that `callSite` threads from `Conversation.processMessage` + * options all the way down to the per-call provider config, and that + * user-initiated turns default to `'mainAgent'` when no caller-supplied + * `callSite` is set. * * The test mocks `AgentLoop.run()` so it can capture the `callSite` parameter * the conversation passes after `processMessage` runs the slash-resolver and - * runtime-injection pipeline. + * runtime-injection pipeline. Adapter callers (heartbeat, filing, scheduler) + * pass an explicit `callSite` so `RetryProvider` resolves their per-call + * config from `llm.callSites.`. */ import { describe, expect, mock, test } from "bun:test"; diff --git a/assistant/src/__tests__/heartbeat-service.test.ts b/assistant/src/__tests__/heartbeat-service.test.ts index 063d284bc44..1249c480368 100644 --- a/assistant/src/__tests__/heartbeat-service.test.ts +++ b/assistant/src/__tests__/heartbeat-service.test.ts @@ -9,7 +9,6 @@ let mockConfig = { heartbeat: { enabled: true, intervalMs: 60_000, - speed: "standard" as "standard" | "fast", activeHoursStart: undefined as number | undefined, activeHoursEnd: undefined as number | undefined, }, @@ -185,7 +184,6 @@ describe("HeartbeatService", () => { heartbeat: { enabled: true, intervalMs: 60_000, - speed: "standard", activeHoursStart: undefined, activeHoursEnd: undefined, }, @@ -547,17 +545,18 @@ describe("HeartbeatService", () => { }); }); - test("callSite is passed regardless of legacy heartbeat.speed value", async () => { - // The legacy `heartbeat.speed` field is no longer read by the heartbeat - // service — every run unconditionally passes `callSite: 'heartbeatAgent'` - // and the resolver maps that to whatever `llm.callSites.heartbeatAgent` - // (or the default) configures. PR 19 will remove the schema field. - mockConfig.heartbeat.speed = "fast"; + test("processMessage receives only callSite — no legacy speed knob", async () => { + // The heartbeat service unconditionally passes `callSite: + // 'heartbeatAgent'` and nothing else. The resolver maps that identifier + // to whatever `llm.callSites.heartbeatAgent` (or `llm.default`) + // configures. const service = createService(); await service.runOnce(); expect(processMessageCalls).toHaveLength(1); - expect(processMessageCalls[0].options?.callSite).toBe("heartbeatAgent"); + expect(processMessageCalls[0].options).toEqual({ + callSite: "heartbeatAgent", + }); }); test("end-to-end: llm.callSites.heartbeatAgent.speed resolves to 'fast'", async () => { diff --git a/assistant/src/__tests__/model-intents.test.ts b/assistant/src/__tests__/model-intents.test.ts index 267acd55b1a..0841ca94a2f 100644 --- a/assistant/src/__tests__/model-intents.test.ts +++ b/assistant/src/__tests__/model-intents.test.ts @@ -38,8 +38,10 @@ describe("model intents", () => { }); }); -// `RetryProvider`'s legacy `modelIntent` normalization path was removed in -// PR 19 of the unify-llm-callsites plan. The remaining `resolveModelIntent` -// helper lives in `providers/model-intents.ts` for use by the workspace -// migration's snapshot table — see `workspace/migrations/038-unify-llm- -// callsite-configs.ts`. +// `RetryProvider` normalizes outbound calls through call-site routing +// (`resolveCallSiteConfig` against `llm.callSites.` / `llm.default`). +// The `resolveModelIntent` helper exercised above lives in +// `providers/model-intents.ts` and is used by the unify-llm workspace +// migration's snapshot table (see +// `workspace/migrations/038-unify-llm-callsite-configs.ts`) to seed +// `llm.default` from any pre-existing `modelIntent` setting. diff --git a/assistant/src/__tests__/workspace-migration-039-drop-legacy-llm-keys.test.ts b/assistant/src/__tests__/workspace-migration-039-drop-legacy-llm-keys.test.ts new file mode 100644 index 00000000000..6da7ca7f411 --- /dev/null +++ b/assistant/src/__tests__/workspace-migration-039-drop-legacy-llm-keys.test.ts @@ -0,0 +1,343 @@ +import { + existsSync, + mkdirSync, + readFileSync, + rmSync, + writeFileSync, +} from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; + +import { dropLegacyLlmKeysMigration } from "../workspace/migrations/039-drop-legacy-llm-keys.js"; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +let workspaceDir: string; + +function freshWorkspace(): void { + workspaceDir = join( + tmpdir(), + `vellum-migration-039-test-${Date.now()}-${Math.random().toString(36).slice(2)}`, + ); + mkdirSync(workspaceDir, { recursive: true }); +} + +function writeConfig(data: Record): void { + writeFileSync( + join(workspaceDir, "config.json"), + JSON.stringify(data, null, 2) + "\n", + ); +} + +function readConfig(): Record { + return JSON.parse(readFileSync(join(workspaceDir, "config.json"), "utf-8")); +} + +// --------------------------------------------------------------------------- +// Setup / teardown +// --------------------------------------------------------------------------- + +beforeEach(() => { + freshWorkspace(); +}); + +afterEach(() => { + if (existsSync(workspaceDir)) { + rmSync(workspaceDir, { recursive: true, force: true }); + } +}); + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +describe("039-drop-legacy-llm-keys migration", () => { + test("has correct migration id and description", () => { + expect(dropLegacyLlmKeysMigration.id).toBe("039-drop-legacy-llm-keys"); + expect(dropLegacyLlmKeysMigration.description).toBe( + "Strip deprecated scattered LLM-related keys from config.json (post-PR-19 cleanup)", + ); + }); + + // ─── No-op cases ──────────────────────────────────────────────────────── + + test("no-op when config.json does not exist", () => { + dropLegacyLlmKeysMigration.run(workspaceDir); + expect(existsSync(join(workspaceDir, "config.json"))).toBe(false); + }); + + test("gracefully handles invalid JSON in config file", () => { + writeFileSync(join(workspaceDir, "config.json"), "not-valid-json"); + dropLegacyLlmKeysMigration.run(workspaceDir); + expect(readFileSync(join(workspaceDir, "config.json"), "utf-8")).toBe( + "not-valid-json", + ); + }); + + test("gracefully handles array-shaped config", () => { + writeFileSync(join(workspaceDir, "config.json"), JSON.stringify([1, 2, 3])); + dropLegacyLlmKeysMigration.run(workspaceDir); + const raw = JSON.parse( + readFileSync(join(workspaceDir, "config.json"), "utf-8"), + ); + expect(raw).toEqual([1, 2, 3]); + }); + + test("no-op when config has none of the targeted legacy keys", () => { + const original = { + llm: { + default: { + provider: "anthropic", + model: "claude-opus-4-6", + maxTokens: 64000, + effort: "max", + speed: "standard", + temperature: null, + }, + }, + services: { inference: { mode: "your-own" } }, + otherSetting: true, + }; + writeConfig(original); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + expect(readConfig()).toEqual(original); + }); + + // ─── Full strip — every legacy key present ───────────────────────────── + + test("strips every targeted legacy key when all are present", () => { + writeConfig({ + // Top-level scattered keys + maxTokens: 16000, + effort: "low", + speed: "fast", + thinking: { enabled: false, streamThinking: false }, + contextWindow: { enabled: true, maxInputTokens: 100000 }, + pricingOverrides: [ + { + provider: "anthropic", + modelPattern: "*", + inputPer1M: 1, + outputPer1M: 5, + }, + ], + // services.inference.{provider, model} + services: { + inference: { + mode: "your-own", + provider: "openai", + model: "gpt-5.4", + }, + }, + // heartbeat.speed / filing.speed + heartbeat: { enabled: true, intervalMs: 60000, speed: "fast" }, + filing: { enabled: true, intervalMs: 30000, speed: "fast" }, + // analysis + analysis: { + enabled: true, + modelIntent: "quality-optimized", + modelOverride: "claude-opus-4-7", + }, + // memory.summarization.modelIntent + memory: { summarization: { modelIntent: "latency-optimized" } }, + // notifications.decisionModelIntent + notifications: { decisionModelIntent: "latency-optimized" }, + // ui.greetingModelIntent + ui: { greetingModelIntent: "quality-optimized" }, + // calls.model + calls: { enabled: true, model: "gpt-5.4-nano" }, + // workspaceGit.commitMessageLLM.* + workspaceGit: { + commitMessageLLM: { + maxTokens: 1024, + temperature: 0.2, + useConfiguredProvider: true, + providerFastModelOverrides: { anthropic: "claude-haiku-3-5" }, + }, + }, + // Unrelated key — must survive + otherSetting: "preserved", + }); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + const config = readConfig(); + + // Top-level + expect(config.maxTokens).toBeUndefined(); + expect(config.effort).toBeUndefined(); + expect(config.speed).toBeUndefined(); + expect(config.thinking).toBeUndefined(); + expect(config.contextWindow).toBeUndefined(); + expect(config.pricingOverrides).toBeUndefined(); + + // services.inference: provider/model stripped, mode preserved + const services = config.services as { inference: Record }; + expect(services.inference.provider).toBeUndefined(); + expect(services.inference.model).toBeUndefined(); + expect(services.inference.mode).toBe("your-own"); + + // heartbeat / filing + const heartbeat = config.heartbeat as Record; + expect(heartbeat.speed).toBeUndefined(); + expect(heartbeat.enabled).toBe(true); + expect(heartbeat.intervalMs).toBe(60000); + const filing = config.filing as Record; + expect(filing.speed).toBeUndefined(); + expect(filing.enabled).toBe(true); + expect(filing.intervalMs).toBe(30000); + + // analysis + const analysis = config.analysis as Record; + expect(analysis.modelIntent).toBeUndefined(); + expect(analysis.modelOverride).toBeUndefined(); + expect(analysis.enabled).toBe(true); + + // memory.summarization.modelIntent + const memory = config.memory as { summarization: Record }; + expect(memory.summarization.modelIntent).toBeUndefined(); + + // notifications.decisionModelIntent + const notifications = config.notifications as Record; + expect(notifications.decisionModelIntent).toBeUndefined(); + + // ui.greetingModelIntent + const ui = config.ui as Record; + expect(ui.greetingModelIntent).toBeUndefined(); + + // calls.model + const calls = config.calls as Record; + expect(calls.model).toBeUndefined(); + expect(calls.enabled).toBe(true); + + // workspaceGit.commitMessageLLM + const workspaceGit = config.workspaceGit as { + commitMessageLLM: Record; + }; + expect(workspaceGit.commitMessageLLM.maxTokens).toBeUndefined(); + expect(workspaceGit.commitMessageLLM.temperature).toBeUndefined(); + expect(workspaceGit.commitMessageLLM.useConfiguredProvider).toBeUndefined(); + expect( + workspaceGit.commitMessageLLM.providerFastModelOverrides, + ).toBeUndefined(); + + // Unrelated key untouched + expect(config.otherSetting).toBe("preserved"); + }); + + // ─── Partial strip ───────────────────────────────────────────────────── + + test("strips only the legacy keys actually present", () => { + writeConfig({ + // Only a few legacy keys present + speed: "fast", + services: { inference: { mode: "managed", provider: "anthropic" } }, + notifications: { decisionModelIntent: "latency-optimized" }, + // Unrelated keys + maxStepsPerSession: 50, + }); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + const config = readConfig(); + expect(config.speed).toBeUndefined(); + const services = config.services as { inference: Record }; + expect(services.inference.provider).toBeUndefined(); + expect(services.inference.mode).toBe("managed"); + expect( + (config.notifications as Record).decisionModelIntent, + ).toBeUndefined(); + expect(config.maxStepsPerSession).toBe(50); + }); + + // ─── services.inference.mode preservation ────────────────────────────── + + test("preserves services.inference.mode while stripping provider/model", () => { + writeConfig({ + services: { + inference: { mode: "your-own", provider: "openai", model: "gpt-5.4" }, + }, + }); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + const config = readConfig(); + const services = config.services as { inference: Record }; + expect(services.inference.mode).toBe("your-own"); + expect(services.inference.provider).toBeUndefined(); + expect(services.inference.model).toBeUndefined(); + }); + + // ─── Idempotency ─────────────────────────────────────────────────────── + + test("idempotency: re-running the migration yields no further mutation", () => { + writeConfig({ + speed: "fast", + services: { + inference: { mode: "your-own", provider: "openai", model: "gpt-5.4" }, + }, + heartbeat: { enabled: true, speed: "fast" }, + calls: { enabled: true, model: "gpt-5.4-nano" }, + }); + + dropLegacyLlmKeysMigration.run(workspaceDir); + const afterFirst = readConfig(); + + dropLegacyLlmKeysMigration.run(workspaceDir); + const afterSecond = readConfig(); + + expect(afterSecond).toEqual(afterFirst); + }); + + test("idempotency: writes nothing on a clean config (no targeted keys)", () => { + const clean = { + llm: { default: { provider: "anthropic", model: "claude-opus-4-6" } }, + services: { inference: { mode: "your-own" } }, + }; + writeConfig(clean); + + const beforeStat = JSON.parse( + readFileSync(join(workspaceDir, "config.json"), "utf-8"), + ); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + const afterStat = JSON.parse( + readFileSync(join(workspaceDir, "config.json"), "utf-8"), + ); + + expect(afterStat).toEqual(beforeStat); + }); + + // ─── Defensive shape handling ────────────────────────────────────────── + + test("ignores non-object values at sub-paths (e.g. numbers, arrays)", () => { + // Attacker-style or otherwise corrupt config — the migration should not + // throw on unexpected shapes. Defensive readObject() helper coerces + // non-objects to null and the migration skips them. + writeConfig({ + services: 42, + heartbeat: ["a", "b"], + calls: null, + workspaceGit: { commitMessageLLM: "not-an-object" }, + // A real legacy key alongside garbage + speed: "fast", + }); + + dropLegacyLlmKeysMigration.run(workspaceDir); + + const config = readConfig(); + // The valid top-level legacy key still gets stripped + expect(config.speed).toBeUndefined(); + // Garbage shapes preserved as-is + expect(config.services).toBe(42); + expect(config.heartbeat).toEqual(["a", "b"]); + expect(config.calls).toBeNull(); + expect(config.workspaceGit).toEqual({ commitMessageLLM: "not-an-object" }); + }); +}); diff --git a/assistant/src/agent/loop.ts b/assistant/src/agent/loop.ts index b34791b9402..228bd8f472c 100644 --- a/assistant/src/agent/loop.ts +++ b/assistant/src/agent/loop.ts @@ -268,10 +268,11 @@ export class AgentLoop { // Per-call LLM call-site identifier. Surfaces on the per-call // `config.callSite` so `RetryProvider.normalizeSendMessageOptions` - // can route through `resolveCallSiteConfig`. User-initiated - // conversation turns default to `mainAgent` in the agent loop's - // caller; other invocation contexts (heartbeat, filing, analyze, - // etc.) pass their own `callSite`. + // can route through `resolveCallSiteConfig` against + // `llm.callSites.` (falling back to `llm.default` when absent). + // User-initiated conversation turns default to `mainAgent` in the + // agent loop's caller; other invocation contexts (heartbeat, filing, + // analyze, etc.) pass their own `callSite`. if (callSite) { providerConfig.callSite = callSite; } diff --git a/assistant/src/config/bundled-skills/phone-calls/references/CONFIG.md b/assistant/src/config/bundled-skills/phone-calls/references/CONFIG.md index a494853f1ee..759df738ad6 100644 --- a/assistant/src/config/bundled-skills/phone-calls/references/CONFIG.md +++ b/assistant/src/config/bundled-skills/phone-calls/references/CONFIG.md @@ -10,7 +10,7 @@ All call-related settings can be managed via `assistant config`: | `calls.userConsultTimeoutSeconds` | How long to wait for user answers | `120` (2 min) | | `calls.disclosure.enabled` | Whether the AI announces itself at call start | `true` | | `calls.disclosure.text` | The disclosure message spoken at call start | `"At the very beginning of the call, introduce yourself as an assistant calling on behalf of my human."` | -| `calls.model` | Override LLM model for call orchestration | _(uses default model)_ | +| `llm.callSites.callAgent.model` | Override LLM model for call orchestration | _(unset — falls back to `llm.default.model`)_ | | `calls.callerIdentity.allowPerCallOverride` | Allow per-call caller identity selection | `true` | | `calls.callerIdentity.userNumber` | E.164 phone number for user-number mode | _(empty)_ | | `calls.voice.language` | Language code for TTS and transcription | `en-US` | diff --git a/assistant/src/config/schema.ts b/assistant/src/config/schema.ts index fc6e12250dc..99f03a7eac7 100644 --- a/assistant/src/config/schema.ts +++ b/assistant/src/config/schema.ts @@ -62,17 +62,13 @@ export { export type { ContextOverflowRecoveryConfig, ContextWindowConfig, - Effort, ModelPricingOverride, - Speed, ThinkingConfig, } from "./schemas/inference.js"; export { ContextOverflowRecoveryConfigSchema, ContextWindowConfigSchema, - EffortSchema, ModelPricingOverrideSchema, - SpeedSchema, ThinkingConfigSchema, } from "./schemas/inference.js"; export type { @@ -87,6 +83,14 @@ export { } from "./schemas/ingress.js"; export type { JournalConfig } from "./schemas/journal.js"; export { JournalConfigSchema } from "./schemas/journal.js"; +export type { + LLMCallSite, + LLMCallSiteConfig, + LLMConfig, + LLMConfigBase, + LLMConfigFragment, +} from "./schemas/llm.js"; +export { LLMCallSiteEnum, LLMSchema } from "./schemas/llm.js"; export type { AuditLogConfig, LogFileConfig } from "./schemas/logging.js"; export { AuditLogConfigSchema, diff --git a/assistant/src/config/schemas/inference.ts b/assistant/src/config/schemas/inference.ts index 21fc696af18..208845851ba 100644 --- a/assistant/src/config/schemas/inference.ts +++ b/assistant/src/config/schemas/inference.ts @@ -25,28 +25,6 @@ export const ThinkingConfigSchema = z }) .describe("Extended thinking (chain-of-thought) configuration"); -export const EffortSchema = z - .enum(["low", "medium", "high", "max"], { - error: 'effort must be "low", "medium", "high", or "max"', - }) - .default("max") - .describe( - "How much effort the model should put into its response — lower effort is faster and cheaper", - ); - -export type Effort = z.infer; - -export const SpeedSchema = z - .enum(["standard", "fast"], { - error: 'speed must be "standard" or "fast"', - }) - .default("standard") - .describe( - 'Inference speed mode — "fast" enables higher output token throughput on supported models (Opus 4.6, Opus 4.7) at premium pricing', - ); - -export type Speed = z.infer; - export const ContextOverflowRecoveryConfigSchema = z .object({ enabled: z diff --git a/assistant/src/daemon/conversation-agent-loop.ts b/assistant/src/daemon/conversation-agent-loop.ts index d23c0637f4a..e9f5a015c81 100644 --- a/assistant/src/daemon/conversation-agent-loop.ts +++ b/assistant/src/daemon/conversation-agent-loop.ts @@ -359,10 +359,9 @@ export async function runAgentLoopImpl( titleText?: string; /** * LLM call-site identifier threaded into the per-call provider config. - * When unset, defaults to `'mainAgent'` so this turn routes through the - * main-agent profile in the unified `llm` config. Adapter callers - * (heartbeat, filing, schedule, etc.) override with their own call-site - * id as PRs 7-11 migrate them off the legacy `speed` / `modelIntent` paths. + * Adapter callers (heartbeat, filing, scheduler, etc.) pass their own + * call-site id so the resolver picks `llm.callSites.`. When unset, + * the agent loop defaults to `'mainAgent'` for user-initiated turns. */ callSite?: LLMCallSite; }, @@ -392,7 +391,7 @@ export async function runAgentLoopImpl( // invocation contexts (heartbeat, filing, analyze, etc.) pass their own // `callSite`. The provider layer resolves provider/model/maxTokens via // `resolveCallSiteConfig`, picking up any user overrides under - // `llm.callSites.mainAgent`. + // `llm.callSites.mainAgent` (falling back to `llm.default` when absent). const turnCallSite: LLMCallSite = options?.callSite ?? "mainAgent"; // Capture the turn channel context *before* any awaits so a second diff --git a/assistant/src/daemon/conversation.ts b/assistant/src/daemon/conversation.ts index 1c3fa9af48e..4d4167cf7df 100644 --- a/assistant/src/daemon/conversation.ts +++ b/assistant/src/daemon/conversation.ts @@ -24,8 +24,7 @@ import type { } from "../channels/types.js"; import { isAssistantFeatureFlagEnabled } from "../config/assistant-feature-flags.js"; import { getConfig } from "../config/loader.js"; -import type { Speed } from "../config/schemas/inference.js"; -import type { LLMCallSite } from "../config/schemas/llm.js"; +import type { LLMCallSite, Speed } from "../config/schemas/llm.js"; import { ContextWindowManager, type ContextWindowResult, diff --git a/assistant/src/daemon/handlers/shared.ts b/assistant/src/daemon/handlers/shared.ts index 3ce19037d57..95beaca64ec 100644 --- a/assistant/src/daemon/handlers/shared.ts +++ b/assistant/src/daemon/handlers/shared.ts @@ -1,8 +1,7 @@ import { v4 as uuid } from "uuid"; import { getConfig } from "../../config/loader.js"; -import type { Speed } from "../../config/schemas/inference.js"; -import type { LLMCallSite } from "../../config/schemas/llm.js"; +import type { LLMCallSite, Speed } from "../../config/schemas/llm.js"; import type { HeartbeatService } from "../../heartbeat/heartbeat-service.js"; import type { SecretPromptResult } from "../../permissions/secret-prompter.js"; import type { AuthContext } from "../../runtime/auth/types.js"; diff --git a/assistant/src/filing/filing-service.ts b/assistant/src/filing/filing-service.ts index ae9bf729d4c..f007a305c21 100644 --- a/assistant/src/filing/filing-service.ts +++ b/assistant/src/filing/filing-service.ts @@ -2,7 +2,6 @@ import { existsSync, readFileSync } from "node:fs"; import { join } from "node:path"; import { getConfig } from "../config/loader.js"; -import type { Speed } from "../config/schemas/inference.js"; import type { LLMCallSite } from "../config/schemas/llm.js"; import { bootstrapConversation } from "../memory/conversation-bootstrap.js"; import { getLogger } from "../util/logger.js"; @@ -43,7 +42,7 @@ export interface FilingDeps { processMessage: ( conversationId: string, content: string, - options?: { speed?: Speed; callSite?: LLMCallSite }, + options?: { callSite?: LLMCallSite }, ) => Promise<{ messageId: string }>; onConversationCreated?: (info: { conversationId: string; diff --git a/assistant/src/notifications/README.md b/assistant/src/notifications/README.md index 710a68df6fa..5b371b94a64 100644 --- a/assistant/src/notifications/README.md +++ b/assistant/src/notifications/README.md @@ -31,7 +31,7 @@ The candidate set is serialized into a compact `` block ### 3. Decision -The decision engine (`decision-engine.ts`) sends the signal to an LLM (configured via `notifications.decisionModelIntent`) along with available channels, the user's preference summary, and the conversation candidate set. The LLM responds with a structured decision: whether to notify, which channels, rendered copy per channel, a deduplication key, and **per-channel conversation actions**. +The decision engine (`decision-engine.ts`) sends the signal to an LLM (configured via `llm.callSites.notificationDecision`) along with available channels, the user's preference summary, and the conversation candidate set. The LLM responds with a structured decision: whether to notify, which channels, rendered copy per channel, a deduplication key, and **per-channel conversation actions**. **Conversation actions:** For each selected channel, the LLM decides: @@ -538,10 +538,14 @@ Preferences are sanitized against prompt injection (angle brackets replaced with ## Configuration -All settings live under the `notifications` key in `config.json`: +The decision engine and preference extractor pick their per-call LLM config +from the unified `llm` block. Override defaults by setting either of: -| Key | Type | Default | Description | -| ----------------------------------- | ------ | --------------------- | ------------------------------------------------------------------------ | -| `notifications.decisionModelIntent` | string | `"latency-optimized"` | Model intent used for both the decision engine and preference extraction | +| Key | Type | Default | Description | +| ------------------------------------ | ------- | -------------- | --------------------------------------------------------------------------- | +| `llm.callSites.notificationDecision` | object | _(unset)_ | Provider/model/effort/etc. override for the decision engine call site | +| `llm.callSites.preferenceExtraction` | object | _(unset)_ | Provider/model/effort/etc. override for the preference extractor call site | + +When a call site override is unset, the resolver falls back to `llm.default`. The notification pipeline is always active -- signals are processed and dispatched as soon as the daemon is running. The audit trail (events, decisions, deliveries) is written for every signal. diff --git a/assistant/src/providers/types.ts b/assistant/src/providers/types.ts index 1179a23b00b..8aed31acd9a 100644 --- a/assistant/src/providers/types.ts +++ b/assistant/src/providers/types.ts @@ -135,9 +135,8 @@ export interface SendMessageConfig { /** * LLM call-site identifier. `RetryProvider` resolves * provider/model/maxTokens/effort/speed/temperature/thinking/contextWindow - * via `resolveCallSiteConfig(callSite, config.llm)`. Required for any new - * caller; the legacy `modelIntent`-based fallback was removed in PR 19 of - * the unify-llm-callsites plan. + * via `resolveCallSiteConfig(callSite, config.llm)`, falling back to + * `llm.default` when no callSite-specific entry is present. */ callSite?: LLMCallSite; effort?: "low" | "medium" | "high" | "max"; diff --git a/assistant/src/runtime/btw-sidechain.ts b/assistant/src/runtime/btw-sidechain.ts index 19bffd33a8a..50c6776d73f 100644 --- a/assistant/src/runtime/btw-sidechain.ts +++ b/assistant/src/runtime/btw-sidechain.ts @@ -97,10 +97,8 @@ export async function runBtwSidechain( config: { max_tokens: params.maxTokens ?? 1024, tool_choice: { type: "none" }, - // Resolution: explicit callSite → default "identityIntro" (the - // original purpose of this side-chain runner). The legacy - // `modelIntent` parameter was removed in PR 19 of the - // unify-llm-callsites plan. + // Default call site is "identityIntro" — the original purpose of + // this side-chain runner. Callers may override per invocation. callSite: params.callSite ?? ("identityIntro" as LLMCallSite), }, onEvent: (event) => { diff --git a/assistant/src/runtime/services/analyze-conversation.ts b/assistant/src/runtime/services/analyze-conversation.ts index 90d584e6ce4..62cfa59e413 100644 --- a/assistant/src/runtime/services/analyze-conversation.ts +++ b/assistant/src/runtime/services/analyze-conversation.ts @@ -17,8 +17,8 @@ * * Both triggers route the agent loop through `callSite: 'analyzeConversation'` * so per-call provider/model selection flows through `resolveCallSiteConfig` - * against `llm.callSites.analyzeConversation` (populated by the unify-llm - * workspace migration from the legacy `analysis.modelIntent`/`modelOverride`). + * against `llm.callSites.analyzeConversation` (falling back to `llm.default` + * when no override is set). */ import type { ServerMessage } from "../../daemon/message-protocol.js"; import { diff --git a/cli/src/lib/config-utils.ts b/cli/src/lib/config-utils.ts index 5d468f6cbc0..10eb785a470 100644 --- a/cli/src/lib/config-utils.ts +++ b/cli/src/lib/config-utils.ts @@ -5,8 +5,8 @@ import { join } from "path"; /** * Convert flat dot-notation key=value pairs into a nested config object. * - * e.g. {"services.inference.provider": "anthropic", "services.inference.model": "claude-opus-4-6"} - * → {services: {inference: {provider: "anthropic", model: "claude-opus-4-6"}}} + * e.g. {"llm.default.provider": "anthropic", "llm.default.model": "claude-opus-4-6"} + * → {llm: {default: {provider: "anthropic", model: "claude-opus-4-6"}}} */ export function buildNestedConfig( configValues: Record, @@ -39,8 +39,8 @@ export function buildNestedConfig( * values into its workspace config on first boot. * * Keys use dot-notation to address nested fields. For example: - * "services.inference.provider" → {services: {inference: {provider: ...}}} - * "services.inference.model" → {services: {inference: {model: ...}}} + * "llm.default.provider" → {llm: {default: {provider: ...}}} + * "llm.default.model" → {llm: {default: {model: ...}}} * * Returns undefined when configValues is empty (nothing to write). */ diff --git a/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift b/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift index 51b7446edf8..2cafac0d82f 100644 --- a/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift +++ b/clients/macos/vellum-assistant/Features/Settings/SettingsStore.swift @@ -2170,19 +2170,15 @@ public final class SettingsStore: ObservableObject { /// Loads service modes (inference, image-generation) from workspace config. /// Called during init and when the daemon reconnects. func loadServiceModes(config: [String: Any]) { - // Resolve inference provider/model with `llm.default.*` as the - // canonical source (PR 4 backfills it from `services.inference`). - // Fall back to `services.inference.{provider,model}` for unmigrated - // configs — defensive, since the migration should have run for - // existing users but skips early-return cases (missing config, - // malformed JSON, etc.). + // Resolve inference provider/model from `llm.default.*` — the unified + // LLM config block is the only source of truth. The + // `services.inference.mode` field stays under `services` because it is + // an inference-delivery setting (managed vs. your-own), orthogonal to + // model selection. let services = config["services"] as? [String: Any] let llmDefault = (config["llm"] as? [String: Any])?["default"] as? [String: Any] let inference = services?["inference"] as? [String: Any] - // Inference mode remains under `services.inference.mode` — it is an - // inference-delivery setting (managed vs. your-own), not part of the - // LLM model config. if let inference, let mode = inference["mode"] as? String { self.inferenceMode = mode } @@ -2191,10 +2187,10 @@ public final class SettingsStore: ObservableObject { // via applyModelInfoResponse, its values take precedence over local // config which may be stale (especially for remote assistants). if lastDaemonProvider == nil { - if let provider = (llmDefault?["provider"] as? String) ?? (inference?["provider"] as? String) { + if let provider = llmDefault?["provider"] as? String { self.selectedInferenceProvider = provider } - if let model = (llmDefault?["model"] as? String) ?? (inference?["model"] as? String) { + if let model = llmDefault?["model"] as? String { self.selectedModel = model } } @@ -2960,29 +2956,8 @@ public final class SettingsStore: ObservableObject { return task } - // TODO PR 19: remove. Superseded by setLLMDefaultProvider — kept for any - // legacy callers / tests that still write to `services.inference.provider` - // directly while the unification rollout is in progress. - @discardableResult - func setInferenceProvider(_ provider: String) -> Task { - selectedInferenceProvider = provider - let task = Task { - let success = await settingsClient.patchConfig([ - "services": ["inference": ["provider": provider]] - ]) - if !success { - log.error("Failed to patch config for inference provider") - } - return success - } - scheduleRoutingSourceRefresh() - return task - } - /// Persists the selected default LLM provider to the daemon config under - /// the unified `llm.default.provider` key. This is the canonical write - /// path now that the workspace migration consolidates LLM call-site - /// settings under `llm.*` (see PR 4 of the unify-llm-callsites plan). + /// the unified `llm.default.provider` key. /// /// Prefer `setLLMDefault(provider:model:)` when both keys change together /// — it writes them atomically in a single PATCH so the daemon's @@ -3793,35 +3768,6 @@ public final class SettingsStore: ObservableObject { return "http://\(ip):\(LockfilePaths.resolveGatewayPort(connectedAssistantId: connectedId))" } - // MARK: - Model Actions - - // TODO PR 19: remove. Superseded by setLLMDefaultModel — kept for any - // legacy callers (e.g. routes through the daemon's set-model HTTP endpoint - // which still enriches state via applyModelInfoResponse) while the - // unification rollout is in progress. - func setModel(_ model: String, provider: String? = nil, force: Bool = false) { - // Skip if neither model nor provider changed (unless forced, - // e.g. after an inference-mode switch that requires re-persisting - // the model+provider pair even when IDs haven't changed). - if !force { - let modelUnchanged = model == lastDaemonModel - let providerUnchanged = provider == nil || provider == lastDaemonProvider - guard !modelUnchanged || !providerUnchanged else { return } - } - lastDaemonModel = model - if let provider { lastDaemonProvider = provider } - Task { - let info = await settingsClient.setModel(model: model, provider: provider) - if let info { - applyModelInfoResponse(info) - } else if lastDaemonModel == model { - // Request failed — revert only if no newer call overwrote lastDaemonModel - lastDaemonModel = nil - lastDaemonProvider = nil - } - } - } - // MARK: - User Timezone Actions /// Saves a user timezone override under `ui.userTimezone`. diff --git a/clients/macos/vellum-assistantTests/MockSettingsClient.swift b/clients/macos/vellum-assistantTests/MockSettingsClient.swift index 535a6c0f588..e6416f050fd 100644 --- a/clients/macos/vellum-assistantTests/MockSettingsClient.swift +++ b/clients/macos/vellum-assistantTests/MockSettingsClient.swift @@ -10,7 +10,6 @@ final class MockSettingsClient: SettingsClientProtocol { var saveVercelConfigCalls: [String] = [] var deleteVercelConfigCallCount = 0 var fetchModelInfoCallCount = 0 - var setModelCalls: [(model: String, provider: String?)] = [] var setImageGenModelCalls: [String] = [] var fetchTelegramConfigCallCount = 0 var setTelegramConfigCalls: [(action: String, botToken: String?, commands: [TelegramConfigRequestCommand]?)] = [] @@ -26,7 +25,6 @@ final class MockSettingsClient: SettingsClientProtocol { var saveVercelConfigResponse: VercelApiConfigResponseMessage? var deleteVercelConfigResponse: VercelApiConfigResponseMessage? var modelInfoResponse: ModelInfoMessage? - var setModelResponse: ModelInfoMessage? var setImageGenModelResponse: ModelInfoMessage? var embeddingConfigResponse: EmbeddingConfigMessage? var setEmbeddingConfigResponse: EmbeddingConfigMessage? @@ -73,8 +71,7 @@ final class MockSettingsClient: SettingsClientProtocol { } func setModel(model: String, provider: String? = nil) async -> ModelInfoMessage? { - setModelCalls.append((model: model, provider: provider)) - return setModelResponse + nil } func setImageGenModel(modelId: String) async -> ModelInfoMessage? { diff --git a/gateway/ARCHITECTURE.md b/gateway/ARCHITECTURE.md index b92b45f33eb..9e2450c2d6a 100644 --- a/gateway/ARCHITECTURE.md +++ b/gateway/ARCHITECTURE.md @@ -1112,7 +1112,7 @@ Call behavior is controlled via the `calls` config block in the assistant config | `calls.disclosure.enabled` | boolean | `true` | Whether the AI should disclose it is an AI at the start of the call. | | `calls.disclosure.text` | string | _(default disclosure prompt)_ | The disclosure instruction included in the system prompt. | | `calls.safety.denyCategories` | string[] | `[]` | Categories of calls to deny (e.g., emergency numbers are always denied regardless of this setting). | -| `calls.model` | string | _(unset — uses default model)_ | Optional override for the LLM model used in voice call conversations. | +| `llm.callSites.callAgent.model` | string | _(unset — falls back to `llm.default.model`)_ | Optional override for the LLM model used in voice call conversations. | | `calls.voice.language` | string | `'en-US'` | Language code for TTS and transcription. | | `services.stt.provider` | enum | `'deepgram'` | STT provider for all boundaries including telephony. Determines the Twilio integration path (ConversationRelay-native for `deepgram`/`google-gemini`, media-stream for `openai-whisper`). | | `services.tts.provider` | enum | `'elevenlabs'` | Active TTS provider for speech synthesis (catalog-driven; see [TTS Provider Abstraction](../assistant/ARCHITECTURE.md#tts-provider-abstraction-servicestts)). |