vellum-ai · siddseethepalli · Apr 16, 2026 · Apr 16, 2026 · Apr 16, 2026
diff --git a/assistant/src/__tests__/llm-resolver.test.ts b/assistant/src/__tests__/llm-resolver.test.ts
@@ -0,0 +1,213 @@
+import { describe, expect, test } from "bun:test";
+
+import { z } from "zod";
+
+import { resolveCallSiteConfig } from "../config/llm-resolver.js";
+import { LLMSchema } from "../config/schemas/llm.js";
+
+// Fixture for `llm.default`, assembled from its nested sections so each
+// level of the config tree is named. Every test below passes it as the
+// `default` layer; enum-like string fields keep their literal types via
+// `as const`.
+const overflowRecoveryDefault = {
+  enabled: true,
+  safetyMarginRatio: 0.05,
+  maxAttempts: 3,
+  interactiveLatestTurnCompression: "summarize" as const,
+  nonInteractiveLatestTurnCompression: "truncate" as const,
+};
+
+const contextWindowDefault = {
+  enabled: true,
+  maxInputTokens: 200000,
+  targetBudgetRatio: 0.3,
+  compactThreshold: 0.8,
+  summaryBudgetRatio: 0.05,
+  overflowRecovery: overflowRecoveryDefault,
+};
+
+const thinkingDefault = { enabled: true, streamThinking: true };
+
+const fullDefault = {
+  provider: "anthropic" as const,
+  model: "claude-opus-4-7",
+  maxTokens: 64000,
+  effort: "max" as const,
+  speed: "standard" as const,
+  temperature: null,
+  thinking: thinkingDefault,
+  contextWindow: contextWindowDefault,
+};
+
+describe("resolveCallSiteConfig", () => {
+  // Parses an `llm` config whose `default` layer is always `fullDefault`,
+  // with any additional top-level sections (`profiles`, `callSites`) spread
+  // on top.
+  const parseLlm = (sections: Record<string, unknown> = {}) =>
+    LLMSchema.parse({ default: fullDefault, ...sections });
+
+  test("returns default when call site is absent and no profile", () => {
+    const config = parseLlm();
+    expect(resolveCallSiteConfig("mainAgent", config)).toEqual(fullDefault);
+  });
+
+  test("site-level field overrides default", () => {
+    const config = parseLlm({
+      callSites: { mainAgent: { model: "claude-sonnet-4-7" } },
+    });
+    const { model, provider, maxTokens } = resolveCallSiteConfig(
+      "mainAgent",
+      config,
+    );
+    expect(model).toBe("claude-sonnet-4-7");
+    // Fields the call site did not mention keep their default values.
+    expect(provider).toBe("anthropic");
+    expect(maxTokens).toBe(64000);
+  });
+
+  test("profile field overrides default when call site references it", () => {
+    const config = parseLlm({
+      profiles: { fast: { speed: "fast", effort: "low" } },
+      callSites: { memoryExtraction: { profile: "fast" } },
+    });
+    const { speed, effort, provider, model } = resolveCallSiteConfig(
+      "memoryExtraction",
+      config,
+    );
+    expect(speed).toBe("fast");
+    expect(effort).toBe("low");
+    // Anything the profile leaves unset still comes from the default.
+    expect(provider).toBe("anthropic");
+    expect(model).toBe("claude-opus-4-7");
+  });
+
+  test("site field beats both profile and default (precedence test)", () => {
+    const config = parseLlm({
+      profiles: {
+        fast: { speed: "fast", effort: "low", model: "profile-model" },
+      },
+      callSites: {
+        memoryExtraction: {
+          profile: "fast",
+          model: "site-model",
+          effort: "high",
+        },
+      },
+    });
+    const { model, effort, speed, provider } = resolveCallSiteConfig(
+      "memoryExtraction",
+      config,
+    );
+    // Layer 1: values set directly on the call site.
+    expect(model).toBe("site-model");
+    expect(effort).toBe("high");
+    // Layer 2: the profile fills in what the call site left unset.
+    expect(speed).toBe("fast");
+    // Layer 3: the default supplies everything else.
+    expect(provider).toBe("anthropic");
+  });
+
+  test("thinking.enabled override does not nuke thinking.streamThinking (deep merge)", () => {
+    const config = parseLlm({
+      callSites: { mainAgent: { thinking: { enabled: false } } },
+    });
+    const { thinking } = resolveCallSiteConfig("mainAgent", config);
+    expect(thinking.enabled).toBe(false);
+    expect(thinking.streamThinking).toBe(true);
+  });
+
+  test("contextWindow.overflowRecovery.maxAttempts override preserves siblings (depth 2 deep merge)", () => {
+    const config = parseLlm({
+      callSites: {
+        mainAgent: {
+          contextWindow: { overflowRecovery: { maxAttempts: 7 } },
+        },
+      },
+    });
+    const { contextWindow } = resolveCallSiteConfig("mainAgent", config);
+    const { overflowRecovery } = contextWindow;
+    // The one leaf that was overridden, two levels down.
+    expect(overflowRecovery.maxAttempts).toBe(7);
+    // Every other leaf inside overflowRecovery keeps its default.
+    expect(overflowRecovery.enabled).toBe(true);
+    expect(overflowRecovery.safetyMarginRatio).toBe(0.05);
+    expect(overflowRecovery.interactiveLatestTurnCompression).toBe(
+      "summarize",
+    );
+    expect(overflowRecovery.nonInteractiveLatestTurnCompression).toBe(
+      "truncate",
+    );
+    // Leaves directly on contextWindow are likewise untouched.
+    expect(contextWindow.enabled).toBe(true);
+    expect(contextWindow.maxInputTokens).toBe(200000);
+    expect(contextWindow.targetBudgetRatio).toBe(0.3);
+  });
+
+  test("site without profile uses only default + site overrides", () => {
+    const config = parseLlm({
+      // `fast` exists but nothing references it, so none of its fields may
+      // show up in the resolved config.
+      profiles: { fast: { speed: "fast", effort: "low" } },
+      callSites: { mainAgent: { temperature: 0.5 } },
+    });
+    const { temperature, speed, effort } = resolveCallSiteConfig(
+      "mainAgent",
+      config,
+    );
+    expect(temperature).toBe(0.5);
+    // Still the default values: mainAgent never opted into the profile.
+    expect(speed).toBe("standard");
+    expect(effort).toBe("max");
+  });
+
+  test("returns isolated nested objects (not aliased to llm.default)", () => {
+    // Guards against the resolver handing back nested objects that are the
+    // very same references held by `llm.default`. The call site here has no
+    // overrides for `thinking` or `contextWindow`, which is exactly the case
+    // where aliasing would occur. Strategy: resolve, mutate the result,
+    // resolve again, and check the second result is pristine.
+    const config = parseLlm();
+
+    const before = resolveCallSiteConfig("mainAgent", config);
+    expect(before.thinking.enabled).toBe(true);
+    expect(before.contextWindow.overflowRecovery.maxAttempts).toBe(3);
+
+    // Write through the first result. With aliasing, these assignments
+    // would land on the shared source config.
+    before.thinking.enabled = false;
+    before.contextWindow.overflowRecovery.maxAttempts = 999;
+
+    // The fixture literal itself must not have changed either.
+    expect(fullDefault.thinking.enabled).toBe(true);
+    expect(fullDefault.contextWindow.overflowRecovery.maxAttempts).toBe(3);
+
+    // Core assertion: a fresh resolution reports the original values rather
+    // than the ones written to `before`.
+    const after = resolveCallSiteConfig("mainAgent", config);
+    expect(after.thinking.enabled).toBe(true);
+    expect(after.contextWindow.overflowRecovery.maxAttempts).toBe(3);
+
+    // Spell out the isolation contract: each resolution allocates its own
+    // nested objects at every level. (Implied by the checks above, but
+    // stated explicitly as documentation.)
+    expect(after.thinking).not.toBe(before.thinking);
+    expect(after.contextWindow).not.toBe(before.contextWindow);
+    expect(after.contextWindow.overflowRecovery).not.toBe(
+      before.contextWindow.overflowRecovery,
+    );
+  });
+
+  test("defensive throw on unknown profile reference (bypassing superRefine)", () => {
+    // Build the config object by hand instead of via `LLMSchema.parse`:
+    // the schema's `superRefine` would refuse a call site that points at a
+    // missing profile, and this test needs that invalid shape to reach the
+    // resolver's own guard.
+    const unvalidated: z.infer<typeof LLMSchema> = {
+      default: fullDefault,
+      profiles: {},
+      callSites: {
+        mainAgent: { profile: "nonexistent" },
+      },
+      pricingOverrides: [],
+    };
+    const resolve = () => resolveCallSiteConfig("mainAgent", unvalidated);
+    expect(resolve).toThrow(/references undefined profile "nonexistent"/);
+  });
+});
diff --git a/assistant/src/config/llm-resolver.ts b/assistant/src/config/llm-resolver.ts
@@ -0,0 +1,128 @@
+import { z } from "zod";
+
+import {
+  type LLMCallSite,
+  LLMConfigBase,
+  type LLMConfigFragment,
+  type LLMSchema,
+} from "./schemas/llm.js";
+
+/**
+ * Builds the effective `LLMConfigBase` for one call site by stacking up to
+ * three configuration layers and deep-merging them.
+ *
+ * Layers, from lowest to highest precedence:
+ *   1. `llm.default` (always present)
+ *   2. `llm.profiles[site.profile]`, when the call site names a profile
+ *   3. the call site's own fields from `llm.callSites[callSite]`
+ *
+ * Merging is recursive for nested plain objects (`thinking`,
+ * `contextWindow`, `contextWindow.overflowRecovery`), so a partial override
+ * only replaces the leaves it mentions.
+ *
+ * The function is synchronous and performs no I/O.
+ *
+ * @param callSite - Key of the call site to resolve.
+ * @param llm - The `llm` configuration section.
+ * @returns The merged configuration for `callSite`.
+ * @throws Error when the call site names a profile that is missing from
+ *   `llm.profiles`.
+ */
+export function resolveCallSiteConfig(
+  callSite: LLMCallSite,
+  llm: z.infer<typeof LLMSchema>,
+): z.infer<typeof LLMConfigBase> {
+  // Ordered lowest-precedence first; `deepMerge` lets later layers win.
+  const layers: Mergeable[] = [llm.default as Mergeable];
+
+  const override = llm.callSites?.[callSite];
+  if (override != null) {
+    // `profile` only selects a layer; it is not itself a config field, so it
+    // is separated from the fields that get merged.
+    const { profile: profileName, ...ownFields } = override;
+
+    if (profileName != null) {
+      const named: LLMConfigFragment | undefined = llm.profiles?.[profileName];
+      if (named == null) {
+        // `LLMSchema.superRefine` rejects dangling profile references when a
+        // config is parsed, so this only triggers for objects that were
+        // built without going through the schema.
+        throw new Error(
+          `LLM call site "${callSite}" references undefined profile "${profileName}"`,
+        );
+      }
+      layers.push(named as Mergeable);
+    }
+
+    layers.push(ownFields as Mergeable);
+  }
+
+  // Even with only the default layer this still goes through `deepMerge`,
+  // so every path returns a freshly merged object.
+  return finalize(deepMerge(...layers));
+}
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/** Loosely-typed string-keyed bag used while layers are being merged. */
+type Mergeable = Record<string, unknown>;
+
+/**
+ * Type guard for values the deep merge should descend into.
+ *
+ * Only objects whose prototype is exactly `Object.prototype` qualify.
+ * Arrays, `null`, primitives, class instances and null-prototype objects
+ * all return `false`, which means the merge assigns them as-is instead of
+ * merging key-by-key.
+ */
+function isPlainObject(value: unknown): value is Mergeable {
+  if (typeof value !== "object" || value === null) {
+    return false;
+  }
+  if (Array.isArray(value)) {
+    return false;
+  }
+  return Object.getPrototypeOf(value) === Object.prototype;
+}
+
+/**
+ * Folds any number of layers into one new object, left to right, with later
+ * layers taking precedence over earlier ones.
+ *
+ * Per key:
+ * - `undefined` is ignored, so an unset optional field never erases a value
+ *   contributed by an earlier layer.
+ * - A plain-object value is merged recursively into whatever plain object
+ *   is already accumulated under that key (or into a fresh `{}` if there is
+ *   none), so nested overrides apply leaf-by-leaf.
+ * - Anything else (primitives, `null`, arrays, non-plain objects) replaces
+ *   the accumulated value outright and is stored by reference.
+ *
+ * Because plain objects are always rebuilt through recursion, the result
+ * never shares a nested plain object with any input layer.
+ */
+function deepMerge(...sources: Mergeable[]): Mergeable {
+  return sources.reduce<Mergeable>((merged, layer) => {
+    for (const field of Object.keys(layer)) {
+      const incoming = layer[field];
+
+      // Unset in this layer: leave whatever earlier layers provided.
+      if (incoming === undefined) {
+        continue;
+      }
+
+      // Leaf value (or array): plain replacement.
+      if (!isPlainObject(incoming)) {
+        merged[field] = incoming;
+        continue;
+      }
+
+      // Nested object: merge onto the accumulated object when there is one,
+      // otherwise onto an empty object so the stored value is a new copy
+      // and not the layer's own object.
+      const current = merged[field];
+      const target: Mergeable = isPlainObject(current) ? current : {};
+      merged[field] = deepMerge(target, incoming);
+    }
+    return merged;
+  }, {});
+}
+
+/**
+ * Re-types a merged bag as `LLMConfigBase` without any runtime work.
+ *
+ * The assertion is justified by how the bag is built: the first layer is
+ * always `llm.default`, a complete `LLMConfigBase`, and the layers merged on
+ * top are optional fragments, so no required field can go missing.
+ */
+function finalize(merged: Mergeable): z.infer<typeof LLMConfigBase> {
+  const untyped: unknown = merged;
+  return untyped as z.infer<typeof LLMConfigBase>;
+}