vellum-ai · siddseethepalli · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/assistant/src/__tests__/extension-id-sync-guard.test.ts b/assistant/src/__tests__/extension-id-sync-guard.test.ts
@@ -41,11 +41,15 @@ function parseCanonicalConfig(): AllowlistConfig {
   const parsed = JSON.parse(raw) as Partial<AllowlistConfig>;
 
   if (!Number.isInteger(parsed.version) || (parsed.version ?? 0) <= 0) {
-    throw new Error("Invalid canonical config: version must be a positive integer");
+    throw new Error(
+      "Invalid canonical config: version must be a positive integer",
+    );
   }
 
   if (!Array.isArray(parsed.allowedExtensionIds)) {
-    throw new Error("Invalid canonical config: allowedExtensionIds must be an array");
+    throw new Error(
+      "Invalid canonical config: allowedExtensionIds must be an array",
+    );
   }
 
   if (parsed.allowedExtensionIds.length === 0) {

diff --git a/assistant/src/__tests__/model-intents.test.ts b/assistant/src/__tests__/model-intents.test.ts
@@ -31,9 +31,9 @@ describe("model intents", () => {
   });
 
   test("falls back to provider default for unknown providers", () => {
-    expect(getProviderDefaultModel("unknown-provider")).toBe("claude-opus-4-6");
+    expect(getProviderDefaultModel("unknown-provider")).toBe("claude-opus-4-7");
     expect(resolveModelIntent("unknown-provider", "quality-optimized")).toBe(
-      "claude-opus-4-6",
+      "claude-opus-4-7",
     );
   });
 });

diff --git a/assistant/src/agent/loop.ts b/assistant/src/agent/loop.ts
@@ -460,12 +460,13 @@ export class AgentLoop {
           for (let i = history.length - 1; i >= 0; i--) {
             const msg = history[i];
             if (msg.role !== "assistant") continue;
-            return msg.content.some(
+            const hasText = msg.content.some(
               (block) =>
                 block.type === "text" &&
                 typeof (block as { text?: unknown }).text === "string" &&
                 (block as { text: string }).text.trim().length > 0,
             );
+            if (hasText) return true;
           }
           return false;
         })();

diff --git a/assistant/src/calls/voice-session-bridge.ts b/assistant/src/calls/voice-session-bridge.ts
@@ -522,10 +522,6 @@ export async function startVoiceTurn(
           // Note: tool_use_preview_start is intentionally not handled here.
           // Voice only reacts to the definitive tool_use_start event.
         },
-        // Route every voice-call agent loop turn through the unified
-        // `llm.callSites.callAgent` resolver. PR 4 backfilled this entry
-        // from the legacy `config.calls.model` setting, so existing
-        // overrides continue to apply.
         { callSite: "callAgent" },
       );
       if (lastError) {

diff --git a/assistant/src/cli/commands/__tests__/email-list.test.ts b/assistant/src/cli/commands/__tests__/email-list.test.ts
@@ -17,6 +17,7 @@ import { runAssistantCommand } from "../../__tests__/run-assistant-command.js";
 
 const ASSISTANT_ID = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee";
 const API_KEY_CREDENTIAL = credentialKey("vellum", "assistant_api_key");
+const ASSISTANT_ID_CREDENTIAL = credentialKey("vellum", "platform_assistant_id");
 
 /**
  * Return the recorded fetch calls, excluding the feature-flag fetch that
@@ -83,6 +84,11 @@ beforeEach(async () => {
   _setOverridesForTesting({ "email-channel": true });
   setPlatformAssistantId(ASSISTANT_ID);
   await setSecureKeyAsync(API_KEY_CREDENTIAL, "test-api-key");
+  // Ensure VellumPlatformClient.create() cannot fall back to a real
+  // platform_assistant_id from the encrypted credential store on dev
+  // machines — the "missing assistant ID" test relies on the fallback
+  // lookup returning empty.
+  await deleteSecureKeyAsync(ASSISTANT_ID_CREDENTIAL);
 });
 
 afterEach(() => {

diff --git a/assistant/src/config/schemas/heartbeat.ts b/assistant/src/config/schemas/heartbeat.ts
@@ -38,22 +38,16 @@ export const HeartbeatConfigSchema = z
     const startNull = config.activeHoursStart == null;
     const endNull = config.activeHoursEnd == null;
     if (startNull !== endNull) {
-      // Emit on both fields so validateWithSchema's delete-and-retry strips
-      // both sides in one pass. Single-emit on the null side can cascade when
-      // the explicit value happens to equal the opposite default (e.g.
-      // { start: null, end: 8 } → strip start → default 8 → equal check fires
-      // → loader falls back to full defaults, wiping unrelated keys like
-      // maxTokens).
+      // Emit only on the null side so validateWithSchema's delete-and-retry
+      // preserves the explicit non-null value. Dual-emit would delete both
+      // keys and discard that value: a mixed-null config such as
+      // { activeHoursStart: null, activeHoursEnd: 20 } would come back as
+      // the defaults (8, 22) instead of retaining the explicit 20.
       const message =
         "heartbeat.activeHoursStart and heartbeat.activeHoursEnd must both be set or both be null";
       ctx.addIssue({
         code: z.ZodIssueCode.custom,
-        path: ["activeHoursStart"],
-        message,
-      });
-      ctx.addIssue({
-        code: z.ZodIssueCode.custom,
-        path: ["activeHoursEnd"],
+        path: [startNull ? "activeHoursStart" : "activeHoursEnd"],
         message,
       });
       return;
@@ -63,17 +57,11 @@ export const HeartbeatConfigSchema = z
       config.activeHoursEnd != null &&
       config.activeHoursStart === config.activeHoursEnd
     ) {
-      // Emit on both fields. Single-emit would strip one side and the default
-      // for that side could recreate a new mismatch (e.g. { start: 22, end: 22 }
-      // → strip end → default 22 → equal again), cascading to a full defaults
-      // reset that wipes unrelated fields.
+      // Emit only on activeHoursEnd so the explicit start value is preserved.
+      // Dual-emit would delete both keys, e.g. { start: 5, end: 5 } → (8, 22)
+      // instead of preserving the explicit 5 as start → (5, 22).
       const message =
         "heartbeat.activeHoursStart and heartbeat.activeHoursEnd must not be equal (would create an empty window)";
-      ctx.addIssue({
-        code: z.ZodIssueCode.custom,
-        path: ["activeHoursStart"],
-        message,
-      });
       ctx.addIssue({
         code: z.ZodIssueCode.custom,
         path: ["activeHoursEnd"],

diff --git a/assistant/src/config/schemas/llm.ts b/assistant/src/config/schemas/llm.ts
@@ -257,6 +257,38 @@ export const LLMCallSiteConfig = LLMConfigFragment.extend({
 });
 export type LLMCallSiteConfig = z.infer<typeof LLMCallSiteConfig>;
 
+// ---------------------------------------------------------------------------
+// Latency-optimized call-site defaults
+//
+// Call sites that previously used `modelIntent: "latency-optimized"` need a
+// fast model, disabled thinking, and low effort so they don't fall through to
+// the expensive `llm.default` (opus with max effort). These defaults match the
+// Anthropic provider; users on other providers override via config.
+// ---------------------------------------------------------------------------
+
+const LATENCY_OPTIMIZED_FRAGMENT = {
+  model: "claude-haiku-4-5-20251001",
+  effort: "low" as const,
+  thinking: { enabled: false },
+};
+// NOTE(review): entries share LATENCY_OPTIMIZED_FRAGMENT (or its `thinking`) by reference.
+export const LATENCY_OPTIMIZED_CALLSITE_DEFAULTS: Partial<
+  Record<LLMCallSite, z.input<typeof LLMCallSiteConfig>>
+> = {
+  guardianQuestionCopy: LATENCY_OPTIMIZED_FRAGMENT,
+  watchCommentary: LATENCY_OPTIMIZED_FRAGMENT,
+  interactionClassifier: LATENCY_OPTIMIZED_FRAGMENT,
+  skillCategoryInference: LATENCY_OPTIMIZED_FRAGMENT,
+  inviteInstructionGenerator: LATENCY_OPTIMIZED_FRAGMENT,
+  notificationDecision: LATENCY_OPTIMIZED_FRAGMENT,
+  preferenceExtraction: LATENCY_OPTIMIZED_FRAGMENT,
+  commitMessage: {
+    ...LATENCY_OPTIMIZED_FRAGMENT,
+    maxTokens: 120,
+    temperature: 0.2,
+  },
+};
+
 // ---------------------------------------------------------------------------
 // Top-level LLM schema
 // ---------------------------------------------------------------------------
@@ -269,7 +301,9 @@ export const LLMSchema = z
     // rejecting keys that aren't members of `LLMCallSiteEnum` — exactly the
     // behavior we want (typo detection without requiring callers to declare
     // every call site).
-    callSites: z.partialRecord(LLMCallSiteEnum, LLMCallSiteConfig).default({}),
+    callSites: z
+      .partialRecord(LLMCallSiteEnum, LLMCallSiteConfig)
+      .default(LATENCY_OPTIMIZED_CALLSITE_DEFAULTS),
     pricingOverrides: z.array(PricingOverrideSchema).default([]),
   })
   .superRefine((config, ctx) => {

diff --git a/assistant/src/credential-execution/executable-discovery.ts b/assistant/src/credential-execution/executable-discovery.ts
@@ -67,17 +67,28 @@ function getManagedBootstrapSocketPath(): string {
  * a malicious binary there. Removed to close the sandbox-escape vector.
  *
  * Search order:
- * 1. Alongside the running executable (packaged macOS app:
- *    `<App>.app/Contents/MacOS/credential-executor`). When running from
- *    source via `bun run`, `process.execPath` points at the bun binary
- *    itself, so this path won't exist and the search falls through.
+ * 1. Alongside the running executable, but ONLY when running from a
+ *    packaged macOS app bundle (`<App>.app/Contents/MacOS/credential-executor`).
+ *    In dev mode, `process.execPath` points at the bun/node install dir
+ *    (e.g. `~/.bun/bin`), where an unrelated file named `credential-executor`
+ *    could be picked up by accident.
  * 2. `<binDir>/credential-executor` — user-installed override (dev flow).
  */
 function getLocalBinarySearchPaths(): string[] {
-  return [
-    join(dirname(process.execPath), "credential-executor"),
-    join(getBinDir(), "credential-executor"),
-  ];
+  const paths: string[] = [];
+
+  // Only trust the sibling of process.execPath when the executable sits
+  // directly inside a packaged bundle's `<App>.app/Contents/MacOS` directory.
+  // A suffix match (not a substring match) is required: in dev mode execPath
+  // is the bun/node binary, and a directory that merely contains the bundle
+  // segment (e.g. `/tmp/x.app/Contents/MacOS-evil/bin`) must not be searched.
+  const execDir = dirname(process.execPath);
+  if (execDir.endsWith(".app/Contents/MacOS")) {
+    paths.push(join(execDir, "credential-executor"));
+  }
+  // The user-installed override in the bin directory is always a candidate.
+  paths.push(join(getBinDir(), "credential-executor"));
+  return paths;
 }
 
 // ---------------------------------------------------------------------------

diff --git a/assistant/src/daemon/guardian-action-generators.ts b/assistant/src/daemon/guardian-action-generators.ts
@@ -1,5 +1,4 @@
-import { loadConfig } from "../config/loader.js";
-import { getProvider } from "../providers/registry.js";
+import { getConfiguredProvider } from "../providers/provider-send-message.js";
 import {
   buildGuardianActionGenerationPrompt,
   getGuardianActionFallbackMessage,
@@ -26,13 +25,8 @@ import type {
  */
 export function createGuardianActionCopyGenerator(): GuardianActionCopyGenerator {
   return async (context, options = {}) => {
-    const config = loadConfig();
-    let provider;
-    try {
-      provider = getProvider(config.llm.default.provider);
-    } catch {
-      return null;
-    }
+    const provider = await getConfiguredProvider("guardianQuestionCopy");
+    if (!provider) return null;
 
     const fallbackText =
       options.fallbackText?.trim() || getGuardianActionFallbackMessage(context);
@@ -130,8 +124,10 @@ const VALID_FOLLOWUP_DISPOSITIONS: ReadonlySet<string> = new Set([
  */
 export function createGuardianFollowUpConversationGenerator(): GuardianFollowUpConversationGenerator {
   return async (context) => {
-    const config = loadConfig();
-    const provider = getProvider(config.llm.default.provider);
+    const provider = await getConfiguredProvider("guardianQuestionCopy");
+    if (!provider) {
+      throw new Error("No configured provider available for follow-up conversation");
+    }
 
     const userPrompt = [
       `Original question from the voice call: "${context.questionText}"`,

diff --git a/assistant/src/daemon/server.ts b/assistant/src/daemon/server.ts
@@ -814,6 +814,11 @@ export class DaemonServer {
     // DB, exposing only the narrow surface the wake helper needs.
     registerDefaultWakeResolver(async (conversationId) => {
       try {
+        // Only resolve existing conversations — don't create ghost
+        // conversations for stale targets (e.g. meetings that ended
+        // but a delayed opportunity callback still fires).
+        const existing = getConversation(conversationId);
+        if (!existing) return null;
         const conversation = await this.getOrCreateConversation(conversationId);
         return conversationToWakeTarget(conversation);
       } catch (err) {

diff --git a/assistant/src/memory/conversation-analyze-job.ts b/assistant/src/memory/conversation-analyze-job.ts
@@ -3,9 +3,8 @@
 //
 // Bridges the jobs worker to the shared analyzeConversation() service. The
 // deps bundle is stashed on a module singleton during daemon startup; if it
-// isn't set yet we skip this iteration. The next batch / idle / lifecycle
-// trigger from `enqueueAutoAnalysisIfEnabled()` will produce a fresh job
-// once the daemon has fully started.
+// isn't set yet the handler throws BackendUnavailableError so the worker
+// defers with exponential backoff until deps become available.
 //
 // The service itself distinguishes manual vs. auto triggers: this handler
 // always invokes with `trigger: "auto"`, so the rolling analysis conversation
@@ -15,6 +14,7 @@
 import type { AssistantConfig } from "../config/types.js";
 import { analyzeConversation } from "../runtime/services/analyze-conversation.js";
 import { getAnalysisDeps } from "../runtime/services/analyze-deps-singleton.js";
+import { BackendUnavailableError } from "../util/errors.js";
 import { getLogger } from "../util/logger.js";
 import { enqueueAutoAnalysisIfEnabled } from "./auto-analysis-enqueue.js";
 import type { MemoryJob } from "./jobs-store.js";
@@ -33,19 +33,20 @@ export async function conversationAnalyzeJob(
 
   const deps = getAnalysisDeps();
   if (!deps) {
-    // Daemon hasn't finished startup. Return without throwing — a plain
-    // Error here would be classified as fatal by `classifyError()` and the
-    // worker would mark the job permanently failed. Throwing
-    // `BackendUnavailableError` would defer, but defer counters cap out and
-    // would still permanently fail in the worst case. Since
-    // `enqueueAutoAnalysisIfEnabled()` re-enqueues on the next batch / idle
-    // / lifecycle trigger, dropping this iteration is the safest choice and
-    // avoids retry storms during slow daemon startup.
+    // Daemon hasn't finished startup. Throw BackendUnavailableError so the
+    // worker defers the job with exponential backoff instead of completing
+    // it. Returning success here would permanently drop the job via
+    // completeMemoryJob — conversations with a pre-existing queued job
+    // during startup and no subsequent activity would never be analyzed.
+    // The deferral budget (50 × up to 5min backoff) is generous enough to
+    // outlast any realistic startup delay.
     log.warn(
       { jobId: job.id, conversationId },
-      "Skipping job: analysis deps not yet initialized; will retrigger",
+      "Deferring job: analysis deps not yet initialized",
+    );
+    throw new BackendUnavailableError(
+      "Analysis deps not yet initialized during daemon startup",
     );
-    return;
   }
 
   const result = await analyzeConversation(conversationId, deps, {

diff --git a/assistant/src/memory/indexer.ts b/assistant/src/memory/indexer.ts
@@ -203,11 +203,11 @@ export async function indexMessageNow(
       );
 
       // ── Auto-analysis triggers ─────────────────────────────────────
-      // Both triggers route through `upsertDebouncedJob` in the helper,
-      // so a single pending row is shared. Order matters: the idle
-      // upsert runs first (pushing `runAfter` into the future); the
-      // batch trigger runs last so a threshold crossing pulls
-      // `runAfter` back to "now" and overrides the idle debounce.
+      // Immediate triggers (batch, compaction) and debounced triggers
+      // (idle, lifecycle) write to separate rows keyed by triggerGroup
+      // via `upsertAutoAnalysisJob`. When an immediate trigger fires,
+      // it cancels any pending debounced row for the same conversation
+      // to avoid redundant analysis runs.
       enqueueAutoAnalysisIfEnabled({
         conversationId: input.conversationId,
         trigger: "idle",