vellum-ai · noanflaherty · Feb 24, 2026 · Feb 24, 2026 · devin-ai-integration · Feb 24, 2026
diff --git a/assistant/src/__tests__/model-intents.test.ts b/assistant/src/__tests__/model-intents.test.ts
@@ -0,0 +1,105 @@
+import { describe, expect, test } from 'bun:test';
+import type { Message, Provider, ProviderResponse, SendMessageOptions } from '../providers/types.js';
+import { RetryProvider } from '../providers/retry.js';
+import { getProviderDefaultModel, isModelIntent, resolveModelIntent } from '../providers/model-intents.js';
+
+/** Single-turn user conversation shared by every sendMessage call in this file. */
+const DUMMY_MESSAGES: Message[] = [
+  { role: 'user', content: [{ type: 'text', text: 'hello' }] },
+];
+
+/** Builds a canned response that reports `model` as the model that answered. */
+function makeResponse(model: string): ProviderResponse {
+  const usage = { inputTokens: 1, outputTokens: 1 };
+  return {
+    content: [{ type: 'text', text: 'ok' }],
+    model,
+    usage,
+    stopReason: 'end_turn',
+  };
+}
+
+/**
+ * Creates a stub provider that reports each call's options to `onCall` and
+ * answers with the model named in `options.config`, or 'default-model' if none.
+ */
+function makeProvider(
+  name: string,
+  onCall: (options: SendMessageOptions | undefined) => void,
+): Provider {
+  const sendMessage: Provider['sendMessage'] = async (_messages, _tools, _systemPrompt, options) => {
+    onCall(options);
+    const requested = (options?.config as Record<string, unknown> | undefined)?.model as string | undefined;
+    return makeResponse(requested ?? 'default-model');
+  };
+  return { name, sendMessage };
+}
+
+/**
+ * Sends DUMMY_MESSAGES through a RetryProvider wrapping a stub provider and
+ * returns the config object the stub received.
+ */
+async function sendAndCaptureConfig(
+  providerName: string,
+  config: SendMessageOptions['config'],
+): Promise<Record<string, unknown>> {
+  let captured: SendMessageOptions | undefined;
+  const retrying = new RetryProvider(makeProvider(providerName, (options) => {
+    captured = options;
+  }));
+  await retrying.sendMessage(DUMMY_MESSAGES, undefined, undefined, { config });
+  return captured?.config as Record<string, unknown>;
+}
+
+describe('model intents', () => {
+  test('validates model intent strings', () => {
+    for (const accepted of ['latency-optimized', 'quality-optimized', 'vision-optimized']) {
+      expect(isModelIntent(accepted)).toBe(true);
+    }
+    for (const rejected of ['fastest-model', undefined]) {
+      expect(isModelIntent(rejected)).toBe(false);
+    }
+  });
+
+  test('resolves intent to provider-specific model', () => {
+    const expectations = [
+      ['anthropic', 'latency-optimized', 'claude-haiku-4-5-20251001'],
+      ['anthropic', 'quality-optimized', 'claude-opus-4-6'],
+      ['anthropic', 'vision-optimized', 'claude-sonnet-4-6'],
+      ['openai', 'latency-optimized', 'gpt-4o-mini'],
+    ] as const;
+    for (const [provider, intent, model] of expectations) {
+      expect(resolveModelIntent(provider, intent)).toBe(model);
+    }
+  });
+
+  test('falls back to provider default for unknown providers', () => {
+    const fallbackModel = 'claude-opus-4-6';
+    expect(getProviderDefaultModel('unknown-provider')).toBe(fallbackModel);
+    expect(resolveModelIntent('unknown-provider', 'quality-optimized')).toBe(fallbackModel);
+  });
+});
+
+describe('RetryProvider model intent normalization', () => {
+  test('translates modelIntent into concrete model and strips modelIntent key', async () => {
+    const received = await sendAndCaptureConfig('anthropic', {
+      modelIntent: 'quality-optimized',
+      max_tokens: 123,
+    });
+
+    expect(received.model).toBe('claude-opus-4-6');
+    expect(received.modelIntent).toBeUndefined();
+    expect(received.max_tokens).toBe(123);
+  });
+
+  test('explicit model override wins over modelIntent', async () => {
+    const received = await sendAndCaptureConfig('openai', {
+      model: 'custom-model-v1',
+      modelIntent: 'latency-optimized',
+    });
+
+    expect(received.model).toBe('custom-model-v1');
+    expect(received.modelIntent).toBeUndefined();
+  });
+});
+
@@ -76,7 +76,7 @@ async function analyzeChunk(
     undefined,
     {
       config: {
-        model: 'claude-sonnet-4-6',
+        modelIntent: 'vision-optimized',
         max_tokens: 4096,
       },
     },

@@ -897,7 +897,7 @@ async function generateSkillIcon(name: string, description: string): Promise<str
     'You are a pixel art icon designer. When asked, return ONLY a single <svg> element — no explanation, no markdown, no code fences. The SVG must be a 16x16 grid pixel art icon using <rect> elements. Use a limited palette (3-5 colors). Keep it under 2KB. The viewBox should be "0 0 16 16" with each pixel being a 1x1 rect.',
     {
       config: {
-        model: 'claude-haiku-4-5-20251001',
+        modelIntent: 'latency-optimized',
         max_tokens: 1024,
       },
     },

diff --git a/assistant/src/daemon/classifier.ts b/assistant/src/daemon/classifier.ts
@@ -8,7 +8,7 @@ const CLASSIFICATION_TIMEOUT_MS = 5000;
 export type InteractionType = 'computer_use' | 'text_qa';
 
 /**
- * Classify a user task as computer_use or text_qa using a Haiku tool-use call,
+ * Classify a user task as computer_use or text_qa using an LLM tool-use call,
  * falling back to a heuristic if the API call fails or no API key is available.
  */
 export async function classifyInteraction(task: string, source?: 'voice' | 'text'): Promise<InteractionType> {
@@ -50,7 +50,7 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
         'You are a classifier. Determine whether the user\'s request requires computer use (controlling the GUI — clicking, scrolling, typing into app windows, navigating between apps) or can be handled with local tools (answering questions, running terminal commands, creating/editing/reading files, web searches, writing code). GUI tasks → computer_use. Everything else → text_qa.',
         {
           config: {
-            model: 'claude-haiku-4-5-20251001',
+            modelIntent: 'latency-optimized',
             max_tokens: 128,
             tool_choice: { type: 'tool' as const, name: 'classify_interaction' },
           },
@@ -63,7 +63,7 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
       if (toolBlock) {
         const input = toolBlock.input as { interaction_type?: string; reasoning?: string };
         const result = input.interaction_type === 'text_qa' ? 'text_qa' : 'computer_use';
-        log.info({ result, reasoning: input.reasoning }, 'Haiku classification');
+        log.info({ result, reasoning: input.reasoning }, 'LLM classification');
         return result;
       }
 
@@ -74,14 +74,14 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
     }
   } catch (err) {
     const message = err instanceof Error ? err.message : String(err);
-    log.warn({ err: message }, 'Haiku classification failed, falling back to heuristic');
+    log.warn({ err: message }, 'LLM classification failed, falling back to heuristic');
     return classifyHeuristic(task);
   }
 }
 
 /**
  * Heuristic classifier — direct port of the Swift client's logic.
- * Used as fallback when the Haiku API call is unavailable or fails.
+ * Used as fallback when the LLM API call is unavailable or fails.
  */
 export function classifyHeuristic(task: string): InteractionType {
   const lower = task.toLowerCase().trim();

diff --git a/assistant/src/daemon/watch-handler.ts b/assistant/src/daemon/watch-handler.ts
@@ -62,7 +62,7 @@ export async function handleWatchObservation(
       'Observation added to session',
     );
 
-    // 4. Every 3 observations: call Haiku for live commentary (chat-initiated watch only)
+    // 4. Every 3 observations: call the LLM for live commentary (chat-initiated watch only)
     if (!session.isRideShotgun && session.observations.length % 3 === 0) {
       log.debug(
         { watchId: msg.watchId, observationCount: session.observations.length },
@@ -126,7 +126,7 @@ async function generateCommentary(session: WatchSession): Promise<void> {
       systemPrompt,
       {
         config: {
-          model: 'claude-haiku-4-5-20251001',
+          modelIntent: 'latency-optimized',
           max_tokens: 200,
         },
       },
@@ -155,7 +155,7 @@ export async function generateSummary(session: WatchSession): Promise<void> {
   try {
     log.debug(
       { watchId: session.watchId, sessionId: session.sessionId, observationCount: session.observations.length, commentaryCount: session.commentaryCount },
-      'generateSummary starting — calling Sonnet',
+      'generateSummary starting — calling LLM',
     );
     const provider = getConfiguredProvider();
     if (!provider) {
@@ -244,13 +244,13 @@ export async function generateSummary(session: WatchSession): Promise<void> {
       systemPrompt,
       {
         config: {
-          model: 'claude-sonnet-4-6',
+          modelIntent: 'quality-optimized',
           max_tokens: 2000,
         },
       },
     );
 
-    log.debug({ watchId: session.watchId }, 'Sonnet API call completed successfully');
+    log.debug({ watchId: session.watchId }, 'LLM API call completed successfully');
 
     const summaryText = extractText(response);
 
@@ -269,7 +269,7 @@ export async function generateSummary(session: WatchSession): Promise<void> {
       fireWatchCompletionNotifier(session.sessionId, session);
     }
   } catch (err) {
-    log.error({ err, watchId: session.watchId }, 'Error generating watch summary — Sonnet API call failed');
+    log.error({ err, watchId: session.watchId }, 'Error generating watch summary — LLM API call failed');
     const message = err instanceof Error ? err.message : String(err);
     lastSummaryBySession.set(session.sessionId, `[error] Summary generation failed: ${message}`);
     fireWatchCompletionNotifier(session.sessionId, session);

@@ -1,7 +1,8 @@
 import { getConfiguredProvider, createTimeout, extractToolUse, userMessage } from '../providers/anthropic-send-message.js';
+import type { ModelIntent } from '../providers/types.js';
 import { truncate } from '../util/truncate.js';
 
-const DEFAULT_RESOLVER_MODEL = 'claude-haiku-4-5-20251001';
+const DEFAULT_RESOLVER_MODEL_INTENT: ModelIntent = 'latency-optimized';
 const DEFAULT_RESOLVER_TIMEOUT_MS = 12_000;
 
 const DIRECTIONAL_EXISTING_CUES = ['existing', 'old', 'previous', 'first', 'earlier', 'original'];
@@ -37,6 +38,7 @@ export interface ClarificationResolverInput {
 export interface ClarificationResolverOptions {
   apiKey?: string;
   model?: string;
+  modelIntent?: ModelIntent;
   timeoutMs?: number;
 }
 
@@ -66,7 +68,8 @@ export async function resolveConflictClarification(
 
   try {
     return await resolveWithLlm(input, {
-      model: options?.model ?? DEFAULT_RESOLVER_MODEL,
+      model: options?.model,
+      modelIntent: options?.modelIntent ?? DEFAULT_RESOLVER_MODEL_INTENT,
       timeoutMs: options?.timeoutMs ?? DEFAULT_RESOLVER_TIMEOUT_MS,
     });
   } catch (err) {
@@ -165,7 +168,7 @@ function resolveWithHeuristics(input: ClarificationResolverInput): Clarification
 
 async function resolveWithLlm(
   input: ClarificationResolverInput,
-  options: { model: string; timeoutMs: number },
+  options: { model?: string; modelIntent: ModelIntent; timeoutMs: number },
 ): Promise<ClarificationResolverResult> {
   const provider = getConfiguredProvider()!;
   const userPrompt = [
@@ -213,7 +216,7 @@ async function resolveWithLlm(
       ].join('\n'),
       {
         config: {
-          model: options.model,
+          ...(options.model ? { model: options.model } : { modelIntent: options.modelIntent }),
           max_tokens: 256,
           tool_choice: { type: 'tool' as const, name: 'resolve_conflict_clarification' },
         },

@@ -243,7 +243,7 @@ async function classifyRelationship(
       CONTRADICTION_SYSTEM_PROMPT,
       {
         config: {
-          model: 'claude-haiku-4-5-20251001',
+          modelIntent: 'latency-optimized',
           max_tokens: 256,
           tool_choice: { type: 'tool' as const, name: 'classify_relationship' },
         },

diff --git a/assistant/src/messaging/thread-summarizer.ts b/assistant/src/messaging/thread-summarizer.ts
@@ -5,7 +5,7 @@ import type { ThreadMessage, ThreadSummary } from './types.js';
 
 const log = getLogger('thread-summarizer');
 
-const SUMMARIZATION_MODEL = 'claude-haiku-4-5-20251001';
+const SUMMARIZATION_MODEL_INTENT = 'latency-optimized' as const;
 const SUMMARIZATION_TIMEOUT_MS = 20_000;
 const DEFAULT_MAX_TOKENS = 4000;
 const CHARS_PER_TOKEN = 4;
@@ -207,7 +207,7 @@ async function summarizeWithLLM(
         SYSTEM_PROMPT,
         {
           config: {
-            model: SUMMARIZATION_MODEL,
+            modelIntent: SUMMARIZATION_MODEL_INTENT,
             max_tokens: 1024,
             tool_choice: { type: 'tool' as const, name: 'store_thread_summary' },
           },

diff --git a/assistant/src/messaging/triage-engine.ts b/assistant/src/messaging/triage-engine.ts
@@ -2,7 +2,7 @@
  * Channel-agnostic message triage engine.
  *
  * Classifies an inbound message by combining sender context from the
- * contact graph, matching action playbooks, and an LLM call (Haiku)
+ * contact graph, matching action playbooks, and an LLM call
  * for final classification. Results are persisted to the triageResults
  * table for accuracy review.
  */
@@ -23,7 +23,7 @@ import { DEFAULT_TRIAGE_CATEGORIES } from './types.js';
 
 const log = getLogger('triage-engine');
 
-const TRIAGE_MODEL = 'claude-haiku-4-5-20251001';
+const TRIAGE_MODEL_INTENT = 'latency-optimized' as const;
 const TRIAGE_CLASSIFICATION_TIMEOUT_MS = 15_000;
 
 // ── Playbook fetching ────────────────────────────────────────────────
@@ -229,7 +229,7 @@ async function classifyWithLLM(
       systemPrompt,
       {
         config: {
-          model: TRIAGE_MODEL,
+          modelIntent: TRIAGE_MODEL_INTENT,
           max_tokens: 1024,
           tool_choice: { type: 'tool' as const, name: 'store_triage_result' },
         },

diff --git a/assistant/src/providers/model-intents.ts b/assistant/src/providers/model-intents.ts
@@ -0,0 +1,104 @@
+import type { ModelIntent } from './types.js';
+
+/** Default model for each known provider, keyed by provider name. */
+const PROVIDER_DEFAULT_MODELS = {
+  anthropic: 'claude-opus-4-6',
+  openai: 'gpt-5.2',
+  gemini: 'gemini-3-flash',
+  ollama: 'llama3.2',
+  fireworks: 'accounts/fireworks/models/kimi-k2p5',
+  openrouter: 'x-ai/grok-4',
+} as const;
+
+type KnownProviderName = keyof typeof PROVIDER_DEFAULT_MODELS;
+
+/** Concrete model for every (known provider, intent) pair. */
+const PROVIDER_MODEL_INTENTS: Record<KnownProviderName, Record<ModelIntent, string>> = {
+  anthropic: {
+    'latency-optimized': 'claude-haiku-4-5-20251001',
+    'quality-optimized': 'claude-opus-4-6',
+    'vision-optimized': 'claude-sonnet-4-6',
+  },
+  openai: {
+    'latency-optimized': 'gpt-4o-mini',
+    'quality-optimized': 'gpt-5.2',
+    'vision-optimized': 'gpt-4o',
+  },
+  gemini: {
+    'latency-optimized': 'gemini-3-flash',
+    'quality-optimized': 'gemini-3-flash',
+    'vision-optimized': 'gemini-3-flash',
+  },
+  ollama: {
+    'latency-optimized': 'llama3.2',
+    'quality-optimized': 'llama3.2',
+    'vision-optimized': 'llama3.2',
+  },
+  fireworks: {
+    'latency-optimized': 'accounts/fireworks/models/kimi-k2p5',
+    'quality-optimized': 'accounts/fireworks/models/kimi-k2p5',
+    'vision-optimized': 'accounts/fireworks/models/kimi-k2p5',
+  },
+  openrouter: {
+    'latency-optimized': 'x-ai/grok-4',
+    'quality-optimized': 'x-ai/grok-4',
+    'vision-optimized': 'x-ai/grok-4',
+  },
+};
+
+/** Every accepted intent string; backs the runtime check in isModelIntent. */
+const MODEL_INTENTS = new Set<ModelIntent>([
+  'latency-optimized',
+  'quality-optimized',
+  'vision-optimized',
+]);
+
+/**
+ * Type guard: true when `providerName` is an own key of PROVIDER_DEFAULT_MODELS.
+ *
+ * Plain indexing would also match members inherited from Object.prototype
+ * ('constructor', 'toString', ...), so such names are checked with
+ * hasOwnProperty and treated as unknown providers.
+ */
+function isKnownProvider(providerName: string): providerName is KnownProviderName {
+  return Object.prototype.hasOwnProperty.call(PROVIDER_DEFAULT_MODELS, providerName);
+}
+
+/**
+ * Type guard for model intent strings.
+ *
+ * @param value - Any value; non-strings are rejected.
+ * @returns True only when `value` is one of the strings in MODEL_INTENTS.
+ */
+export function isModelIntent(value: unknown): value is ModelIntent {
+  return typeof value === 'string' && MODEL_INTENTS.has(value as ModelIntent);
+}
+
+/**
+ * Looks up the default model for a provider.
+ *
+ * @param providerName - Provider identifier; does not have to be a known provider.
+ * @returns The provider's entry in PROVIDER_DEFAULT_MODELS, or the Anthropic
+ *   default when `providerName` is not a known provider.
+ */
+export function getProviderDefaultModel(providerName: string): string {
+  return isKnownProvider(providerName)
+    ? PROVIDER_DEFAULT_MODELS[providerName]
+    : PROVIDER_DEFAULT_MODELS.anthropic;
+}
+
+/**
+ * Translates a model intent into the concrete model name for a provider.
+ *
+ * @param providerName - Provider identifier; does not have to be a known provider.
+ * @param intent - Intent to resolve.
+ * @returns The provider's model for `intent`, or the result of
+ *   getProviderDefaultModel when `providerName` is not a known provider.
+ */
+export function resolveModelIntent(providerName: string, intent: ModelIntent): string {
+  if (isKnownProvider(providerName)) {
+    return PROVIDER_MODEL_INTENTS[providerName][intent];
+  }
+  return getProviderDefaultModel(providerName);
+}
+