Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions assistant/src/__tests__/model-intents.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import { describe, expect, test } from 'bun:test';
import type { Message, Provider, ProviderResponse, SendMessageOptions } from '../providers/types.js';
import { RetryProvider } from '../providers/retry.js';
import { getProviderDefaultModel, isModelIntent, resolveModelIntent } from '../providers/model-intents.js';

/** Minimal one-message user conversation passed to every `sendMessage` call in this file. */
const DUMMY_MESSAGES: Message[] = [
  {
    role: 'user',
    content: [
      { type: 'text', text: 'hello' },
    ],
  },
];

/**
 * Builds a canned provider response that reports `model` as the model used.
 * The text content, token usage, and stop reason are fixed values.
 *
 * @param model Model identifier to echo back in the response.
 * @returns A response with a single `'ok'` text block.
 */
function makeResponse(model: string): ProviderResponse {
  const usage = { inputTokens: 1, outputTokens: 1 };
  const response: ProviderResponse = {
    content: [{ type: 'text', text: 'ok' }],
    model,
    usage,
    stopReason: 'end_turn',
  };
  return response;
}

/**
 * Creates a stub provider for tests.
 *
 * Each `sendMessage` call first hands the received options to `onCall`, then
 * resolves with a canned response whose `model` is `options.config.model`,
 * or `'default-model'` when no model was supplied.
 *
 * @param name Provider name exposed on the returned stub.
 * @param onCall Spy invoked with the options of every `sendMessage` call.
 */
function makeProvider(
  name: string,
  onCall: (options: SendMessageOptions | undefined) => void,
): Provider {
  const sendMessage: Provider['sendMessage'] = async (_messages, _tools, _systemPrompt, options) => {
    onCall(options);
    const requestedModel = (options?.config as Record<string, unknown> | undefined)?.model as
      | string
      | undefined;
    return makeResponse(requestedModel ?? 'default-model');
  };

  return { name, sendMessage };
}

// Unit tests for the pure helpers exported by providers/model-intents.
describe('model intents', () => {
  test('validates model intent strings', () => {
    const accepted = ['latency-optimized', 'quality-optimized', 'vision-optimized'];
    for (const candidate of accepted) {
      expect(isModelIntent(candidate)).toBe(true);
    }

    const rejected = ['fastest-model', undefined];
    for (const candidate of rejected) {
      expect(isModelIntent(candidate)).toBe(false);
    }
  });

  test('resolves intent to provider-specific model', () => {
    // [provider, intent, expected concrete model]
    const expectations = [
      ['anthropic', 'latency-optimized', 'claude-haiku-4-5-20251001'],
      ['anthropic', 'quality-optimized', 'claude-opus-4-6'],
      ['anthropic', 'vision-optimized', 'claude-sonnet-4-6'],
      ['openai', 'latency-optimized', 'gpt-4o-mini'],
    ] as const;

    for (const [provider, intent, expectedModel] of expectations) {
      expect(resolveModelIntent(provider, intent)).toBe(expectedModel);
    }
  });

  test('falls back to provider default for unknown providers', () => {
    const unknownProvider = 'unknown-provider';
    const fallbackModel = 'claude-opus-4-6';

    expect(getProviderDefaultModel(unknownProvider)).toBe(fallbackModel);
    expect(resolveModelIntent(unknownProvider, 'quality-optimized')).toBe(fallbackModel);
  });
});

// Verifies how RetryProvider rewrites `config` before delegating to the wrapped provider.
describe('RetryProvider model intent normalization', () => {
  /**
   * Sends one request with `config` through a RetryProvider wrapping a stub
   * named `providerName`, and returns the config the stub actually received.
   */
  async function sendAndCaptureConfig(
    providerName: string,
    config: SendMessageOptions['config'],
  ): Promise<Record<string, unknown>> {
    let seen: SendMessageOptions | undefined;
    const inner = makeProvider(providerName, (options) => {
      seen = options;
    });
    const wrapped = new RetryProvider(inner);

    await wrapped.sendMessage(DUMMY_MESSAGES, undefined, undefined, { config });

    return seen?.config as Record<string, unknown>;
  }

  test('translates modelIntent into concrete model and strips modelIntent key', async () => {
    const config = await sendAndCaptureConfig('anthropic', {
      modelIntent: 'quality-optimized',
      max_tokens: 123,
    });

    expect(config.model).toBe('claude-opus-4-6');
    expect(config.modelIntent).toBeUndefined();
    expect(config.max_tokens).toBe(123);
  });

  test('explicit model override wins over modelIntent', async () => {
    const config = await sendAndCaptureConfig('openai', {
      model: 'custom-model-v1',
      modelIntent: 'latency-optimized',
    });

    expect(config.model).toBe('custom-model-v1');
    expect(config.modelIntent).toBeUndefined();
  });
});

Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ async function analyzeChunk(
undefined,
{
config: {
model: 'claude-sonnet-4-6',
modelIntent: 'vision-optimized',
max_tokens: 4096,
},
},
Expand Down
2 changes: 1 addition & 1 deletion assistant/src/config/skills.ts
Original file line number Diff line number Diff line change
Expand Up @@ -897,7 +897,7 @@ async function generateSkillIcon(name: string, description: string): Promise<str
'You are a pixel art icon designer. When asked, return ONLY a single <svg> element — no explanation, no markdown, no code fences. The SVG must be a 16x16 grid pixel art icon using <rect> elements. Use a limited palette (3-5 colors). Keep it under 2KB. The viewBox should be "0 0 16 16" with each pixel being a 1x1 rect.',
{
config: {
model: 'claude-haiku-4-5-20251001',
modelIntent: 'latency-optimized',
max_tokens: 1024,
},
},
Expand Down
10 changes: 5 additions & 5 deletions assistant/src/daemon/classifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const CLASSIFICATION_TIMEOUT_MS = 5000;
export type InteractionType = 'computer_use' | 'text_qa';

/**
* Classify a user task as computer_use or text_qa using a Haiku tool-use call,
* Classify a user task as computer_use or text_qa using an LLM tool-use call,
* falling back to a heuristic if the API call fails or no API key is available.
*/
export async function classifyInteraction(task: string, source?: 'voice' | 'text'): Promise<InteractionType> {
Expand Down Expand Up @@ -50,7 +50,7 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
'You are a classifier. Determine whether the user\'s request requires computer use (controlling the GUI — clicking, scrolling, typing into app windows, navigating between apps) or can be handled with local tools (answering questions, running terminal commands, creating/editing/reading files, web searches, writing code). GUI tasks → computer_use. Everything else → text_qa.',
{
config: {
model: 'claude-haiku-4-5-20251001',
modelIntent: 'latency-optimized',
max_tokens: 128,
tool_choice: { type: 'tool' as const, name: 'classify_interaction' },
},
Expand All @@ -63,7 +63,7 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
if (toolBlock) {
const input = toolBlock.input as { interaction_type?: string; reasoning?: string };
const result = input.interaction_type === 'text_qa' ? 'text_qa' : 'computer_use';
log.info({ result, reasoning: input.reasoning }, 'Haiku classification');
log.info({ result, reasoning: input.reasoning }, 'LLM classification');
return result;
}

Expand All @@ -74,14 +74,14 @@ export async function classifyInteraction(task: string, source?: 'voice' | 'text
}
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
log.warn({ err: message }, 'Haiku classification failed, falling back to heuristic');
log.warn({ err: message }, 'LLM classification failed, falling back to heuristic');
return classifyHeuristic(task);
}
}

/**
* Heuristic classifier — direct port of the Swift client's logic.
* Used as fallback when the Haiku API call is unavailable or fails.
* Used as fallback when the LLM API call is unavailable or fails.
*/
export function classifyHeuristic(task: string): InteractionType {
const lower = task.toLowerCase().trim();
Expand Down
12 changes: 6 additions & 6 deletions assistant/src/daemon/watch-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ export async function handleWatchObservation(
'Observation added to session',
);

// 4. Every 3 observations: call Haiku for live commentary (chat-initiated watch only)
// 4. Every 3 observations: call the LLM for live commentary (chat-initiated watch only)
if (!session.isRideShotgun && session.observations.length % 3 === 0) {
log.debug(
{ watchId: msg.watchId, observationCount: session.observations.length },
Expand Down Expand Up @@ -126,7 +126,7 @@ async function generateCommentary(session: WatchSession): Promise<void> {
systemPrompt,
{
config: {
model: 'claude-haiku-4-5-20251001',
modelIntent: 'latency-optimized',
max_tokens: 200,
},
},
Expand Down Expand Up @@ -155,7 +155,7 @@ export async function generateSummary(session: WatchSession): Promise<void> {
try {
log.debug(
{ watchId: session.watchId, sessionId: session.sessionId, observationCount: session.observations.length, commentaryCount: session.commentaryCount },
'generateSummary starting — calling Sonnet',
'generateSummary starting — calling LLM',
);
const provider = getConfiguredProvider();
if (!provider) {
Expand Down Expand Up @@ -244,13 +244,13 @@ export async function generateSummary(session: WatchSession): Promise<void> {
systemPrompt,
{
config: {
model: 'claude-sonnet-4-6',
modelIntent: 'quality-optimized',
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Summary generation silently upgraded from Sonnet to Opus due to intent taxonomy gap

The generateSummary function in watch-handler.ts previously used model: 'claude-sonnet-4-6' (mid-tier). The migration changes it to modelIntent: 'quality-optimized', which resolves to claude-opus-4-6 for the Anthropic provider (assistant/src/providers/model-intents.ts:17). This is a significant model upgrade — Opus is substantially more expensive and slower than Sonnet.

Root cause: intent taxonomy lacks a mid-tier mapping

The intent taxonomy only offers three tiers:

  • latency-optimized → Haiku (cheap/fast)
  • quality-optimized → Opus (expensive/best)
  • vision-optimized → Sonnet (mid-tier, but semantically tied to vision)

The original Sonnet was a deliberate choice for summary generation — good enough quality at moderate cost. The migration had no semantically appropriate intent for a "balanced/general-purpose" tier, so quality-optimized was chosen, but it maps to the most expensive model.

For comparison, the commentary generation in the same file correctly maps Haiku → latency-optimized → Haiku (1:1 equivalent at watch-handler.ts:129). But summary maps Sonnet → quality-optimized → Opus, which is NOT a 1:1 equivalent.

Impact: Every watch session summary now uses Opus instead of Sonnet, increasing API cost significantly (~5x per summary call) and potentially increasing latency for users.

Prompt for agents
In assistant/src/daemon/watch-handler.ts line 247, the modelIntent 'quality-optimized' maps to claude-opus-4-6 whereas the original code used claude-sonnet-4-6. To preserve the original behavior, either: (1) Add a new 'balanced' or 'general-purpose' intent to the ModelIntent type in assistant/src/providers/types.ts and map it to Sonnet-tier models in assistant/src/providers/model-intents.ts, then use that intent here. Or (2) If upgrading to Opus is intentional, keep this as-is but document the cost increase. Or (3) Use 'vision-optimized' which maps to Sonnet, though the name is misleading for a text summary use case.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

max_tokens: 2000,
},
},
);

log.debug({ watchId: session.watchId }, 'Sonnet API call completed successfully');
log.debug({ watchId: session.watchId }, 'LLM API call completed successfully');

const summaryText = extractText(response);

Expand All @@ -269,7 +269,7 @@ export async function generateSummary(session: WatchSession): Promise<void> {
fireWatchCompletionNotifier(session.sessionId, session);
}
} catch (err) {
log.error({ err, watchId: session.watchId }, 'Error generating watch summary — Sonnet API call failed');
log.error({ err, watchId: session.watchId }, 'Error generating watch summary — LLM API call failed');
const message = err instanceof Error ? err.message : String(err);
lastSummaryBySession.set(session.sessionId, `[error] Summary generation failed: ${message}`);
fireWatchCompletionNotifier(session.sessionId, session);
Expand Down
11 changes: 7 additions & 4 deletions assistant/src/memory/clarification-resolver.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { getConfiguredProvider, createTimeout, extractToolUse, userMessage } from '../providers/anthropic-send-message.js';
import type { ModelIntent } from '../providers/types.js';
import { truncate } from '../util/truncate.js';

const DEFAULT_RESOLVER_MODEL = 'claude-haiku-4-5-20251001';
const DEFAULT_RESOLVER_MODEL_INTENT: ModelIntent = 'latency-optimized';
const DEFAULT_RESOLVER_TIMEOUT_MS = 12_000;

const DIRECTIONAL_EXISTING_CUES = ['existing', 'old', 'previous', 'first', 'earlier', 'original'];
Expand Down Expand Up @@ -37,6 +38,7 @@ export interface ClarificationResolverInput {
export interface ClarificationResolverOptions {
apiKey?: string;
model?: string;
modelIntent?: ModelIntent;
timeoutMs?: number;
}

Expand Down Expand Up @@ -66,7 +68,8 @@ export async function resolveConflictClarification(

try {
return await resolveWithLlm(input, {
model: options?.model ?? DEFAULT_RESOLVER_MODEL,
model: options?.model,
modelIntent: options?.modelIntent ?? DEFAULT_RESOLVER_MODEL_INTENT,
timeoutMs: options?.timeoutMs ?? DEFAULT_RESOLVER_TIMEOUT_MS,
});
} catch (err) {
Expand Down Expand Up @@ -165,7 +168,7 @@ function resolveWithHeuristics(input: ClarificationResolverInput): Clarification

async function resolveWithLlm(
input: ClarificationResolverInput,
options: { model: string; timeoutMs: number },
options: { model?: string; modelIntent: ModelIntent; timeoutMs: number },
): Promise<ClarificationResolverResult> {
const provider = getConfiguredProvider()!;
const userPrompt = [
Expand Down Expand Up @@ -213,7 +216,7 @@ async function resolveWithLlm(
].join('\n'),
{
config: {
model: options.model,
...(options.model ? { model: options.model } : { modelIntent: options.modelIntent }),
max_tokens: 256,
tool_choice: { type: 'tool' as const, name: 'resolve_conflict_clarification' },
},
Expand Down
2 changes: 1 addition & 1 deletion assistant/src/memory/contradiction-checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ async function classifyRelationship(
CONTRADICTION_SYSTEM_PROMPT,
{
config: {
model: 'claude-haiku-4-5-20251001',
modelIntent: 'latency-optimized',
max_tokens: 256,
tool_choice: { type: 'tool' as const, name: 'classify_relationship' },
},
Expand Down
4 changes: 2 additions & 2 deletions assistant/src/messaging/thread-summarizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import type { ThreadMessage, ThreadSummary } from './types.js';

const log = getLogger('thread-summarizer');

const SUMMARIZATION_MODEL = 'claude-haiku-4-5-20251001';
const SUMMARIZATION_MODEL_INTENT = 'latency-optimized' as const;
const SUMMARIZATION_TIMEOUT_MS = 20_000;
const DEFAULT_MAX_TOKENS = 4000;
const CHARS_PER_TOKEN = 4;
Expand Down Expand Up @@ -207,7 +207,7 @@ async function summarizeWithLLM(
SYSTEM_PROMPT,
{
config: {
model: SUMMARIZATION_MODEL,
modelIntent: SUMMARIZATION_MODEL_INTENT,
max_tokens: 1024,
tool_choice: { type: 'tool' as const, name: 'store_thread_summary' },
},
Expand Down
6 changes: 3 additions & 3 deletions assistant/src/messaging/triage-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Channel-agnostic message triage engine.
*
* Classifies an inbound message by combining sender context from the
* contact graph, matching action playbooks, and an LLM call (Haiku)
* contact graph, matching action playbooks, and an LLM call
* for final classification. Results are persisted to the triageResults
* table for accuracy review.
*/
Expand All @@ -23,7 +23,7 @@ import { DEFAULT_TRIAGE_CATEGORIES } from './types.js';

const log = getLogger('triage-engine');

const TRIAGE_MODEL = 'claude-haiku-4-5-20251001';
const TRIAGE_MODEL_INTENT = 'latency-optimized' as const;
const TRIAGE_CLASSIFICATION_TIMEOUT_MS = 15_000;

// ── Playbook fetching ────────────────────────────────────────────────
Expand Down Expand Up @@ -229,7 +229,7 @@ async function classifyWithLLM(
systemPrompt,
{
config: {
model: TRIAGE_MODEL,
modelIntent: TRIAGE_MODEL_INTENT,
max_tokens: 1024,
tool_choice: { type: 'tool' as const, name: 'store_triage_result' },
},
Expand Down
70 changes: 70 additions & 0 deletions assistant/src/providers/model-intents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import type { ModelIntent } from './types.js';

/** Model used for each known provider when no more specific choice applies. */
const PROVIDER_DEFAULT_MODELS = {
  anthropic: 'claude-opus-4-6',
  openai: 'gpt-5.2',
  gemini: 'gemini-3-flash',
  ollama: 'llama3.2',
  fireworks: 'accounts/fireworks/models/kimi-k2p5',
  openrouter: 'x-ai/grok-4',
} as const;

type KnownProviderName = keyof typeof PROVIDER_DEFAULT_MODELS;

/**
 * Concrete model for every (provider, intent) pair.
 *
 * Only anthropic and openai currently distinguish between intents; the other
 * providers map every intent to their default model.
 */
const PROVIDER_MODEL_INTENTS: Record<KnownProviderName, Record<ModelIntent, string>> = {
  anthropic: {
    'latency-optimized': 'claude-haiku-4-5-20251001',
    'quality-optimized': 'claude-opus-4-6',
    'vision-optimized': 'claude-sonnet-4-6',
  },
  openai: {
    'latency-optimized': 'gpt-4o-mini',
    'quality-optimized': 'gpt-5.2',
    'vision-optimized': 'gpt-4o',
  },
  gemini: {
    'latency-optimized': 'gemini-3-flash',
    'quality-optimized': 'gemini-3-flash',
    'vision-optimized': 'gemini-3-flash',
  },
  ollama: {
    'latency-optimized': 'llama3.2',
    'quality-optimized': 'llama3.2',
    'vision-optimized': 'llama3.2',
  },
  fireworks: {
    'latency-optimized': 'accounts/fireworks/models/kimi-k2p5',
    'quality-optimized': 'accounts/fireworks/models/kimi-k2p5',
    'vision-optimized': 'accounts/fireworks/models/kimi-k2p5',
  },
  openrouter: {
    'latency-optimized': 'x-ai/grok-4',
    'quality-optimized': 'x-ai/grok-4',
    'vision-optimized': 'x-ai/grok-4',
  },
};

/** Runtime list of every valid intent, backing {@link isModelIntent}. */
const MODEL_INTENTS = new Set<ModelIntent>([
  'latency-optimized',
  'quality-optimized',
  'vision-optimized',
]);

/**
 * Reads `key` from `table` only if it is an own property.
 *
 * The lookup tables are plain object literals, so bare indexing with an
 * arbitrary string would also find inherited `Object.prototype` members
 * (`constructor`, `toString`, `__proto__`, ...) and return a non-string value.
 */
function ownValue<T extends object>(table: T, key: string): T[keyof T] | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key as keyof T] : undefined;
}

/**
 * Type guard for model intents.
 *
 * @param value Any value, typically an untyped config field.
 * @returns `true` only when `value` is one of the known intent strings.
 */
export function isModelIntent(value: unknown): value is ModelIntent {
  return typeof value === 'string' && MODEL_INTENTS.has(value as ModelIntent);
}

/**
 * Returns the default model for a provider.
 *
 * @param providerName Provider name; it does not have to be a known provider.
 * @returns The provider's default model, or the anthropic default when the
 *   provider is not in the table.
 */
export function getProviderDefaultModel(providerName: string): string {
  return ownValue(PROVIDER_DEFAULT_MODELS, providerName) ?? PROVIDER_DEFAULT_MODELS.anthropic;
}

/**
 * Translates an abstract intent into a concrete model id for a provider.
 *
 * @param providerName Provider name; it does not have to be a known provider.
 * @param intent The intent to resolve.
 * @returns The mapped model. If the provider is unknown, or the intent has no
 *   entry for it (possible only when an unvalidated value slips past the type
 *   system), this falls back to {@link getProviderDefaultModel} rather than
 *   returning `undefined`.
 */
export function resolveModelIntent(providerName: string, intent: ModelIntent): string {
  const intentModels = ownValue(PROVIDER_MODEL_INTENTS, providerName);
  const model = intentModels === undefined ? undefined : ownValue(intentModels, intent);
  return model ?? getProviderDefaultModel(providerName);
}

Loading
Loading